In [None]:
import pandas as pd

In [45]:
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, root_mean_squared_error

In [3]:
# Load the diabetes dataset through scikit-learn's `load_diabetes` helper.
# The returned container is dict-like and also exposes its entries as attributes.
db = load_diabetes()

In [4]:
# Inspect the raw container: the output shows a 'data' array of shape (442, 10)
# and a 'target' array of numeric values.
db

{'data': array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,
          0.01990749, -0.01764613],
        [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,
         -0.06833155, -0.09220405],
        [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,
          0.00286131, -0.02593034],
        ...,
        [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,
         -0.04688253,  0.01549073],
        [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,
          0.04452873, -0.02593034],
        [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,
         -0.00422151,  0.00306441]], shape=(442, 10)),
 'target': array([151.,  75., 141., 206., 135.,  97., 138.,  63., 110., 310., 101.,
         69., 179., 185., 118., 171., 166., 144.,  97., 168.,  68.,  49.,
         68., 245., 184., 202., 137.,  85., 131., 283., 129.,  59., 341.,
         87.,  65., 102., 265., 276., 252.,  90., 100.,  55.,  61.,  92.,
        259.,  53., 190., 142.,  75., 142.

In [5]:
# Pull the pieces of the dataset container into short working names.
x, y = db.data, db.target              # feature matrix and regression target
features = db.feature_names            # column labels for the feature matrix
target_name = "disease_progression"    # label used for the target column

In [6]:
# Wrap the feature matrix in a labelled DataFrame, one column per feature name.
df = pd.DataFrame(x, columns=features)

In [7]:
# Display the feature-only frame (one column per entry in `features`).
df

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6
0,0.038076,0.050680,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204
2,0.085299,0.050680,0.044451,-0.005670,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.025930
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641
...,...,...,...,...,...,...,...,...,...,...
437,0.041708,0.050680,0.019662,0.059744,-0.005697,-0.002566,-0.028674,-0.002592,0.031193,0.007207
438,-0.005515,0.050680,-0.015906,-0.067642,0.049341,0.079165,-0.028674,0.034309,-0.018114,0.044485
439,0.041708,0.050680,-0.015906,0.017293,-0.037344,-0.013840,-0.024993,-0.011080,-0.046883,0.015491
440,-0.045472,-0.044642,0.039062,0.001215,0.016318,0.015283,-0.028674,0.026560,0.044529,-0.025930


In [8]:
# Attach the regression target as an extra column so features and target live in one frame.
# Note: this mutates `df` in place, so the frame displayed by the earlier cell is now stale.
df[target_name] = y

In [9]:
# Display the frame again to confirm the target column was appended.
df

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,disease_progression
0,0.038076,0.050680,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204,75.0
2,0.085299,0.050680,0.044451,-0.005670,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.025930,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641,135.0
...,...,...,...,...,...,...,...,...,...,...,...
437,0.041708,0.050680,0.019662,0.059744,-0.005697,-0.002566,-0.028674,-0.002592,0.031193,0.007207,178.0
438,-0.005515,0.050680,-0.015906,-0.067642,0.049341,0.079165,-0.028674,0.034309,-0.018114,0.044485,104.0
439,0.041708,0.050680,-0.015906,0.017293,-0.037344,-0.013840,-0.024993,-0.011080,-0.046883,0.015491,132.0
440,-0.045472,-0.044642,0.039062,0.001215,0.016318,0.015283,-0.028674,0.026560,0.044529,-0.025930,220.0


In [14]:
# Separate the frame back into model inputs and the regression target.
# The column is addressed through `target_name` (the same constant used to create it)
# rather than a repeated string literal, so the two can never drift apart.
# Note: `x` and `y` are rebound here from NumPy arrays to a DataFrame / Series.
x = df.drop(columns=[target_name])
y = df[target_name]

In [33]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
# NOTE(review): `X_test` is capitalised unlike `x_train`; the name is kept because
# later cells refer to it.
x_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=23,
)

In [34]:
# Display the training features; the index keeps the original row labels,
# which are no longer in sequential order after the split.
x_train

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6
2,0.085299,0.050680,0.044451,-0.005670,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.025930
13,0.005383,0.050680,-0.001895,0.008101,-0.004321,-0.015719,-0.002903,-0.002592,0.038394,-0.013504
325,-0.001882,-0.044642,0.054152,-0.066506,0.072732,0.056619,-0.043401,0.084863,0.084492,0.048628
319,0.019913,-0.044642,0.004572,0.045972,-0.018080,-0.054549,0.063367,-0.039493,0.028658,0.061054
148,-0.060003,0.050680,0.015350,-0.019442,0.036958,0.048164,0.019187,-0.002592,-0.030748,-0.001078
...,...,...,...,...,...,...,...,...,...,...
31,-0.023677,-0.044642,-0.065486,-0.081413,-0.038720,-0.053610,0.059685,-0.076395,-0.037129,-0.042499
438,-0.005515,0.050680,-0.015906,-0.067642,0.049341,0.079165,-0.028674,0.034309,-0.018114,0.044485
40,0.005383,0.050680,-0.008362,0.021872,0.054845,0.073215,-0.024993,0.034309,0.012551,0.094191
230,-0.038207,0.050680,0.071397,-0.057313,0.153914,0.155887,0.000779,0.071948,0.050281,0.069338


In [37]:
# Fit an ordinary least-squares linear model on the training split.
# `fit` returns the estimator itself, so construction and fitting can be chained.
lr = LinearRegression().fit(x_train, y_train)
# Trailing expression keeps the estimator's rich display as the cell output.
lr


0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False


In [38]:
# Fitted coefficients of the linear model (the output shows ten values, one per feature column).
lr.coef_

array([  -19.84569955,  -271.44799521,   518.60568284,   326.13921989,
       -1003.87022985,   624.04203978,   220.7339424 ,   263.93739547,
         829.85679767,    77.75019971])

In [39]:
# Fitted intercept (constant term) of the linear model.
lr.intercept_

np.float64(151.5282462062113)

In [40]:
# Predict the target for the held-out test features.
# NOTE(review): `X_test` is capitalised unlike `x_train`; renaming it requires
# changing the train/test split cell as well.
y_pred = lr.predict(X_test)

In [41]:
# Display the predicted values for the test set.
y_pred

array([290.8985031 , 132.81832981, 140.44601181,  30.52576133,
       200.92486072, 198.95481117, 196.91371478, 147.93181701,
       213.17880041, 212.11885067, 136.77869205,  96.85057178,
       173.44867039,  85.40990027, 102.6441814 , 190.11087616,
       230.27392183, 188.42716944, 145.85718393,  76.25597227,
       289.33891569, 142.42898724, 157.45287569, 175.22511105,
       115.61830159, 151.32886756, 161.36115082, 156.88516338,
       293.08980029,  97.65390157, 165.43589399, 243.24452314,
       155.37196652,  86.43635045, 180.13404212, 228.21249797,
       136.36419614, 146.49716977, 189.16065615, 123.61635775,
       156.84652106, 256.71670786,  64.11134827, 148.91188505,
        74.5383303 , 256.93570828, 181.30611481,  49.44566501,
       188.65611663, 102.77129967, 158.28018226, 121.39943881,
       238.79858498, 183.78237578, 229.5481387 , 107.89634653,
       264.6432688 ,  79.13535336, 179.66252053, 240.73588818,
       191.26275668, 191.41644954, 124.6655909 , 182.17

In [42]:
# Display the true test targets for a visual comparison with the predictions.
y_test

114    258.0
261    103.0
193    116.0
110    104.0
357    221.0
       ...  
267    115.0
195    122.0
353    109.0
410    209.0
139    281.0
Name: disease_progression, Length: 89, dtype: float64

In [46]:
# Coefficient of determination (R²) on the held-out set.
# Keyword arguments make the truth/prediction order explicit.
r2_score(y_true=y_test, y_pred=y_pred)

0.4588389962630466

In [47]:
# Mean absolute error on the held-out set, in the same units as the target.
mean_absolute_error(y_true=y_test, y_pred=y_pred)

44.63376977028737

In [48]:
# Mean squared error on the held-out set (squared target units).
mean_squared_error(y_true=y_test, y_pred=y_pred)

3171.586619243448

In [49]:
# Root mean squared error on the held-out set (square root of the MSE, in target units).
root_mean_squared_error(y_true=y_test, y_pred=y_pred)

56.31684134646978