## Creating and saving the model

In [44]:
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

In [45]:
# Load the diabetes regression dataset that ships with scikit-learn.
# NOTE: despite the name, `df` is not a pandas DataFrame -- the output below
# shows a dict-like container holding NumPy arrays under 'data' and 'target'.
df = load_diabetes()
df

{'data': array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,
          0.01990842, -0.01764613],
        [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,
         -0.06832974, -0.09220405],
        [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,
          0.00286377, -0.02593034],
        ...,
        [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,
         -0.04687948,  0.01549073],
        [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,
          0.04452837, -0.02593034],
        [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,
         -0.00421986,  0.00306441]]),
 'target': array([151.,  75., 141., 206., 135.,  97., 138.,  63., 110., 310., 101.,
         69., 179., 185., 118., 171., 166., 144.,  97., 168.,  68.,  49.,
         68., 245., 184., 202., 137.,  85., 131., 283., 129.,  59., 341.,
         87.,  65., 102., 265., 276., 252.,  90., 100.,  55.,  61.,  92.,
        259.,  53., 190., 142.,  75., 142., 155., 225.,  59

In [46]:
# Feature matrix (2-D NumPy array). The values appear pre-scaled: note the
# small magnitudes centred around zero in the output below.
df.data

array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,
         0.01990842, -0.01764613],
       [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,
        -0.06832974, -0.09220405],
       [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,
         0.00286377, -0.02593034],
       ...,
       [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,
        -0.04687948,  0.01549073],
       [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,
         0.04452837, -0.02593034],
       [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,
        -0.00421986,  0.00306441]])

In [47]:
# Target vector the regression will learn to predict (1-D float array).
df.target

array([151.,  75., 141., 206., 135.,  97., 138.,  63., 110., 310., 101.,
        69., 179., 185., 118., 171., 166., 144.,  97., 168.,  68.,  49.,
        68., 245., 184., 202., 137.,  85., 131., 283., 129.,  59., 341.,
        87.,  65., 102., 265., 276., 252.,  90., 100.,  55.,  61.,  92.,
       259.,  53., 190., 142.,  75., 142., 155., 225.,  59., 104., 182.,
       128.,  52.,  37., 170., 170.,  61., 144.,  52., 128.,  71., 163.,
       150.,  97., 160., 178.,  48., 270., 202., 111.,  85.,  42., 170.,
       200., 252., 113., 143.,  51.,  52., 210.,  65., 141.,  55., 134.,
        42., 111.,  98., 164.,  48.,  96.,  90., 162., 150., 279.,  92.,
        83., 128., 102., 302., 198.,  95.,  53., 134., 144., 232.,  81.,
       104.,  59., 246., 297., 258., 229., 275., 281., 179., 200., 200.,
       173., 180.,  84., 121., 161.,  99., 109., 115., 268., 274., 158.,
       107.,  83., 103., 272.,  85., 280., 336., 281., 118., 317., 235.,
        60., 174., 259., 178., 128.,  96., 126., 28

In [48]:
# Hold out 30% of the samples for evaluation; the fixed random_state makes
# the split (and therefore every score below) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    df.data,
    df.target,
    test_size=0.3,
    random_state=10,
)

In [49]:
# Linear regression estimator with scikit-learn's default settings (no arguments passed).
model = LinearRegression()

In [50]:
# Fit on the training split only, so the test split stays unseen for evaluation.
model.fit(X_train,y_train)

LinearRegression()

In [51]:
# Predictions for the held-out test features; compare these against y_test to judge the fit.
model.predict(X_test)

array([150.03846506, 202.69053028, 178.00715834,  75.30072575,
       170.32549522, 124.96422162, 122.39159421, 261.19763849,
        88.2191457 ,  80.64779219, 125.96301002, 130.48124638,
       166.0638621 ,  95.09350669,  53.16039264, 221.24978857,
       143.70586289, 116.18365738, 199.70613438,  77.3277389 ,
       192.23987312, 240.94383811,  69.58276641, 214.82798147,
        58.59702147, 153.94550348, 162.81830163, 154.40508044,
       158.67955936, 111.81226551, 276.61815706, 171.85438562,
        61.17202334, 155.15410088, 209.51310536, 195.86914234,
        69.16414245, 187.97325555, 290.78564404, 195.70261987,
       205.16116409, 146.51668683, 219.27958274, 114.58469489,
        80.34908418,  96.11243005,  75.8915491 ,  77.83746401,
       288.77318205, 142.65760852,  93.03432301, 148.15636814,
       106.84266923, 226.55469817, 246.13179601,  77.41387386,
       114.55836713, 131.20306002, 200.895207  , 104.64056585,
       171.84079377, 103.4981139 , 126.53282662,  89.96

In [52]:
# For a scikit-learn regressor, .score() reports the coefficient of
# determination (R^2) of the predictions on the data it is given.
score = model.score(X_test, y_test)
round(score, ndigits=2)

0.47

## Saving and reloading the model

In [53]:
import pickle

In [54]:
# Serialize the fitted estimator to disk so it can be reused later without retraining.
with open("pickle_diabetes_model.pkl", "wb") as out_file:
    pickle.dump(model, out_file)

In [55]:
# Read the serialized estimator back from disk.
# WARNING: pickle.load can execute arbitrary code embedded in the file, so
# only unpickle files that you created yourself or otherwise fully trust.
with open("pickle_diabetes_model.pkl", "rb") as in_file:
    pickled_model_file = pickle.load(in_file)

In [56]:
# Sanity check: the reloaded model should score the same on the test split as
# the in-memory model did (which displayed 0.47 after rounding).
pickled_model_file.score(X_test,y_test)

0.47465388057520974

In [57]:
# Predict for a single sample with the reloaded model.
# The model was trained on the dataset's mean-centred, scaled features (values
# around +/-0.1), so any input must live in that same feature space. Feeding
# hand-typed raw magnitudes such as [[20, 1, 15, ...]] extrapolates far outside
# the training range and yields a meaningless prediction (~17993, versus
# targets in the low hundreds). Use a held-out row instead; X_test[:1] keeps
# the 2-D (1, n_features) shape that predict() expects.
pickled_model_file.predict(X_test[:1])

array([150.03846506])