In [102]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error

# Load dataset 

In [103]:
# Load the diabetes dataset that ships with scikit-learn's `datasets` module.
diabetes = datasets.load_diabetes()

# Check information about our dataset

In [104]:
# List the fields available on the loaded dataset object.
print(diabetes.keys())

dict_keys(['data', 'target', 'frame', 'DESCR', 'feature_names', 'data_filename', 'target_filename'])


In [105]:
# Print the feature matrix; the printed output is abbreviated with "..." because the array is large.
print(diabetes.data)

[[ 0.03807591  0.05068012  0.06169621 ... -0.00259226  0.01990842
  -0.01764613]
 [-0.00188202 -0.04464164 -0.05147406 ... -0.03949338 -0.06832974
  -0.09220405]
 [ 0.08529891  0.05068012  0.04445121 ... -0.00259226  0.00286377
  -0.02593034]
 ...
 [ 0.04170844  0.05068012 -0.01590626 ... -0.01107952 -0.04687948
   0.01549073]
 [-0.04547248 -0.04464164  0.03906215 ...  0.02655962  0.04452837
  -0.02593034]
 [-0.04547248 -0.04464164 -0.0730303  ... -0.03949338 -0.00421986
   0.00306441]]


In [106]:
# Print the dataset's built-in description text.
print(diabetes.DESCR)

.. _diabetes_dataset:

Diabetes dataset
----------------

Ten baseline variables, age, sex, body mass index, average blood
pressure, and six blood serum measurements were obtained for each of n =
442 diabetes patients, as well as the response of interest, a
quantitative measure of disease progression one year after baseline.

**Data Set Characteristics:**

  :Number of Instances: 442

  :Number of Attributes: First 10 columns are numeric predictive values

  :Target: Column 11 is a quantitative measure of disease progression one year after baseline

  :Attribute Information:
      - age     age in years
      - sex
      - bmi     body mass index
      - bp      average blood pressure
      - s1      tc, T-Cells (a type of white blood cells)
      - s2      ldl, low-density lipoproteins
      - s3      hdl, high-density lipoproteins
      - s4      tch, thyroid stimulating hormone
      - s5      ltg, lamotrigine
      - s6      glu, blood sugar level

Note: Each of these 10 feature va

In [107]:
# Use the full feature matrix (every feature column) as the model input.
diabetes_X = diabetes.data

# Now we split the data: there are 442 records in our dataset, so we use 400 records for training and hold out the last 42 records for testing.

In [108]:
diabetes_X_train = diabetes_X[:-400]
diabetes_X_test = diabetes_X[-42:]

# Also we are splite the target values same as a variable (i.e., see we are use above cell same use here).

In [109]:
diabetes_Y_train = diabetes.target[:-400]
diabetes_Y_test = diabetes.target[-42:]

# Now we are going to train our model


In [110]:
# Create a linear regression estimator; no arguments are passed, so library defaults apply.
model = linear_model.LinearRegression()

In [111]:
# Fit the model on the training features and targets.
# `fit` returns the estimator, which is why the cell echoes `LinearRegression()`.
model.fit(diabetes_X_train, diabetes_Y_train)

LinearRegression()

# Now our model is ready to predict

In [112]:
# Predict target values for the held-out test features.
diabetes_Y_predict = model.predict(diabetes_X_test)

In [113]:
# Bare expression: displays the predicted values as the cell output.
diabetes_Y_predict

array([184.74796412,  43.37516002, 135.67604559, 248.05369975,
       178.28818562, 338.62715705,  97.38275012,  91.41705164,
       205.73944159, 192.05748971, 204.14555995, 119.8121365 ,
       166.53618242,  76.51913935, 151.91108925, 168.10245276,
       172.75266968,  97.1337862 ,  40.75168968,  51.39473119,
        73.98085635, 205.33854373, 197.65476216, 189.60103104,
       178.92970212, 154.73824091, 126.65612443, 104.95476359,
       212.95461354,  82.77988861, 102.14290455, 126.16452412,
       162.57336124,  36.06243745,  61.14455114, 102.42963341,
        53.85469761, 195.12102733,  36.05115715,  81.79893282,
       237.12281813,  58.38356896])

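# Now we find the Mean Squared Error:
* MSE is the average of the squared differences between the actual and predicted values: $\text{MSE} = \frac{1}{n}\sum_{i=1}^{n}(y_i - \hat{y}_i)^2$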

In [114]:
# Score the predictions against the true test targets, then report the value.
test_mse = mean_squared_error(diabetes_Y_test, diabetes_Y_predict)
print("Mean Squared Error is:", test_mse)

Mean Squared Error is: 2353.662033786457


## Find the weights (coefficients)

In [115]:
# `coef_` holds the coefficients learned by the fitted linear model.
fitted_weights = model.coef_
print("Weight:", fitted_weights)

Weight: [ -133.23501403  -147.26780714   398.2159149    391.41268408
 -1381.37761065   441.690436     572.86248014   709.53819614
  1458.66474463  -420.16344493]


In [116]:
# `intercept_` is the constant term of the fitted linear model.
fitted_intercept = model.intercept_
print("The intercept is:", fitted_intercept)

The intercept is: 145.9851967674698
