In [1]:
import pandas as pd
import numpy as np

<h4 class="text-center"> Linear Model </h4>

In [2]:
from sklearn.linear_model import LinearRegression

In [3]:
#datasets
from sklearn.datasets import load_boston

In [4]:
# Load the Boston housing dataset bundle; later cells read its .data, .target
# and .feature_names attributes.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell only runs on older releases -- pin the version or switch data source.
boston = load_boston()

In [5]:
feature = boston.data

In [6]:
feature.shape

(506, 13)

In [7]:
target = boston.target

In [8]:
target.shape

(506,)

In [9]:
boston.feature_names

array(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD',
       'TAX', 'PTRATIO', 'B', 'LSTAT'], dtype='<U7')

<span class="badge"> Linear Regression </span>

In [10]:
regression = LinearRegression()

In [11]:
regression.fit(feature,target)

LinearRegression()

In [14]:
# R^2 (coefficient of determination) of the fitted model, evaluated on the same
# rows it was trained on -- there is no held-out split, so this is an
# in-sample figure and not an estimate of generalization.
regression.score(feature,target)

0.7406426641094094

In [15]:
# Fitted intercept (constant term) of the linear model.
regression.intercept_

36.459488385089855

In [16]:
regression.coef_

array([-1.08011358e-01,  4.64204584e-02,  2.05586264e-02,  2.68673382e+00,
       -1.77666112e+01,  3.80986521e+00,  6.92224640e-04, -1.47556685e+00,
        3.06049479e-01, -1.23345939e-02, -9.52747232e-01,  9.31168327e-03,
       -5.24758378e-01])

<span class="badge"> Predict </span>

In [17]:
target[2]

34.7

In [18]:
# Model prediction for row index 2, to compare against the observed target[2].
# Note: predictions are computed for every row and only one is read.
regression.predict(feature)[2]

30.567596718601642

In [23]:
# Coefficient of the feature at index 2 (INDUS in boston.feature_names), times 100:
# the change in the predicted target for a 100-unit increase in that feature
# with all other features held fixed.
# NOTE(review): index 2 and the factor 100 are hard-coded -- confirm this is the
# feature/scale that was meant to be illustrated.
regression.coef_[2]*100

2.0558626367068915

<h4 class="text-center"> Interaction Effects Between Features </h4>

In [24]:
from sklearn.preprocessing import PolynomialFeatures

In [29]:
# Keep only the first two columns (CRIM and ZN per boston.feature_names).
# NOTE: this rebinds `feature`; every later cell sees the two-column matrix
# until `feature` is assigned again.
feature = boston.data[:,0:2]

In [30]:
# Transformer that appends only cross-feature products: no pure powers and no
# constant (bias) column. With a two-column input the single term it adds is
# the product of the two columns.
interaction = PolynomialFeatures(
    interaction_only=True,
    include_bias=False,
    degree=3,
)

In [31]:
feature_interaction = interaction.fit_transform(feature)

In [32]:
feature_interaction.shape

(506, 3)

###### Linear Regression

In [33]:
inter_reg = LinearRegression()

In [34]:
inter_reg.fit(feature_interaction,target)

LinearRegression()

In [35]:
feature[0]

array([6.32e-03, 1.80e+01])

In [37]:
# Rebuild the interaction column by hand (element-wise product of the two
# feature columns) so it can be checked against the transformer's output.
interaction_term = feature[:, 0] * feature[:, 1]

In [39]:
interaction_term[0]

0.11376

In [40]:
feature_interaction[0]

array([6.3200e-03, 1.8000e+01, 1.1376e-01])

<h4 class="text-center"> Non-Linear </h4>

In [41]:
# Degree-3 polynomial expansion without the constant column: for two input
# columns this yields every power and cross-product up to total degree 3
# (9 output columns).
# NOTE(review): `polynominal` is a misspelling of `polynomial`; renaming it also
# requires updating the cell that calls polynominal.fit_transform.
polynominal = PolynomialFeatures(degree=3,include_bias=False)

In [42]:
feature_poly = polynominal.fit_transform(feature)

In [43]:
feature_poly.shape

(506, 9)

In [44]:
non_reg = LinearRegression()

In [45]:
non_reg.fit(feature_poly,target)

LinearRegression()

In [46]:
feature[0]

array([6.32e-03, 1.80e+01])

In [48]:
feature[0]**2

array([3.99424e-05, 3.24000e+02])

In [49]:
feature[0]**3

array([2.52435968e-07, 5.83200000e+03])

In [47]:
feature_poly[0]

array([6.32000000e-03, 1.80000000e+01, 3.99424000e-05, 1.13760000e-01,
       3.24000000e+02, 2.52435968e-07, 7.18963200e-04, 2.04768000e+00,
       5.83200000e+03])

<h4 class="text-center"> Reduce Variance </h4>

In [50]:
# Switch `feature` back to the full 13-column matrix after the two-column
# interaction / polynomial experiments.
feature = boston.data

In [51]:
#### Library
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler

In [52]:
scaler = StandardScaler()

In [53]:
# Standardize every feature column (zero mean, unit variance) so the
# regularization penalty treats all coefficients on a comparable scale.
# NOTE(review): the scaler is fit on the full dataset; there is no train/test split here.
feature_scale = scaler.fit_transform(feature)

In [54]:
# Ridge (L2-penalized) linear regression with penalty strength alpha=0.5.
# NOTE: this reuses the name `regression`, replacing the unpenalized
# LinearRegression model fitted earlier in the notebook.
regression = Ridge(alpha=0.5)

In [55]:
regression.fit(feature_scale,target)

Ridge(alpha=0.5)

In [56]:
regression.alpha

0.5

In [57]:
regression.coef_

array([-0.92396151,  1.07393055,  0.12895159,  0.68346136, -2.0427575 ,
        2.67854971,  0.01627328, -3.09063352,  2.62636926, -2.04312573,
       -2.05646414,  0.8490591 , -3.73711409])

#### Selecting the ideal value for alpha

In [58]:
from sklearn.linear_model import RidgeCV

In [59]:
# Cross-validated ridge regression: evaluates each candidate alpha in the list
# and keeps the best-scoring one, available after fitting as ridge_cv.alpha_.
ridge_cv = RidgeCV(alphas=[0.1,1.0,10.0])

In [60]:
ridge_cv.fit(feature_scale,target)

RidgeCV(alphas=array([ 0.1,  1. , 10. ]))

In [61]:
ridge_cv.alpha_

1.0

<h4 class="text-center"> Reducing the Number of Features </h4>

In [62]:
from sklearn.linear_model import Lasso 

In [64]:
# Lasso (L1-penalized) linear regression with alpha=0.5. The L1 penalty can
# shrink coefficients to exactly zero, which effectively drops those features.
lasso_reg = Lasso(alpha=0.5)

In [65]:
lasso_reg.fit(feature_scale,target)

Lasso(alpha=0.5)

In [68]:
lasso_reg.coef_

array([-0.11526463,  0.        , -0.        ,  0.39707879, -0.        ,
        2.97425861, -0.        , -0.17056942, -0.        , -0.        ,
       -1.59844856,  0.54313871, -3.66614361])

In [69]:
# We can extract the important features: the ones whose Lasso coefficient is non-zero.

In [70]:
boston.feature_names

array(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD',
       'TAX', 'PTRATIO', 'B', 'LSTAT'], dtype='<U7')

In [73]:
# Tabulate the Lasso coefficients against their feature names so the features
# whose coefficient was shrunk to zero are easy to spot.
pd.DataFrame({'Coef': lasso_reg.coef_}, index=boston.feature_names)

Unnamed: 0,Coef
CRIM,-0.115265
ZN,0.0
INDUS,-0.0
CHAS,0.397079
NOX,-0.0
RM,2.974259
AGE,-0.0
DIS,-0.170569
RAD,-0.0
TAX,-0.0
