In [1]:
from sklearn.datasets import load_boston
from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor

In [34]:
import numpy as np

In [3]:
import pandas as pd

In [2]:
data = load_boston()

In [74]:
df = pd.concat([pd.DataFrame(data.data, columns=data.feature_names), pd.Series(data.target, name='Target')], axis=1)

In [6]:
lin_reg = LinearRegression()
svr_reg = SVR()
rf_reg = RandomForestRegressor()
gbr_reg = GradientBoostingRegressor()
etr_reg = ExtraTreesRegressor()

In [7]:
lin_rfe = RFE(lin_reg)
svr_rfe = RFE(svr_reg)
rf_rfe = RFE(rf_reg)
gbr_rfe = RFE(gbr_reg)
etr_rfe = RFE(etr_reg)

In [8]:
X = df.drop('Target', axis=1).values
y = df.Target.values

In [10]:
lin_rfe.fit(X,y)

RFE(estimator=LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False),
  n_features_to_select=None, step=1, verbose=0)

In [12]:
rf_rfe.fit(X,y)

RFE(estimator=RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
           oob_score=False, random_state=None, verbose=0, warm_start=False),
  n_features_to_select=None, step=1, verbose=0)

In [13]:
gbr_rfe.fit(X,y)

RFE(estimator=GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,
             learning_rate=0.1, loss='ls', max_depth=3, max_features=None,
             max_leaf_nodes=None, min_impurity_decrease=0.0,
             min_impurity_split=None, min_samples_leaf=1,
             min_samples_split=2, min_weight_fraction_leaf=0.0,
             n_estimators=100, presort='auto', random_state=None,
             subsample=1.0, verbose=0, warm_start=False),
  n_features_to_select=None, step=1, verbose=0)

In [14]:
etr_rfe.fit(X,y)

RFE(estimator=ExtraTreesRegressor(bootstrap=False, criterion='mse', max_depth=None,
          max_features='auto', max_leaf_nodes=None,
          min_impurity_decrease=0.0, min_impurity_split=None,
          min_samples_leaf=1, min_samples_split=2,
          min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
          oob_score=False, random_state=None, verbose=0, warm_start=False),
  n_features_to_select=None, step=1, verbose=0)

In [16]:
lin_feature = lin_rfe.support_
rf_feature = rf_rfe.support_
gbr_feature = gbr_rfe.support_
etr_feature = etr_rfe.support_

In [18]:
print(lin_feature, rf_feature ,gbr_feature, etr_feature)

[False False False  True  True  True False  True False False  True False
  True] [ True False False False  True  True False  True False  True False False
  True] [ True False False False  True  True False  True False  True False False
  True] [False False False False  True  True False  True False  True  True False
  True]


In [21]:
df.iloc[:10,lin_feature]

Unnamed: 0,CHAS,NOX,RM,DIS,PTRATIO,LSTAT
0,0.0,0.538,6.575,4.09,15.3,4.98
1,0.0,0.469,6.421,4.9671,17.8,9.14
2,0.0,0.469,7.185,4.9671,17.8,4.03
3,0.0,0.458,6.998,6.0622,18.7,2.94
4,0.0,0.458,7.147,6.0622,18.7,5.33
5,0.0,0.458,6.43,6.0622,18.7,5.21
6,0.0,0.524,6.012,5.5605,15.2,12.43
7,0.0,0.524,6.172,5.9505,15.2,19.15
8,0.0,0.524,5.631,6.0821,15.2,29.93
9,0.0,0.524,6.004,6.5921,15.2,17.1


In [23]:
df.iloc[:10, rf_feature]

Unnamed: 0,CRIM,NOX,RM,DIS,TAX,LSTAT
0,0.00632,0.538,6.575,4.09,296.0,4.98
1,0.02731,0.469,6.421,4.9671,242.0,9.14
2,0.02729,0.469,7.185,4.9671,242.0,4.03
3,0.03237,0.458,6.998,6.0622,222.0,2.94
4,0.06905,0.458,7.147,6.0622,222.0,5.33
5,0.02985,0.458,6.43,6.0622,222.0,5.21
6,0.08829,0.524,6.012,5.5605,311.0,12.43
7,0.14455,0.524,6.172,5.9505,311.0,19.15
8,0.21124,0.524,5.631,6.0821,311.0,29.93
9,0.17004,0.524,6.004,6.5921,311.0,17.1


In [25]:
df.iloc[:10,gbr_feature]

Unnamed: 0,CRIM,NOX,RM,DIS,TAX,LSTAT
0,0.00632,0.538,6.575,4.09,296.0,4.98
1,0.02731,0.469,6.421,4.9671,242.0,9.14
2,0.02729,0.469,7.185,4.9671,242.0,4.03
3,0.03237,0.458,6.998,6.0622,222.0,2.94
4,0.06905,0.458,7.147,6.0622,222.0,5.33
5,0.02985,0.458,6.43,6.0622,222.0,5.21
6,0.08829,0.524,6.012,5.5605,311.0,12.43
7,0.14455,0.524,6.172,5.9505,311.0,19.15
8,0.21124,0.524,5.631,6.0821,311.0,29.93
9,0.17004,0.524,6.004,6.5921,311.0,17.1


In [27]:
df.iloc[:10, etr_feature]

Unnamed: 0,NOX,RM,DIS,TAX,PTRATIO,LSTAT
0,0.538,6.575,4.09,296.0,15.3,4.98
1,0.469,6.421,4.9671,242.0,17.8,9.14
2,0.469,7.185,4.9671,242.0,17.8,4.03
3,0.458,6.998,6.0622,222.0,18.7,2.94
4,0.458,7.147,6.0622,222.0,18.7,5.33
5,0.458,6.43,6.0622,222.0,18.7,5.21
6,0.524,6.012,5.5605,311.0,15.2,12.43
7,0.524,6.172,5.9505,311.0,15.2,19.15
8,0.524,5.631,6.0821,311.0,15.2,29.93
9,0.524,6.004,6.5921,311.0,15.2,17.1


In [75]:
RM = df['RM'].copy()

In [79]:
RM.isnull().sum()

0

In [77]:
df.loc[np.arange(1, len(RM), 34), 'RM'] = np.NaN

In [78]:
df.isnull().sum()

CRIM        0
ZN          0
INDUS       0
CHAS        0
NOX         0
RM         15
AGE         0
DIS         0
RAD         0
TAX         0
PTRATIO     0
B           0
LSTAT       0
Target      0
dtype: int64

In [84]:
RM_new = df['RM'].interpolate(method= 'linear',limit_direction='both')

In [91]:
RM_cube = df['RM'].interpolate(method= 'cubic',limit_direction='both')

In [93]:
RM_quad = df['RM'].interpolate(method= 'quadratic',limit_direction='both')

In [101]:
new_df = pd.concat([RM, RM_new, RM_cube, RM_quad], axis=1)

In [102]:
newer_df = new_df.loc[np.arange(1, len(RM), 34)]

In [104]:
newer_df.columns = ['RM', 'RM_lin', 'RM_cube', 'RM_quad']

In [105]:
newer_df.columns

Index(['RM', 'RM_lin', 'RM_cube', 'RM_quad'], dtype='object')

In [96]:
from sklearn.metrics import r2_score

In [107]:
r2_score(newer_df['RM'], newer_df['RM_lin'])

-0.8076132178375257

In [108]:
r2_score(newer_df['RM'], newer_df['RM_cube'])

-1.534296898877245

In [110]:
r2_score(newer_df['RM'], newer_df['RM_quad'])

-1.341232127630633