In [1]:
import pandas as pd
import warnings

warnings.filterwarnings('ignore')
house_price_df=pd.read_csv('House_price_Data.csv')# stores the dataset in a dataframe

y=house_price_df['SalePrice']

house_price_df.drop(columns='SalePrice',inplace=True)

In [2]:
na_df=pd.DataFrame({'column_name':house_price_df.columns,
                    'na_count':house_price_df.isnull().sum(),
                    'na_percentage':house_price_df.isnull().sum()/house_price_df.shape[0]*100})

na_df.sort_values(by='na_percentage',ascending=False)

Unnamed: 0,column_name,na_count,na_percentage
PoolQC,PoolQC,1453,99.520548
MiscFeature,MiscFeature,1406,96.301370
Alley,Alley,1369,93.767123
Fence,Fence,1179,80.753425
FireplaceQu,FireplaceQu,690,47.260274
...,...,...,...
TotalBsmtSF,TotalBsmtSF,0,0.000000
Heating,Heating,0,0.000000
MSSubClass,MSSubClass,0,0.000000
CentralAir,CentralAir,0,0.000000


In [3]:
cols_to_drop=list(na_df[na_df['na_percentage']>50].index)#dropping the columns with higher NA percentage

for col in house_price_df.columns:
    if len(house_price_df[col].unique())==1 or len(house_price_df[col].unique())==house_price_df.shape[0]:
        cols_to_drop.append(col)
        
cols_to_drop

['Alley', 'PoolQC', 'Fence', 'MiscFeature', 'Id']

# Dropping unneccesary columns

In [4]:
house_price_df.drop(columns=cols_to_drop,inplace=True)

# train-test split

In [5]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(house_price_df,y,test_size=0.2,random_state=42)

In [6]:
num_cols=[col for col in house_price_df.columns if house_price_df[col].dtype=='int64' or house_price_df[col].dtype=='float64']

cat_cols=[col for col in house_price_df.columns if house_price_df[col].dtype=='object']

# Missing value imputation

In [7]:


for col in num_cols:
    x_train[col]=x_train[col].fillna(x_train[col].mean())
    x_test[col]=x_test[col].fillna(x_train[col].mean())

In [8]:
for col in cat_cols:
    x_train[col]=x_train[col].fillna(x_train[col].mode()[0])
    x_test[col]=x_test[col].fillna(x_train[col].mode()[0])

In [9]:
house_price_df['SaleType'].mode()[0]

'WD'

# scaling the continuous variables

In [10]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler,StandardScaler


In [11]:
minmaxscaler=MinMaxScaler()

for col in num_cols:
    x_train[col]=minmaxscaler.fit_transform(np.array(x_train[col]).reshape(-1,1))
    x_test[col]=minmaxscaler.transform(np.array(x_test[col]).reshape(-1,1))


In [12]:
x_train

Unnamed: 0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,LotShape,LandContour,Utilities,LotConfig,LandSlope,...,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,SaleType,SaleCondition
254,0.000000,RL,0.167808,0.033186,Pave,Reg,Lvl,AllPub,Inside,Gtl,...,0.000000,0.000000,0.0,0.0,0.0,0.0,0.454545,1.00,WD,Normal
1066,0.235294,RL,0.130137,0.030555,Pave,IR1,Lvl,AllPub,Inside,Gtl,...,0.073126,0.000000,0.0,0.0,0.0,0.0,0.363636,0.75,WD,Normal
638,0.058824,RL,0.157534,0.034948,Pave,Reg,Lvl,AllPub,Inside,Gtl,...,0.000000,0.297101,0.0,0.0,0.0,0.0,0.363636,0.50,WD,Normal
799,0.176471,RL,0.133562,0.027577,Pave,Reg,Lvl,AllPub,Corner,Gtl,...,0.000000,0.478261,0.0,0.0,0.0,0.0,0.454545,0.25,WD,Normal
380,0.176471,RL,0.099315,0.017294,Pave,Reg,Lvl,AllPub,Inside,Gtl,...,0.000000,0.438406,0.0,0.0,0.0,0.0,0.363636,1.00,WD,Normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1095,0.000000,RL,0.195205,0.037472,Pave,IR1,Lvl,AllPub,Inside,Gtl,...,0.040219,0.000000,0.0,0.0,0.0,0.0,0.181818,0.25,WD,Normal
1130,0.176471,RL,0.150685,0.030400,Pave,Reg,Lvl,AllPub,Inside,Gtl,...,0.080439,0.000000,0.0,0.0,0.0,0.0,1.000000,0.75,WD,Normal
1294,0.000000,RL,0.133562,0.032120,Pave,Reg,Lvl,AllPub,Inside,Gtl,...,0.000000,0.000000,0.0,0.0,0.0,0.0,0.272727,0.00,WD,Normal
860,0.176471,RL,0.116438,0.029643,Pave,Reg,Lvl,AllPub,Corner,Gtl,...,0.438757,0.000000,0.0,0.0,0.0,0.0,0.454545,0.25,WD,Normal


# One _Hot encoding the categorical variables

In [13]:
oe_train_df=pd.get_dummies(x_train[cat_cols])
oe_test_df=pd.get_dummies(x_test[cat_cols])

In [14]:
x_train_oe,x_test_oe=oe_train_df.align(oe_test_df,axis=1,join='inner',fill_value=0)

In [15]:
x_train_final=pd.concat([x_train_oe,x_train[num_cols]],axis=1)
x_test_final=pd.concat([x_test_oe,x_test[num_cols]],axis=1)

In [16]:
x_train_final

Unnamed: 0,MSZoning_C (all),MSZoning_FV,MSZoning_RH,MSZoning_RL,MSZoning_RM,Street_Grvl,Street_Pave,LotShape_IR1,LotShape_IR2,LotShape_IR3,...,GarageArea,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold
254,0,0,0,1,0,0,1,0,0,0,...,0.207334,0.291715,0.000000,0.000000,0.0,0.0,0.0,0.0,0.454545,1.00
1066,0,0,0,1,0,0,1,1,0,0,...,0.267983,0.000000,0.073126,0.000000,0.0,0.0,0.0,0.0,0.363636,0.75
638,0,0,0,1,0,0,1,0,0,0,...,0.000000,0.382730,0.000000,0.297101,0.0,0.0,0.0,0.0,0.363636,0.50
799,0,0,0,1,0,0,1,0,0,0,...,0.169252,0.000000,0.000000,0.478261,0.0,0.0,0.0,0.0,0.454545,0.25
380,0,0,0,1,0,0,1,0,0,0,...,0.217207,0.000000,0.000000,0.438406,0.0,0.0,0.0,0.0,0.363636,1.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1095,0,0,0,1,0,0,1,1,0,0,...,0.310296,0.000000,0.040219,0.000000,0.0,0.0,0.0,0.0,0.181818,0.25
1130,0,0,0,1,0,0,1,0,0,0,...,0.406206,0.502917,0.080439,0.000000,0.0,0.0,0.0,0.0,1.000000,0.75
1294,0,0,0,1,0,0,1,0,0,0,...,0.403385,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.272727,0.00
860,0,0,0,1,0,0,1,0,0,0,...,0.152327,0.000000,0.438757,0.000000,0.0,0.0,0.0,0.0,0.454545,0.25


In [17]:
x_train_final.shape
x_test_final.shape

(292, 237)

# Training a Linear Regression Model

In [18]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score,mean_squared_error

linreg=LinearRegression()

linreg.fit(x_train_final,y_train)

predictions=linreg.predict(x_test_final)

In [19]:
r2_score(y_test,predictions)# Evaluating the performance of the trained model
mean_squared_error(y_test,predictions)

959520861.6335616

# Lasso regression model

In [20]:
from sklearn.linear_model import Lasso


    
reg=Lasso(alpha=0.01)
reg.fit(x_train_final,y_train)

print('Lasso Regression: R^2 score on training set', reg.score(x_train_final,y_train)*100)

print('Lasso Regression: R^2 score on test set', reg.score(x_test_final, y_test)*100)



Lasso Regression: R^2 score on training set 90.81050610587181
Lasso Regression: R^2 score on test set 87.46514063526773


In [21]:
from sklearn.linear_model import Lasso

def lasso(alphas):
    df=pd.DataFrame()
    df['Feature']=x_train_final.columns
    
    for alpha in alphas:
        lasso=Lasso(alpha=alpha)
        lasso.fit(x_train_final,y_train)
        col_name='Alpha=%f' % alpha
        df[col_name]=lasso.coef_
    return df
lasso([3,5,6])
        

Unnamed: 0,Feature,Alpha=3.000000,Alpha=5.000000,Alpha=6.000000
0,MSZoning_C (all),-24503.299772,-22961.218399,-21956.977882
1,MSZoning_FV,10110.472520,9815.250336,9777.037738
2,MSZoning_RH,-2995.229445,-2435.678920,-2091.781280
3,MSZoning_RL,1128.367226,1331.803612,1448.717349
4,MSZoning_RM,-0.000000,0.000000,-0.000000
...,...,...,...,...
232,ScreenPorch,18426.235192,18231.713587,18303.944636
233,PoolArea,20216.351059,19219.224917,19130.333960
234,MiscVal,-1200.591938,-0.000000,-0.000000
235,MoSold,-2834.083633,-2713.238308,-2636.319900


# Decision tree regressor

In [22]:
from sklearn.tree import DecisionTreeRegressor

dtr=DecisionTreeRegressor(random_state=42,max_depth=7,max_leaf_nodes=15)

dtr.fit(x_train_final,y_train)

dtr.predict(x_test_final)

print('Traning score:',dtr.score(x_train_final,y_train))

print('Test score:',dtr.score(x_test_final,y_test))

Traning score: 0.8097105670248076
Test score: 0.8174772637665451


# hyperparametertunning  for  DecisionTreeRegressor

In [23]:
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

hyp_dict={'max_depth':[8,10,12,15,18],
         'min_samples_split':[8,10,14],
         'max_leaf_nodes':[8,9,10,12]
         }


hyp_dict1={'max_depth':[8,10,12,15,18],
         'min_samples_split':[8,10,14],
         'max_leaf_nodes':[8,9,10,12]
         }

gcv=GridSearchCV(estimator=dtr,param_grid=hyp_dict,cv=10,verbose=42)
rcv=RandomizedSearchCV(estimator=dtr,param_distributions=hyp_dict,cv=10,n_iter=20)

gcv.fit(x_train_final,y_train)
rcv.fit(x_train_final,y_train)

gcv.best_params_
gcv.best_score_

rcv.best_params_
rcv.best_score_

dtc_gcv_pred=gcv.predict(x_test_final)
dtc_rcv_pred=rcv.predict(x_test_final)


dtc_gcv_pred=gcv.predict(x_test_final)
dtc_rcv_pred=rcv.predict(x_test_final)

Fitting 10 folds for each of 60 candidates, totalling 600 fits
[CV] max_depth=8, max_leaf_nodes=8, min_samples_split=8 ..............
[CV]  max_depth=8, max_leaf_nodes=8, min_samples_split=8, score=0.728, total=   0.0s
[CV] max_depth=8, max_leaf_nodes=8, min_samples_split=8 ..............
[CV]  max_depth=8, max_leaf_nodes=8, min_samples_split=8, score=0.660, total=   0.0s
[CV] max_depth=8, max_leaf_nodes=8, min_samples_split=8 ..............
[CV]  max_depth=8, max_leaf_nodes=8, min_samples_split=8, score=0.672, total=   0.0s
[CV] max_depth=8, max_leaf_nodes=8, min_samples_split=8 ..............
[CV]  max_depth=8, max_leaf_nodes=8, min_samples_split=8, score=0.538, total=   0.0s
[CV] max_depth=8, max_leaf_nodes=8, min_samples_split=8 ..............
[CV]  max_depth=8, max_leaf_nodes=8, min_samples_split=8, score=0.636, total=   0.0s
[CV] max_depth=8, max_leaf_nodes=8, min_samples_split=8 ..............
[CV]  max_depth=8, max_leaf_nodes=8, min_samples_split=8, score=0.679, total=   0.0s
[

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.1s remaining:    0.0s



[CV] max_depth=8, max_leaf_nodes=8, min_samples_split=8 ..............
[CV]  max_depth=8, max_leaf_nodes=8, min_samples_split=8, score=0.673, total=   0.0s
[CV] max_depth=8, max_leaf_nodes=8, min_samples_split=10 .............
[CV]  max_depth=8, max_leaf_nodes=8, min_samples_split=10, score=0.728, total=   0.0s
[CV] max_depth=8, max_leaf_nodes=8, min_samples_split=10 .............
[CV]  max_depth=8, max_leaf_nodes=8, min_samples_split=10, score=0.660, total=   0.0s
[CV] max_depth=8, max_leaf_nodes=8, min_samples_split=10 .............
[CV]  max_depth=8, max_leaf_nodes=8, min_samples_split=10, score=0.672, total=   0.0s
[CV] max_depth=8, max_leaf_nodes=8, min_samples_split=10 .............
[CV]  max_depth=8, max_leaf_nodes=8, min_samples_split=10, score=0.538, total=   0.0s
[CV] max_depth=8, max_leaf_nodes=8, min_samples_split=10 .............
[CV]  max_depth=8, max_leaf_nodes=8, min_samples_split=10, score=0.636, total=   0.0s
[CV] max_depth=8, max_leaf_nodes=8, min_samples_split=10 .

[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  11 out of  11 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  12 out of  12 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  13 out of  13 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  14 out of  14 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  16 out of  16 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  17 out of  17 | elapsed:    0.3s remaining:    0.0s


[CV]  max_depth=8, max_leaf_nodes=8, min_samples_split=10, score=0.679, total=   0.0s
[CV] max_depth=8, max_leaf_nodes=8, min_samples_split=10 .............
[CV]  max_depth=8, max_leaf_nodes=8, min_samples_split=10, score=0.639, total=   0.0s
[CV] max_depth=8, max_leaf_nodes=8, min_samples_split=10 .............
[CV]  max_depth=8, max_leaf_nodes=8, min_samples_split=10, score=0.708, total=   0.0s
[CV] max_depth=8, max_leaf_nodes=8, min_samples_split=10 .............
[CV]  max_depth=8, max_leaf_nodes=8, min_samples_split=10, score=0.627, total=   0.0s
[CV] max_depth=8, max_leaf_nodes=8, min_samples_split=10 .............
[CV]  max_depth=8, max_leaf_nodes=8, min_samples_split=10, score=0.673, total=   0.0s
[CV] max_depth=8, max_leaf_nodes=8, min_samples_split=14 .............
[CV]  max_depth=8, max_leaf_nodes=8, min_samples_split=14, score=0.728, total=   0.0s
[CV] max_depth=8, max_leaf_nodes=8, min_samples_split=14 .............
[CV]  max_depth=8, max_leaf_nodes=8, min_samples_split=14,

[Parallel(n_jobs=1)]: Done  18 out of  18 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  19 out of  19 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  20 out of  20 | elapsed:    0.4s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  21 out of  21 | elapsed:    0.4s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  22 out of  22 | elapsed:    0.4s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  23 out of  23 | elapsed:    0.4s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  24 out of  24 | elapsed:    0.4s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  25 out of  25 | elapsed:    0.5s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  26 out of  26 | elapsed:    0.5s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  27 out of  27 | elapsed:    0.5s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  28 out of  28 | elapsed:    0.5s remaining:    0.0s


[CV]  max_depth=8, max_leaf_nodes=8, min_samples_split=14, score=0.708, total=   0.0s
[CV] max_depth=8, max_leaf_nodes=8, min_samples_split=14 .............
[CV]  max_depth=8, max_leaf_nodes=8, min_samples_split=14, score=0.627, total=   0.0s
[CV] max_depth=8, max_leaf_nodes=8, min_samples_split=14 .............
[CV]  max_depth=8, max_leaf_nodes=8, min_samples_split=14, score=0.673, total=   0.0s
[CV] max_depth=8, max_leaf_nodes=9, min_samples_split=8 ..............
[CV]  max_depth=8, max_leaf_nodes=9, min_samples_split=8, score=0.748, total=   0.0s
[CV] max_depth=8, max_leaf_nodes=9, min_samples_split=8 ..............
[CV]  max_depth=8, max_leaf_nodes=9, min_samples_split=8, score=0.475, total=   0.0s
[CV] max_depth=8, max_leaf_nodes=9, min_samples_split=8 ..............
[CV]  max_depth=8, max_leaf_nodes=9, min_samples_split=8, score=0.684, total=   0.0s
[CV] max_depth=8, max_leaf_nodes=9, min_samples_split=8 ..............
[CV]  max_depth=8, max_leaf_nodes=9, min_samples_split=8, sco

[Parallel(n_jobs=1)]: Done  29 out of  29 | elapsed:    0.5s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  30 out of  30 | elapsed:    0.6s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  31 out of  31 | elapsed:    0.6s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  32 out of  32 | elapsed:    0.6s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  33 out of  33 | elapsed:    0.6s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  34 out of  34 | elapsed:    0.7s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  35 out of  35 | elapsed:    0.7s remaining:    0.0s



[CV] max_depth=8, max_leaf_nodes=9, min_samples_split=8 ..............
[CV]  max_depth=8, max_leaf_nodes=9, min_samples_split=8, score=0.706, total=   0.0s
[CV] max_depth=8, max_leaf_nodes=9, min_samples_split=8 ..............
[CV]  max_depth=8, max_leaf_nodes=9, min_samples_split=8, score=0.654, total=   0.0s
[CV] max_depth=8, max_leaf_nodes=9, min_samples_split=8 ..............
[CV]  max_depth=8, max_leaf_nodes=9, min_samples_split=8, score=0.724, total=   0.0s
[CV] max_depth=8, max_leaf_nodes=9, min_samples_split=8 ..............
[CV]  max_depth=8, max_leaf_nodes=9, min_samples_split=8, score=0.651, total=   0.0s
[CV] max_depth=8, max_leaf_nodes=9, min_samples_split=8 ..............
[CV]  max_depth=8, max_leaf_nodes=9, min_samples_split=8, score=0.667, total=   0.0s
[CV] max_depth=8, max_leaf_nodes=9, min_samples_split=10 .............
[CV]  max_depth=8, max_leaf_nodes=9, min_samples_split=10, score=0.748, total=   0.0s
[CV] max_depth=8, max_leaf_nodes=9, min_samples_split=10 .....

[Parallel(n_jobs=1)]: Done  36 out of  36 | elapsed:    0.7s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  37 out of  37 | elapsed:    0.7s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  38 out of  38 | elapsed:    0.8s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  39 out of  39 | elapsed:    0.8s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  40 out of  40 | elapsed:    0.8s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  41 out of  41 | elapsed:    0.8s remaining:    0.0s



[CV] max_depth=8, max_leaf_nodes=9, min_samples_split=10 .............
[CV]  max_depth=8, max_leaf_nodes=9, min_samples_split=10, score=0.543, total=   0.0s
[CV] max_depth=8, max_leaf_nodes=9, min_samples_split=10 .............
[CV]  max_depth=8, max_leaf_nodes=9, min_samples_split=10, score=0.655, total=   0.0s
[CV] max_depth=8, max_leaf_nodes=9, min_samples_split=10 .............
[CV]  max_depth=8, max_leaf_nodes=9, min_samples_split=10, score=0.706, total=   0.0s
[CV] max_depth=8, max_leaf_nodes=9, min_samples_split=10 .............
[CV]  max_depth=8, max_leaf_nodes=9, min_samples_split=10, score=0.654, total=   0.0s
[CV] max_depth=8, max_leaf_nodes=9, min_samples_split=10 .............
[CV]  max_depth=8, max_leaf_nodes=9, min_samples_split=10, score=0.724, total=   0.0s
[CV] max_depth=8, max_leaf_nodes=9, min_samples_split=10 .............
[CV]  max_depth=8, max_leaf_nodes=9, min_samples_split=10, score=0.651, total=   0.0s
[CV] max_depth=8, max_leaf_nodes=9, min_samples_split=10 

[CV]  max_depth=8, max_leaf_nodes=12, min_samples_split=10, score=0.582, total=   0.0s
[CV] max_depth=8, max_leaf_nodes=12, min_samples_split=10 ............
[CV]  max_depth=8, max_leaf_nodes=12, min_samples_split=10, score=0.665, total=   0.0s
[CV] max_depth=8, max_leaf_nodes=12, min_samples_split=10 ............
[CV]  max_depth=8, max_leaf_nodes=12, min_samples_split=10, score=0.707, total=   0.0s
[CV] max_depth=8, max_leaf_nodes=12, min_samples_split=10 ............
[CV]  max_depth=8, max_leaf_nodes=12, min_samples_split=10, score=0.625, total=   0.0s
[CV] max_depth=8, max_leaf_nodes=12, min_samples_split=10 ............
[CV]  max_depth=8, max_leaf_nodes=12, min_samples_split=10, score=0.724, total=   0.0s
[CV] max_depth=8, max_leaf_nodes=12, min_samples_split=10 ............
[CV]  max_depth=8, max_leaf_nodes=12, min_samples_split=10, score=0.548, total=   0.0s
[CV] max_depth=8, max_leaf_nodes=12, min_samples_split=10 ............
[CV]  max_depth=8, max_leaf_nodes=12, min_samples_sp

[CV]  max_depth=10, max_leaf_nodes=9, min_samples_split=8, score=0.654, total=   0.0s
[CV] max_depth=10, max_leaf_nodes=9, min_samples_split=8 .............
[CV]  max_depth=10, max_leaf_nodes=9, min_samples_split=8, score=0.724, total=   0.0s
[CV] max_depth=10, max_leaf_nodes=9, min_samples_split=8 .............
[CV]  max_depth=10, max_leaf_nodes=9, min_samples_split=8, score=0.651, total=   0.0s
[CV] max_depth=10, max_leaf_nodes=9, min_samples_split=8 .............
[CV]  max_depth=10, max_leaf_nodes=9, min_samples_split=8, score=0.667, total=   0.0s
[CV] max_depth=10, max_leaf_nodes=9, min_samples_split=10 ............
[CV]  max_depth=10, max_leaf_nodes=9, min_samples_split=10, score=0.748, total=   0.0s
[CV] max_depth=10, max_leaf_nodes=9, min_samples_split=10 ............
[CV]  max_depth=10, max_leaf_nodes=9, min_samples_split=10, score=0.475, total=   0.0s
[CV] max_depth=10, max_leaf_nodes=9, min_samples_split=10 ............
[CV]  max_depth=10, max_leaf_nodes=9, min_samples_split=

[CV]  max_depth=10, max_leaf_nodes=12, min_samples_split=8, score=0.665, total=   0.0s
[CV] max_depth=10, max_leaf_nodes=12, min_samples_split=8 ............
[CV]  max_depth=10, max_leaf_nodes=12, min_samples_split=8, score=0.502, total=   0.0s
[CV] max_depth=10, max_leaf_nodes=12, min_samples_split=8 ............
[CV]  max_depth=10, max_leaf_nodes=12, min_samples_split=8, score=0.704, total=   0.0s
[CV] max_depth=10, max_leaf_nodes=12, min_samples_split=8 ............
[CV]  max_depth=10, max_leaf_nodes=12, min_samples_split=8, score=0.582, total=   0.0s
[CV] max_depth=10, max_leaf_nodes=12, min_samples_split=8 ............
[CV]  max_depth=10, max_leaf_nodes=12, min_samples_split=8, score=0.665, total=   0.0s
[CV] max_depth=10, max_leaf_nodes=12, min_samples_split=8 ............
[CV]  max_depth=10, max_leaf_nodes=12, min_samples_split=8, score=0.707, total=   0.0s
[CV] max_depth=10, max_leaf_nodes=12, min_samples_split=8 ............
[CV]  max_depth=10, max_leaf_nodes=12, min_samples_s

[CV]  max_depth=12, max_leaf_nodes=8, min_samples_split=14, score=0.627, total=   0.0s
[CV] max_depth=12, max_leaf_nodes=8, min_samples_split=14 ............
[CV]  max_depth=12, max_leaf_nodes=8, min_samples_split=14, score=0.673, total=   0.0s
[CV] max_depth=12, max_leaf_nodes=9, min_samples_split=8 .............
[CV]  max_depth=12, max_leaf_nodes=9, min_samples_split=8, score=0.748, total=   0.0s
[CV] max_depth=12, max_leaf_nodes=9, min_samples_split=8 .............
[CV]  max_depth=12, max_leaf_nodes=9, min_samples_split=8, score=0.475, total=   0.0s
[CV] max_depth=12, max_leaf_nodes=9, min_samples_split=8 .............
[CV]  max_depth=12, max_leaf_nodes=9, min_samples_split=8, score=0.684, total=   0.0s
[CV] max_depth=12, max_leaf_nodes=9, min_samples_split=8 .............
[CV]  max_depth=12, max_leaf_nodes=9, min_samples_split=8, score=0.543, total=   0.0s
[CV] max_depth=12, max_leaf_nodes=9, min_samples_split=8 .............
[CV]  max_depth=12, max_leaf_nodes=9, min_samples_split=

[CV]  max_depth=12, max_leaf_nodes=10, min_samples_split=14, score=0.661, total=   0.0s
[CV] max_depth=12, max_leaf_nodes=10, min_samples_split=14 ...........
[CV]  max_depth=12, max_leaf_nodes=10, min_samples_split=14, score=0.486, total=   0.0s
[CV] max_depth=12, max_leaf_nodes=10, min_samples_split=14 ...........
[CV]  max_depth=12, max_leaf_nodes=10, min_samples_split=14, score=0.681, total=   0.0s
[CV] max_depth=12, max_leaf_nodes=10, min_samples_split=14 ...........
[CV]  max_depth=12, max_leaf_nodes=10, min_samples_split=14, score=0.602, total=   0.0s
[CV] max_depth=12, max_leaf_nodes=10, min_samples_split=14 ...........
[CV]  max_depth=12, max_leaf_nodes=10, min_samples_split=14, score=0.650, total=   0.0s
[CV] max_depth=12, max_leaf_nodes=10, min_samples_split=14 ...........
[CV]  max_depth=12, max_leaf_nodes=10, min_samples_split=14, score=0.695, total=   0.0s
[CV] max_depth=12, max_leaf_nodes=10, min_samples_split=14 ...........
[CV]  max_depth=12, max_leaf_nodes=10, min_sam

[CV] max_depth=15, max_leaf_nodes=8, min_samples_split=10 ............
[CV]  max_depth=15, max_leaf_nodes=8, min_samples_split=10, score=0.673, total=   0.0s
[CV] max_depth=15, max_leaf_nodes=8, min_samples_split=14 ............
[CV]  max_depth=15, max_leaf_nodes=8, min_samples_split=14, score=0.728, total=   0.0s
[CV] max_depth=15, max_leaf_nodes=8, min_samples_split=14 ............
[CV]  max_depth=15, max_leaf_nodes=8, min_samples_split=14, score=0.660, total=   0.0s
[CV] max_depth=15, max_leaf_nodes=8, min_samples_split=14 ............
[CV]  max_depth=15, max_leaf_nodes=8, min_samples_split=14, score=0.672, total=   0.0s
[CV] max_depth=15, max_leaf_nodes=8, min_samples_split=14 ............
[CV]  max_depth=15, max_leaf_nodes=8, min_samples_split=14, score=0.538, total=   0.0s
[CV] max_depth=15, max_leaf_nodes=8, min_samples_split=14 ............
[CV]  max_depth=15, max_leaf_nodes=8, min_samples_split=14, score=0.636, total=   0.0s
[CV] max_depth=15, max_leaf_nodes=8, min_samples_spl

[CV]  max_depth=15, max_leaf_nodes=10, min_samples_split=10, score=0.643, total=   0.0s
[CV] max_depth=15, max_leaf_nodes=10, min_samples_split=10 ...........
[CV]  max_depth=15, max_leaf_nodes=10, min_samples_split=10, score=0.666, total=   0.0s
[CV] max_depth=15, max_leaf_nodes=10, min_samples_split=14 ...........
[CV]  max_depth=15, max_leaf_nodes=10, min_samples_split=14, score=0.661, total=   0.0s
[CV] max_depth=15, max_leaf_nodes=10, min_samples_split=14 ...........
[CV]  max_depth=15, max_leaf_nodes=10, min_samples_split=14, score=0.486, total=   0.0s
[CV] max_depth=15, max_leaf_nodes=10, min_samples_split=14 ...........
[CV]  max_depth=15, max_leaf_nodes=10, min_samples_split=14, score=0.681, total=   0.0s
[CV] max_depth=15, max_leaf_nodes=10, min_samples_split=14 ...........
[CV]  max_depth=15, max_leaf_nodes=10, min_samples_split=14, score=0.602, total=   0.0s
[CV] max_depth=15, max_leaf_nodes=10, min_samples_split=14 ...........
[CV]  max_depth=15, max_leaf_nodes=10, min_sam

[CV]  max_depth=18, max_leaf_nodes=8, min_samples_split=10, score=0.672, total=   0.0s
[CV] max_depth=18, max_leaf_nodes=8, min_samples_split=10 ............
[CV]  max_depth=18, max_leaf_nodes=8, min_samples_split=10, score=0.538, total=   0.0s
[CV] max_depth=18, max_leaf_nodes=8, min_samples_split=10 ............
[CV]  max_depth=18, max_leaf_nodes=8, min_samples_split=10, score=0.636, total=   0.0s
[CV] max_depth=18, max_leaf_nodes=8, min_samples_split=10 ............
[CV]  max_depth=18, max_leaf_nodes=8, min_samples_split=10, score=0.679, total=   0.0s
[CV] max_depth=18, max_leaf_nodes=8, min_samples_split=10 ............
[CV]  max_depth=18, max_leaf_nodes=8, min_samples_split=10, score=0.639, total=   0.0s
[CV] max_depth=18, max_leaf_nodes=8, min_samples_split=10 ............
[CV]  max_depth=18, max_leaf_nodes=8, min_samples_split=10, score=0.708, total=   0.0s
[CV] max_depth=18, max_leaf_nodes=8, min_samples_split=10 ............
[CV]  max_depth=18, max_leaf_nodes=8, min_samples_sp

[CV]  max_depth=18, max_leaf_nodes=10, min_samples_split=8, score=0.727, total=   0.0s
[CV] max_depth=18, max_leaf_nodes=10, min_samples_split=8 ............
[CV]  max_depth=18, max_leaf_nodes=10, min_samples_split=8, score=0.643, total=   0.0s
[CV] max_depth=18, max_leaf_nodes=10, min_samples_split=8 ............
[CV]  max_depth=18, max_leaf_nodes=10, min_samples_split=8, score=0.666, total=   0.0s
[CV] max_depth=18, max_leaf_nodes=10, min_samples_split=10 ...........
[CV]  max_depth=18, max_leaf_nodes=10, min_samples_split=10, score=0.661, total=   0.0s
[CV] max_depth=18, max_leaf_nodes=10, min_samples_split=10 ...........
[CV]  max_depth=18, max_leaf_nodes=10, min_samples_split=10, score=0.486, total=   0.0s
[CV] max_depth=18, max_leaf_nodes=10, min_samples_split=10 ...........
[CV]  max_depth=18, max_leaf_nodes=10, min_samples_split=10, score=0.681, total=   0.0s
[CV] max_depth=18, max_leaf_nodes=10, min_samples_split=10 ...........
[CV]  max_depth=18, max_leaf_nodes=10, min_sample

[Parallel(n_jobs=1)]: Done 600 out of 600 | elapsed:   13.9s finished


In [24]:
from sklearn.metrics import r2_score

r2_score(y_test,dtc_gcv_pred)

0.7779549268414158

In [25]:
r2_score(y_test,dtc_rcv_pred)

0.7779549268414158

# stochastic gradient descent

In [26]:
from sklearn.linear_model import SGDRegressor
sgd=SGDRegressor(loss='squared_loss',max_iter=1000,eta0=0.0001,learning_rate='invscaling',warm_start=True,verbose=3)

sgd.fit(x_train_final,y_train)

-- Epoch 1
Norm: 18042.32, NNZs: 237, Bias: 3303.313812, T: 1168, Avg. loss: 10154875611.621038
Total training time: 0.00 seconds.
-- Epoch 2
Norm: 24371.03, NNZs: 237, Bias: 4444.981764, T: 2336, Avg. loss: 4647563033.535912
Total training time: 0.00 seconds.
-- Epoch 3
Norm: 27758.46, NNZs: 237, Bias: 5041.929817, T: 3504, Avg. loss: 3256164996.847992
Total training time: 0.00 seconds.
-- Epoch 4
Norm: 29790.92, NNZs: 237, Bias: 5387.122864, T: 4672, Avg. loss: 2748024562.801862
Total training time: 0.00 seconds.
-- Epoch 5
Norm: 31110.24, NNZs: 237, Bias: 5599.319199, T: 5840, Avg. loss: 2521298233.062831
Total training time: 0.01 seconds.
-- Epoch 6
Norm: 32008.19, NNZs: 237, Bias: 5732.619949, T: 7008, Avg. loss: 2399617267.204082
Total training time: 0.01 seconds.
-- Epoch 7
Norm: 32643.21, NNZs: 237, Bias: 5816.436576, T: 8176, Avg. loss: 2322421923.842938
Total training time: 0.01 seconds.
-- Epoch 8
Norm: 33129.11, NNZs: 237, Bias: 5872.074657, T: 9344, Avg. loss: 2265546401.7

Norm: 52679.58, NNZs: 237, Bias: 6356.966084, T: 216080, Avg. loss: 1074745621.662205
Total training time: 0.19 seconds.
-- Epoch 186
Norm: 52740.32, NNZs: 237, Bias: 6359.321319, T: 217248, Avg. loss: 1073086204.709345
Total training time: 0.19 seconds.
-- Epoch 187
Norm: 52801.79, NNZs: 237, Bias: 6361.935298, T: 218416, Avg. loss: 1071434715.450821
Total training time: 0.19 seconds.
-- Epoch 188
Norm: 52862.74, NNZs: 237, Bias: 6364.463139, T: 219584, Avg. loss: 1069795263.656541
Total training time: 0.20 seconds.
-- Epoch 189
Norm: 52923.49, NNZs: 237, Bias: 6366.994055, T: 220752, Avg. loss: 1068163335.332404
Total training time: 0.20 seconds.
-- Epoch 190
Norm: 52983.06, NNZs: 237, Bias: 6369.222240, T: 221920, Avg. loss: 1066541410.818708
Total training time: 0.20 seconds.
-- Epoch 191
Norm: 53042.99, NNZs: 237, Bias: 6371.606420, T: 223088, Avg. loss: 1064928478.829007
Total training time: 0.20 seconds.
-- Epoch 192
Norm: 53100.67, NNZs: 237, Bias: 6373.371430, T: 224256, Avg. 

Total training time: 0.37 seconds.
-- Epoch 356
Norm: 61181.91, NNZs: 237, Bias: 6712.119604, T: 415808, Avg. loss: 880221505.371742
Total training time: 0.38 seconds.
-- Epoch 357
Norm: 61223.26, NNZs: 237, Bias: 6713.737552, T: 416976, Avg. loss: 879433886.677171
Total training time: 0.38 seconds.
-- Epoch 358
Norm: 61265.06, NNZs: 237, Bias: 6715.522913, T: 418144, Avg. loss: 878647601.695009
Total training time: 0.38 seconds.
-- Epoch 359
Norm: 61306.38, NNZs: 237, Bias: 6717.170086, T: 419312, Avg. loss: 877865357.464944
Total training time: 0.38 seconds.
-- Epoch 360
Norm: 61347.92, NNZs: 237, Bias: 6718.911950, T: 420480, Avg. loss: 877085638.764324
Total training time: 0.38 seconds.
-- Epoch 361
Norm: 61388.28, NNZs: 237, Bias: 6720.267486, T: 421648, Avg. loss: 876307044.710392
Total training time: 0.38 seconds.
-- Epoch 362
Norm: 61429.39, NNZs: 237, Bias: 6721.900148, T: 422816, Avg. loss: 875532229.323510
Total training time: 0.38 seconds.
-- Epoch 363
Norm: 61470.55, NNZs:

Norm: 67921.73, NNZs: 237, Bias: 6964.014873, T: 634224, Avg. loss: 769059539.845715
Total training time: 0.57 seconds.
-- Epoch 544
Norm: 67952.46, NNZs: 237, Bias: 6964.845831, T: 635392, Avg. loss: 768611418.265096
Total training time: 0.57 seconds.
-- Epoch 545
Norm: 67984.35, NNZs: 237, Bias: 6966.118749, T: 636560, Avg. loss: 768165527.714896
Total training time: 0.57 seconds.
-- Epoch 546
Norm: 68015.97, NNZs: 237, Bias: 6967.313443, T: 637728, Avg. loss: 767721020.572313
Total training time: 0.57 seconds.
-- Epoch 547
Norm: 68047.64, NNZs: 237, Bias: 6968.528681, T: 638896, Avg. loss: 767277616.842164
Total training time: 0.57 seconds.
-- Epoch 548
Norm: 68078.43, NNZs: 237, Bias: 6969.427089, T: 640064, Avg. loss: 766835976.370241
Total training time: 0.57 seconds.
-- Epoch 549
Norm: 68109.84, NNZs: 237, Bias: 6970.569348, T: 641232, Avg. loss: 766394784.530152
Total training time: 0.57 seconds.
-- Epoch 550
Norm: 68141.56, NNZs: 237, Bias: 6971.844631, T: 642400, Avg. loss: 7

Norm: 72966.25, NNZs: 237, Bias: 7119.418717, T: 844464, Avg. loss: 703925681.209255
Total training time: 0.76 seconds.
-- Epoch 724
Norm: 72991.04, NNZs: 237, Bias: 7120.000033, T: 845632, Avg. loss: 703633501.129303
Total training time: 0.76 seconds.
-- Epoch 725
Norm: 73016.45, NNZs: 237, Bias: 7120.846069, T: 846800, Avg. loss: 703340537.709431
Total training time: 0.76 seconds.
-- Epoch 726
Norm: 73041.18, NNZs: 237, Bias: 7121.430002, T: 847968, Avg. loss: 703050414.006841
Total training time: 0.76 seconds.
-- Epoch 727
Norm: 73065.97, NNZs: 237, Bias: 7122.040687, T: 849136, Avg. loss: 702759629.702506
Total training time: 0.76 seconds.
-- Epoch 728
Norm: 73090.48, NNZs: 237, Bias: 7122.551866, T: 850304, Avg. loss: 702468975.665751
Total training time: 0.76 seconds.
-- Epoch 729
Norm: 73115.58, NNZs: 237, Bias: 7123.314833, T: 851472, Avg. loss: 702180846.726032
Total training time: 0.77 seconds.
-- Epoch 730
Norm: 73140.11, NNZs: 237, Bias: 7123.853137, T: 852640, Avg. loss: 7

Norm: 76784.91, NNZs: 237, Bias: 7213.660169, T: 1039520, Avg. loss: 662340090.194960
Total training time: 0.94 seconds.
-- Epoch 891
Norm: 76805.34, NNZs: 237, Bias: 7213.928635, T: 1040688, Avg. loss: 662126676.675914
Total training time: 0.94 seconds.
-- Epoch 892
Norm: 76826.05, NNZs: 237, Bias: 7214.320473, T: 1041856, Avg. loss: 661917293.672221
Total training time: 0.94 seconds.
-- Epoch 893
Norm: 76846.61, NNZs: 237, Bias: 7214.656306, T: 1043024, Avg. loss: 661705215.691316
Total training time: 0.95 seconds.
-- Epoch 894
Norm: 76867.44, NNZs: 237, Bias: 7215.114280, T: 1044192, Avg. loss: 661495253.637167
Total training time: 0.95 seconds.
-- Epoch 895
Norm: 76888.34, NNZs: 237, Bias: 7215.608360, T: 1045360, Avg. loss: 661284312.496006
Total training time: 0.95 seconds.
-- Epoch 896
Norm: 76909.19, NNZs: 237, Bias: 7216.093217, T: 1046528, Avg. loss: 661074566.030600
Total training time: 0.95 seconds.
-- Epoch 897
Norm: 76930.18, NNZs: 237, Bias: 7216.643892, T: 1047696, Avg.

SGDRegressor(eta0=0.0001, verbose=3, warm_start=True)

# Random forest Regressor

In [27]:
from sklearn.ensemble import RandomForestRegressor

rf=RandomForestRegressor(n_estimators=20,random_state=42,max_depth=7,max_leaf_nodes=15)
rf.fit(x_train_final,y_train)

rf_pred=rf.predict(x_test_final)
r2_score(y_test,rf_pred)

0.8319760460614782

# HyperparametersTunning for RandomForestRegressor

In [28]:
#imports
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

#defining hyperparameters
hyp_dict={'max_depth':[8,10,12,15,18],
         'min_samples_split':[8,10,14],
         'max_leaf_nodes':[8,9,10,12]
         }

hyp_dict1={'max_depth':[8,10,12,15,18],
         'min_samples_split':[8,10,14],
         'max_leaf_nodes':[8,9,10,12]
         }


gcv=GridSearchCV(estimator=dtr,param_grid=hyp_dict,cv=10)
rcv=RandomizedSearchCV(estimator=dtr,param_distributions=hyp_dict1,cv=10,n_iter=20)

#fitting the model
gcv.fit(x_train_final,y_train)
rcv.fit(x_train_final,y_train)

#to get best parameters and score
gcv.best_params_
gcv.best_score_

rcv.best_params_
rcv.best_score_

#predicting on test data
dtc_gcv_pred=gcv.predict(x_test_final)
dtc_rcv_pred=rcv.predict(x_test_final)

In [29]:
#calculating r2 score for randomsearch cv
r2_score(y_test,dtc_rcv_pred)

0.7779549268414158

In [30]:
#calculating r2 score for gridsearch cv 
r2_score(y_test,dtc_gcv_pred)

0.7779549268414158

# GradientBoostRegressor model

In [31]:
from sklearn.ensemble import GradientBoostingRegressor

Gbr=GradientBoostingRegressor(loss='ls', learning_rate=0.1, n_estimators=20,max_depth=7,max_leaf_nodes=15,
                               warm_start=True,verbose=3,)
Gbr.fit(x_train_final,y_train)


      Iter       Train Loss   Remaining Time 
         1  5046934080.4606            0.23s
         2  4297624486.3448            0.15s
         3  3659634116.2827            0.18s
         4  3141182021.2971            0.26s
         5  2713553197.3889            0.24s
         6  2350519491.1664            0.22s
         7  2039235940.0607            0.21s
         8  1781591242.7478            0.19s
         9  1565165868.5158            0.18s
        10  1382234534.5059            0.17s
        11  1230252334.7398            0.15s
        12  1095715099.4752            0.13s
        13   981388373.3207            0.11s
        14   884365923.5561            0.10s
        15   803606679.7401            0.08s
        16   729052282.5053            0.06s
        17   663438828.8763            0.05s
        18   605805544.6293            0.03s
        19   554759386.0500            0.02s
        20   512863376.8127            0.00s


GradientBoostingRegressor(max_depth=7, max_leaf_nodes=15, n_estimators=20,
                          verbose=3, warm_start=True)

In [32]:
gbr_gbr_pred=Gbr.predict(x_test_final)
gbr_gbr_pred


array([143782.38736684, 304700.736544  , 139865.60734218, 158809.98418345,
       304297.54154691, 112280.65230839, 206218.81026272, 160537.73368955,
       112280.65230839, 139133.01056864, 168430.12385183, 133169.91641835,
       127456.75628323, 205787.77396324, 180825.1629839 , 137082.00036627,
       192573.50713207, 142151.3643592 , 131445.97176314, 204864.54330595,
       169624.25916508, 209254.09103316, 172422.68247366, 134527.64451186,
       194663.17156466, 169029.89124096, 189438.18513756, 129259.75239906,
       177860.65622759, 191950.4459323 , 130555.71493226, 229347.9811434 ,
       169549.10069124, 123305.22574056, 239841.29491675, 143417.41365391,
       138974.44576494, 211951.97651714, 298602.96448953, 129093.55344364,
       136551.82524857, 224228.05743438, 128400.81558556, 328581.53250803,
       138317.07109186, 151748.1862147 , 127543.88047254, 131490.63521861,
       393756.02316634, 144552.49158156, 124046.37893401, 184294.63456416,
       135404.41226104, 3

# HYperparameterTunning for GradientBoostingRegressor

In [None]:
#imports

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

#defining hyperparameters
hyp_dict={'max_depth':[1,3,5,6,7,9],
         'min_samples_split':[8,10,14],
         'max_leaf_nodes':[8,9,10,12]
         }

gcv=GridSearchCV(GradientBoostingRegressor(),estimator=Gbr,param_grid=hyp_dict,cv=10,verbose=42)
rcv=RandomizedSearchCV(GradientBoostingRegressor(),estimator=Gbr,param_distributions=hyp_dict,cv=10,n_iter=20)

#fitting the model
gcv.fit(x_train_final,y_train)
rcv.fit(x_train_final,y_train)

#to get best parameters and score
gcv.best_params_
gcv.best_score_

rcv.best_params_
rcv.best_score_

#predicting on test data
dtc_gcv_pred=gcv.predict(x_test_final)
dtc_rcv_pred=rcv.predict(x_test_final)

In [None]:
r2_score(y_test,dtc_gcv_pred)

# AdaBoostingRegressor Model

In [37]:
#imports
from sklearn.ensemble import AdaBoostRegressor




#instantiate ,fit  AdaBoostingModel,and make predictions
Abr=AdaBoostRegressor(base_estimator=None, n_estimators=50, learning_rate=1.0, loss='linear', random_state=42)

Abr.fit(x_train_final,y_train)

Abr_pred=Abr.predict(x_test_final)



In [41]:
r2_score(y_test,Abr_pred)


0.82958316789958

In [39]:
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

#define the hyperparameters we want to tune

param_grid={'n_estimators':[50,60,70],'learning_rate':[0.001,0.01,0.1]}
param_grid1={'n_estimators':[50,60,80,100,200],'learning_rate':[0.001,0.01,0.1,0.2,0.5]}


#instantiate ,fit GridSearchCV,RandomizedSearchCV and make predictions

gs_ab=GridSearchCV(AdaBoostRegressor(),param_grid=param_grid)
rs_gb=RandomizedSearchCV(AdaBoostRegressor(),param_distributions=param_grid1)

gs_ab.fit(x_train_final,y_train)
rs_gb.fit(x_train_final,y_train)

y_predict=gs_ab.predict(x_test_final)
x_predict=rs_gb.predict(x_test_final)

