In [1]:
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.tree import plot_tree
from sklearn.ensemble import VotingClassifier, VotingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import Ridge, Lasso, ElasticNet
from sklearn.model_selection import StratifiedKFold, cross_val_score, KFold
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

In [2]:
# Location of the concrete dataset. The original workstation path is kept as
# the default so existing runs behave exactly as before; set the
# CONCRETE_DATA_CSV environment variable to point the notebook at the file on
# another machine without editing the code.
DATA_PATH = os.environ.get(
    "CONCRETE_DATA_CSV",
    "C:/Users/Administrator.DAI-PC2/Desktop/ML/Day 5/Concrete_Data.csv",
)
df = pd.read_csv(DATA_PATH)

# 'Strength' is the regression target; all remaining columns form the features.
y = df['Strength']
X = df.drop(columns='Strength')

In [3]:
# Base regressors that are scored individually and combined into ensembles.
# Only the tree is seeded; the other three are built with default settings.
lasso = Lasso()
ridge = Ridge()
lr = LinearRegression()
dtr = DecisionTreeRegressor(random_state=24)

# Ensemble of the four base models. No weights are passed here, so the
# VotingRegressor's default weighting is used.
voting = VotingRegressor(
    estimators=[
        ('LR', lr),
        ('DTR', dtr),
        ('RID', ridge),
        ('LS', lasso),
    ]
)

In [4]:
# Hold out 30% of the rows as a test set (seeded so the split is repeatable),
# then fit the unweighted ensemble and report its R^2 on that hold-out.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=24,
)

# fit() returns the fitted estimator, so fitting and predicting can be chained
y_pred = voting.fit(X_train, y_train).predict(X_test)
print("Voting:", r2_score(y_test, y_pred))

Voting: 0.726968009759825


In [5]:
# Linear regression on its own: fit on the training split, score on the test split
# (fit() returns the estimator itself, which allows the chained predict call)
y_pred = lr.fit(X_train, y_train).predict(X_test)
r2_lr = r2_score(y_test, y_pred)  # kept for the weighted ensemble further down
print("LR:", r2_lr)

LR: 0.5771752777048791


In [6]:
# Decision tree on its own: fit on the training split, score on the test split
y_pred = dtr.fit(X_train, y_train).predict(X_test)
r2_dtr = r2_score(y_test, y_pred)  # kept for the weighted ensemble further down
print("TREE:", r2_dtr)

TREE: 0.83089474226832


In [7]:
# Ridge regression on its own: fit on the training split, score on the test split
y_pred = ridge.fit(X_train, y_train).predict(X_test)
r2_ri = r2_score(y_test, y_pred)  # kept for the weighted ensemble further down
print("RIDGE:", r2_ri)

RIDGE: 0.5771749099675626


In [8]:
# Lasso regression on its own: fit on the training split, score on the test split
y_pred = lasso.fit(X_train, y_train).predict(X_test)
r2_ls = r2_score(y_test, y_pred)  # kept for the weighted ensemble further down
print("LASSO:", r2_ls)

LASSO: 0.576333587787164


In [9]:
#weighted average
# Each base model is weighted by the R^2 it achieved when scored on its own.
# NOTE(review): r2_lr / r2_dtr / r2_ri / r2_ls were computed against
# (X_test, y_test), the same split this ensemble is evaluated on below, so the
# reported score is optimistic. Consider deriving the weights from
# cross-validation on the training split instead.
test_r2_scores = [r2_lr, r2_dtr, r2_ri, r2_ls]

# R^2 is negative for a model that does worse than predicting the mean. A
# negative weight would make the ensemble subtract that model's prediction
# (and a zero-sum weight vector cannot be averaged), so floor every weight at
# zero and fall back to equal weighting when no model scores above zero.
vote_weights = [max(score, 0.0) for score in test_r2_scores]
if not any(vote_weights):
    vote_weights = None

voting1 = VotingRegressor(
    [('LR', lr), ('DTR', dtr), ('RID', ridge), ('LS', lasso)],
    weights=vote_weights,
)
voting1.fit(X_train, y_train)
y_pred = voting1.predict(X_test)
r2_voting = r2_score(y_test, y_pred)
print("Weighted voting",r2_voting)

Weighted voting 0.7604627780013105


In [10]:
# Exhaustive grid search over the hyper-parameters of the unweighted ensemble.
# Keys use the '<estimator name>__<parameter>' form, where the estimator names
# are the labels given to the VotingRegressor ('RID', 'LS', 'DTR').

# Shuffled, seeded 5-fold splitter; the search cells that follow reuse it.
kfold = KFold(n_splits=5, shuffle=True, random_state=24)

params = {
    'RID__alpha': np.linspace(0.001, 3, 5),
    'LS__alpha': np.linspace(0.001, 3, 5),
    'DTR__max_depth': [None, 3, 4, 5],
    'DTR__min_samples_leaf': [1, 5, 10],
    'DTR__min_samples_split': [2, 5, 10],
}
gcv = GridSearchCV(
    estimator=voting,
    param_grid=params,
    scoring='r2',
    cv=kfold,
    n_jobs=-1,
)
# The search is run on the full data set; kfold supplies the train/validation splits
gcv.fit(X, y)
print(gcv.best_score_)
print(gcv.best_params_)


0.7444419391024694
{'DTR__max_depth': None, 'DTR__min_samples_leaf': 1, 'DTR__min_samples_split': 2, 'LS__alpha': 0.001, 'RID__alpha': 0.001}


In [11]:
# Same exhaustive grid search, this time applied to the weighted ensemble
# (voting1). Reuses the kfold splitter defined for the previous search.

params = {
    'RID__alpha': np.linspace(0.001, 3, 5),
    'LS__alpha': np.linspace(0.001, 3, 5),
    'DTR__max_depth': [None, 3, 4, 5],
    'DTR__min_samples_leaf': [1, 5, 10],
    'DTR__min_samples_split': [2, 5, 10],
}
gcv = GridSearchCV(
    estimator=voting1,
    param_grid=params,
    scoring='r2',
    cv=kfold,
    n_jobs=-1,
)
gcv.fit(X, y)
print(gcv.best_score_)
print(gcv.best_params_)

0.7754886685757552
{'DTR__max_depth': None, 'DTR__min_samples_leaf': 1, 'DTR__min_samples_split': 2, 'LS__alpha': 0.001, 'RID__alpha': 0.001}


In [12]:
# Using RandomizedSearchCV: sample 20 candidates from a finer grid instead of
# trying every combination. Seeded so the sampled candidates are repeatable.
params1 = {
    'RID__alpha': np.linspace(0.001, 3, 10),
    'LS__alpha': np.linspace(0.001, 3, 10),
    'DTR__max_depth': [None, 3, 4, 5],
    'DTR__min_samples_split': [2, 4, 5, 8, 10],
    'DTR__min_samples_leaf': [1, 4, 5, 8, 10],
}

rgcv = RandomizedSearchCV(
    estimator=voting,
    param_distributions=params1,
    n_iter=20,
    scoring='r2',
    cv=kfold,
    n_jobs=-1,
    random_state=24,
)

rgcv.fit(X, y)
print(rgcv.best_params_)
print(rgcv.best_score_)

{'RID__alpha': 3.0, 'LS__alpha': 2.666777777777778, 'DTR__min_samples_split': 5, 'DTR__min_samples_leaf': 1, 'DTR__max_depth': None}
0.7438333204906099


In [14]:
# Keep a handle on the estimator the randomized search exposes as its best one
best = rgcv.best_estimator_
# Bare expression: the notebook renders the estimator as this cell's output
best

In [16]:
# Tabulate the per-candidate results recorded by the randomized search and
# show the first five rows as the cell output
pd_rgcv = pd.DataFrame(data=rgcv.cv_results_)
pd_rgcv.head(n=5)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_RID__alpha,param_LS__alpha,param_DTR__min_samples_split,param_DTR__min_samples_leaf,param_DTR__max_depth,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.033277,0.005399,0.008962,0.00063,3.0,3.0,8,4,,"{'RID__alpha': 3.0, 'LS__alpha': 3.0, 'DTR__mi...",0.702272,0.74116,0.770905,0.724455,0.752866,0.738332,0.02355,4
1,0.018914,0.002177,0.005178,0.00429,1.667111,1.000667,2,5,5.0,"{'RID__alpha': 1.6671111111111112, 'LS__alpha'...",0.690515,0.727139,0.745833,0.70767,0.732246,0.72068,0.019429,7
2,0.014696,0.004164,0.012506,0.006253,1.000667,2.333556,10,4,3.0,"{'RID__alpha': 1.0006666666666666, 'LS__alpha'...",0.669127,0.700802,0.700303,0.667685,0.708128,0.689209,0.017216,18
3,0.018748,0.006249,0.006249,0.007654,3.0,1.333889,5,1,4.0,"{'RID__alpha': 3.0, 'LS__alpha': 1.33388888888...",0.678444,0.718606,0.733585,0.690685,0.721155,0.708495,0.020554,13
4,0.021872,0.007653,0.009373,0.007653,0.001,0.001,2,8,4.0,"{'RID__alpha': 0.001, 'LS__alpha': 0.001, 'DTR...",0.678621,0.718317,0.732563,0.690987,0.721308,0.708359,0.020193,14
