In [15]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.tree import plot_tree
from sklearn.ensemble import VotingClassifier, VotingRegressor
from sklearn.tree import DecisionTreeRegressor 
from sklearn.linear_model import Ridge, Lasso, ElasticNet
from sklearn.model_selection import StratifiedKFold, cross_val_score, KFold
from sklearn.model_selection import GridSearchCV

In [2]:
df = pd.read_csv("C:/Users/Administrator.DAI-PC2/Desktop/ML/Day 5/Concrete_Data.csv")

y = df['Strength']
X = df.drop('Strength', axis = 1)

In [3]:
#performance evaluation using regressors 
dtr = DecisionTreeRegressor(random_state=24)
lr = LinearRegression()
ridge = Ridge()
lasso = Lasso()

voting = VotingRegressor([('LR',lr), ('DTR', dtr), ('RID', ridge), ('LS', lasso)])

In [4]:
#voting 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state=24)

voting.fit(X_train, y_train)
y_pred = voting.predict(X_test)
print("Voting:", r2_score(y_test, y_pred))

Voting: 0.726968009759825


In [5]:
#linear regression
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)
r2_lr = r2_score(y_test, y_pred)
print("LR:", r2_lr)

LR: 0.5771752777048791


In [6]:
#decision tree
dtr.fit(X_train, y_train)
y_pred = dtr.predict(X_test)
r2_dtr = r2_score(y_test, y_pred)
print("TREE:", r2_dtr)

TREE: 0.83089474226832


In [7]:
#ridge
ridge.fit(X_train, y_train)
y_pred = ridge.predict(X_test)
r2_ri = r2_score(y_test, y_pred)
print("RIDGE:", r2_ri)

RIDGE: 0.5771749099675626


In [8]:
#lasso
lasso.fit(X_train, y_train)
y_pred = lasso.predict(X_test)
r2_ls = r2_score(y_test, y_pred)
print("LASSO:", r2_ls)

LASSO: 0.576333587787164


In [9]:
#weighted average
voting1 = VotingRegressor([('LR',lr), ('DTR', dtr), ('RID', ridge), ('LS', lasso)], weights=[r2_lr, r2_dtr, r2_ri, r2_ls])
voting1.fit(X_train, y_train)
y_pred = voting1.predict(X_test)
r2_voting = r2_score(y_test, y_pred)
print("Weighted voting",r2_voting)

Weighted voting 0.7604627780013105


In [20]:
# using GCV and declaring parameters

kfold = KFold(n_splits = 5, shuffle = True, random_state=24)

params = {'RID__alpha': np.linspace(0.001,3,5), 'LS__alpha': np.linspace(0.001,3,5),
          'DTR__max_depth': [None,3, 4, 5], 'DTR__min_samples_leaf':[1, 5, 10], 'DTR__min_samples_split': [2, 5, 10]}
gcv = GridSearchCV(voting, param_grid = params, cv = kfold, scoring = 'r2', n_jobs = -1)
gcv.fit(X, y)
print(gcv.best_score_)
print(gcv.best_params_)


0.7444419391024694
{'DTR__max_depth': None, 'DTR__min_samples_leaf': 1, 'DTR__min_samples_split': 2, 'LS__alpha': 0.001, 'RID__alpha': 0.001}


In [21]:
# using GCV and declaring parameters for weighted

params = {'RID__alpha': np.linspace(0.001,3,5), 'LS__alpha': np.linspace(0.001,3,5),
          'DTR__max_depth': [None,3, 4, 5], 'DTR__min_samples_leaf':[1, 5, 10], 'DTR__min_samples_split': [2, 5, 10]}
gcv = GridSearchCV(voting1, param_grid = params, cv = kfold, scoring = 'r2', n_jobs = -1)
gcv.fit(X, y)
print(gcv.best_score_)
print(gcv.best_params_)

0.7754886685757552
{'DTR__max_depth': None, 'DTR__min_samples_leaf': 1, 'DTR__min_samples_split': 2, 'LS__alpha': 0.001, 'RID__alpha': 0.001}


In [10]:
print(voting.get_params())

{'estimators': [('LR', LinearRegression()), ('DTR', DecisionTreeRegressor(random_state=24)), ('RID', Ridge()), ('LS', Lasso())], 'n_jobs': None, 'verbose': False, 'weights': None, 'LR': LinearRegression(), 'DTR': DecisionTreeRegressor(random_state=24), 'RID': Ridge(), 'LS': Lasso(), 'LR__copy_X': True, 'LR__fit_intercept': True, 'LR__n_jobs': None, 'LR__positive': False, 'DTR__ccp_alpha': 0.0, 'DTR__criterion': 'squared_error', 'DTR__max_depth': None, 'DTR__max_features': None, 'DTR__max_leaf_nodes': None, 'DTR__min_impurity_decrease': 0.0, 'DTR__min_samples_leaf': 1, 'DTR__min_samples_split': 2, 'DTR__min_weight_fraction_leaf': 0.0, 'DTR__monotonic_cst': None, 'DTR__random_state': 24, 'DTR__splitter': 'best', 'RID__alpha': 1.0, 'RID__copy_X': True, 'RID__fit_intercept': True, 'RID__max_iter': None, 'RID__positive': False, 'RID__random_state': None, 'RID__solver': 'auto', 'RID__tol': 0.0001, 'LS__alpha': 1.0, 'LS__copy_X': True, 'LS__fit_intercept': True, 'LS__max_iter': 1000, 'LS__pos