# 1. Voting

## Voting Classification

In [17]:
import pandas as pd
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.ensemble import VotingClassifier
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, roc_auc_score
import os
import numpy as np
import warnings
warnings.filterwarnings('ignore')

#### Kyphosis Dataset

In [3]:
os.chdir(r"C:\CDAC\6_Practical_Machine_Learning\Cases\Kyphosis")

In [4]:
kyph = pd.read_csv("Kyphosis.csv")
dum_kyph = pd.get_dummies(kyph, drop_first=True)
X = dum_kyph.drop('Kyphosis_present', axis=1)
y = dum_kyph['Kyphosis_present']

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y,
                                                    random_state=2023,
                                                    test_size=0.3)

In [6]:
svm = SVC(probability=True, kernel='linear', random_state=2023 )
nb = GaussianNB()
lda = LinearDiscriminantAnalysis()

voting = VotingClassifier([('SVM',svm),('NB',nb),('LDA',lda)], voting='soft')

In [7]:
voting.fit(X_train, y_train)
y_pred = voting.predict(X_test)
print(accuracy_score(y_test, y_pred))
y_pred_prob = voting.predict_proba(X_test)[:,1]
print(roc_auc_score(y_test, y_pred_prob))

0.76
0.75


In [8]:
##################### K-Folds CV $$$$$$$$$$$$$$$$$$$$$$$$$$$$
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=2023)
results = cross_val_score(voting, X, y, scoring='roc_auc',
                          cv=kfold)
print(results.mean())

0.8400641025641026


In [9]:
################## Grid Search CV ###########################
print(voting.get_params())
params = {'SVM__C': [0.01, 0.1, 0.5, 1, 1.5, 2]}
gcv = GridSearchCV(voting, param_grid=params, 
                   cv=kfold, scoring='roc_auc')
gcv.fit(X, y)
print(gcv.best_params_)
print(gcv.best_score_)

{'estimators': [('SVM', SVC(kernel='linear', probability=True, random_state=2023)), ('NB', GaussianNB()), ('LDA', LinearDiscriminantAnalysis())], 'flatten_transform': True, 'n_jobs': None, 'verbose': False, 'voting': 'soft', 'weights': None, 'SVM': SVC(kernel='linear', probability=True, random_state=2023), 'NB': GaussianNB(), 'LDA': LinearDiscriminantAnalysis(), 'SVM__C': 1.0, 'SVM__break_ties': False, 'SVM__cache_size': 200, 'SVM__class_weight': None, 'SVM__coef0': 0.0, 'SVM__decision_function_shape': 'ovr', 'SVM__degree': 3, 'SVM__gamma': 'scale', 'SVM__kernel': 'linear', 'SVM__max_iter': -1, 'SVM__probability': True, 'SVM__random_state': 2023, 'SVM__shrinking': True, 'SVM__tol': 0.001, 'SVM__verbose': False, 'NB__priors': None, 'NB__var_smoothing': 1e-09, 'LDA__covariance_estimator': None, 'LDA__n_components': None, 'LDA__priors': None, 'LDA__shrinkage': None, 'LDA__solver': 'svd', 'LDA__store_covariance': False, 'LDA__tol': 0.0001}
{'SVM__C': 0.01}
0.8400641025641026


#### Bankruptcy Dataset

In [11]:
os.chdir(r"C:\CDAC\6_Practical_Machine_Learning\Cases\Bankruptcy")

In [12]:
brupt = pd.read_csv("Bankruptcy.csv")
X = brupt.drop(['NO', 'D'], axis=1)
y = brupt['D']

In [13]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

In [14]:
nb = GaussianNB()
dtc = DecisionTreeClassifier(random_state=2023)
lr = LogisticRegression()
voting = VotingClassifier([('NB',nb),('DT',dtc),('LR',lr)], voting='soft')

In [15]:
print(voting.get_params())


{'estimators': [('NB', GaussianNB()), ('DT', DecisionTreeClassifier(random_state=2023)), ('LR', LogisticRegression())], 'flatten_transform': True, 'n_jobs': None, 'verbose': False, 'voting': 'soft', 'weights': None, 'NB': GaussianNB(), 'DT': DecisionTreeClassifier(random_state=2023), 'LR': LogisticRegression(), 'NB__priors': None, 'NB__var_smoothing': 1e-09, 'DT__ccp_alpha': 0.0, 'DT__class_weight': None, 'DT__criterion': 'gini', 'DT__max_depth': None, 'DT__max_features': None, 'DT__max_leaf_nodes': None, 'DT__min_impurity_decrease': 0.0, 'DT__min_samples_leaf': 1, 'DT__min_samples_split': 2, 'DT__min_weight_fraction_leaf': 0.0, 'DT__random_state': 2023, 'DT__splitter': 'best', 'LR__C': 1.0, 'LR__class_weight': None, 'LR__dual': False, 'LR__fit_intercept': True, 'LR__intercept_scaling': 1, 'LR__l1_ratio': None, 'LR__max_iter': 100, 'LR__multi_class': 'auto', 'LR__n_jobs': None, 'LR__penalty': 'l2', 'LR__random_state': None, 'LR__solver': 'lbfgs', 'LR__tol': 0.0001, 'LR__verbose': 0, 'L

In [18]:
params = {'DT__max_depth':[None, 3, 4, 5],
          'DT__min_samples_split':[2, 4, 6, 8, 10],
          'DT__min_samples_leaf':[1, 3, 5, 9, 11],
          'LR__penalty':['l1','l2','elasticnet',None]}
gcv = GridSearchCV(voting, param_grid=params,
                   scoring='roc_auc',n_jobs=-1,
                   cv=kfold, verbose=3)
gcv.fit(X, y)
pd_cv = pd.DataFrame( gcv.cv_results_ )
print(gcv.best_params_)
print(gcv.best_score_)

Fitting 5 folds for each of 400 candidates, totalling 2000 fits
{'DT__max_depth': 4, 'DT__min_samples_leaf': 1, 'DT__min_samples_split': 2, 'LR__penalty': None}
0.8964497041420119


## Voting Regression

In [19]:
import pandas as pd
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import VotingRegressor
from sklearn.metrics import r2_score
import os
import numpy as np

In [20]:
os.chdir(r"C:\CDAC\6_Practical_Machine_Learning\Datasets")

In [22]:
housing = pd.read_csv("Housing.csv")
dum_hous = pd.get_dummies(housing, drop_first=True)
X = dum_hous.drop('price', axis=1)
y = dum_hous['price']

In [23]:
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    random_state=2023,
                                                    test_size=0.3)

In [24]:
dtr = DecisionTreeRegressor(random_state=2023)
elastic = ElasticNet()
lr = LinearRegression()
voting = VotingRegressor([('DT',dtr),('ELASTIC',elastic),('LR',lr)])

In [25]:
voting.fit(X_train, y_train)
y_pred = voting.predict(X_test)
print(r2_score(y_test, y_pred))

0.6194223932792857


In [26]:
########################## K-Folds CV $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
voting = VotingRegressor([('DT',dtr),('ELASTIC',elastic),('LR',lr)])
kfold = KFold(n_splits=5, shuffle=True, random_state=2023)
results = cross_val_score(voting, X, y, cv=kfold)
print(results.mean())

0.621768009956367


In [27]:
####### with weights
results = cross_val_score(dtr, X, y, cv=kfold)
r2_dtr = results.mean()

results = cross_val_score(elastic, X, y, cv=kfold)
r2_elastic = results.mean()

results = cross_val_score(lr, X, y, cv=kfold)
r2_lr = results.mean()

voting = VotingRegressor([('DT',dtr),('ELASTIC',elastic),('LR',lr)],
                         weights=[r2_dtr, r2_elastic, r2_lr])

In [28]:
kfold = KFold(n_splits=5, shuffle=True, random_state=2023)
results = cross_val_score(voting, X, y, cv=kfold)
print(results.mean())

0.6375134919933002


In [30]:
################## Grid Search CV ####################################
voting = VotingRegressor([('DT',dtr),('ELASTIC',elastic),('LR',lr)],
                         weights=[r2_dtr, r2_elastic, r2_lr])
# print(voting.get_params())
# params = {'max_depth':[None, 3, 4, 5],
#           'min_samples_split':[2, 5, 10],
#           'min_samples_leaf':[1, 4, 10],
#   ElasticNet: alpha=np.linspace(0,10,5),
#                l1_ratio=np.linspace(0,1,5)}
params = {'DT__max_depth':[None, 3, 4, 5],
          'DT__min_samples_split':[2, 5, 10],
          'DT__min_samples_leaf':[1, 4, 10],
          'ELASTIC__alpha':np.linspace(0,10,5),
          'ELASTIC__l1_ratio':np.linspace(0,1,5)}
gcv = GridSearchCV(voting, param_grid=params, 
                   scoring='r2', cv=kfold)
gcv.fit(X, y)
print(gcv.best_params_)
print(gcv.best_score_)

{'DT__max_depth': None, 'DT__min_samples_leaf': 1, 'DT__min_samples_split': 10, 'ELASTIC__alpha': 0.0, 'ELASTIC__l1_ratio': 0.0}
0.647853638192189


# 2. Bagging