In [1]:
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_curve, roc_auc_score
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import log_loss
from sklearn.model_selection import StratifiedKFold, cross_val_score, KFold
from sklearn.metrics import r2_score
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.tree import plot_tree
from sklearn.ensemble import VotingClassifier
from sklearn.tree import DecisionTreeClassifier

In [2]:
# Load the Sonar data set. The location defaults to the path used on the
# original workstation, but can be overridden with the SONAR_CSV environment
# variable so the notebook is runnable on other machines without editing code.
sonar_csv = os.environ.get(
    "SONAR_CSV",
    "C:/Users/Administrator.DAI-PC2/Desktop/ML/Day1/Sonar.csv",
)
df = pd.read_csv(sonar_csv)

# Encode the string labels in the "Class" column as integers; the integer
# assigned to each label is its position in le.classes_ (printed below).
le = LabelEncoder()
y = le.fit_transform(df["Class"])

# Every column except the target is used as a feature.
X = df.drop("Class", axis = 1)
print(le.classes_)

['M' 'R']


In [3]:
# Soft-voting ensemble over five base classifiers: logistic regression,
# a linear-kernel SVM, an RBF-kernel SVM, LDA and a decision tree.

# Base learners that are fed the features without any preprocessing step.
dtc = DecisionTreeClassifier(random_state=24)
lda = LinearDiscriminantAnalysis()
lr = LogisticRegression()

# Linear-kernel SVM behind a standard scaler. probability=True makes the SVM
# expose predict_proba, which soft voting needs in order to average
# class probabilities.
std_scaler = StandardScaler()
svc_l = SVC(kernel='linear', probability=True, random_state=24)
pipe_l = Pipeline(steps=[('SCL', std_scaler), ('SVM', svc_l)])

# RBF-kernel SVM with its own, separate scaler instance
# (the name std_scaler is rebound to the new object here).
std_scaler = StandardScaler()
svc_r = SVC(kernel='rbf', probability=True, random_state=24)
pipe_r = Pipeline(steps=[('SCL', std_scaler), ('SVM', svc_r)])

# The estimator and pipeline-step names are the prefixes used to address
# nested hyper-parameters later on (e.g. 'SVML__SVM__C'), so keep them stable.
base_estimators = [
    ('LR', lr),
    ('SVML', pipe_l),
    ('SVM_R', pipe_r),
    ('LDA', lda),
    ('DTC', dtc),
]
voting = VotingClassifier(estimators=base_estimators, voting='soft')


In [4]:
# Hold out 30% of the samples (stratified on the label so both splits keep the
# class proportions), fit the ensemble on the remaining 70%, and report the
# accuracy on the held-out part.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=24,
)

# fit() returns the fitted estimator itself, so predict can be chained.
y_pred = voting.fit(X_train, y_train).predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print(test_accuracy)

0.8095238095238095


In [6]:
# Exhaustive grid search over the ensemble's nested hyper-parameters, scored
# by negated log loss (values closer to 0 are better) with 5-fold stratified
# cross-validation on the full data set.
# Grid size: 5 * 5 * 5 * 5 * 3 = 1875 candidates, each fitted once per fold.

# Five evenly spaced values from 0.001 to 3, shared by every C / gamma entry.
grid_values = np.linspace(0.001, 3, 5)

# Keys follow the '<estimator>__<pipeline step>__<parameter>' naming of the
# voting classifier built earlier.
params = {
    'LR__C': grid_values,
    'SVML__SVM__C': grid_values,
    'SVM_R__SVM__C': grid_values,
    'SVM_R__SVM__gamma': grid_values,
    'DTC__max_depth': [None, 2, 3],
}

kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=24)
gcv = GridSearchCV(
    estimator=voting,
    param_grid=params,
    scoring='neg_log_loss',
    cv=kfold,
    n_jobs=-1,  # use all available cores
)
gcv.fit(X, y)

# Best mean cross-validated score first, then the parameters that achieved it.
for result in (gcv.best_score_, gcv.best_params_):
    print(result)


-0.466330230792261
{'DTC__max_depth': None, 'LR__C': 3.0, 'SVML__SVM__C': 0.75075, 'SVM_R__SVM__C': 3.0, 'SVM_R__SVM__gamma': 0.001}
