In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score, GridSearchCV
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import accuracy_score, log_loss, r2_score
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.svm import SVC

In [2]:
df = pd.read_csv("C:/Users/Administrator.DAI-PC2/Desktop/ML/Day1/Glass.csv")
le = LabelEncoder()
y = le.fit_transform(df["Type"])
X = df.drop("Type", axis = 1)
le.classes_

array(['building_windows_float_processed',
       'building_windows_non_float_processed', 'containers', 'headlamps',
       'tableware', 'vehicle_windows_float_processed'], dtype=object)

In [3]:
svc = SVC(C= 0.1, kernel = 'linear', probability = True, random_state=24)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state=24, stratify=y)

kfold = StratifiedKFold(n_splits = 5, shuffle = True, random_state=24)

svc.fit(X_train, y_train)

y_pred = svc.predict(X_test)
print(accuracy_score(y_test, y_pred))

y_pred_prob = svc.predict_proba(X_test)
print(log_loss(y_test, y_pred_prob))

0.5230769230769231
0.9845774262957872


In [4]:
# SVC with scaling using pipeline, kernel = 'linear'

std_scaler = StandardScaler()
std_mm = MinMaxScaler()
pipe = Pipeline([('SCL', None), ('SVC',svc)])
params = {'SVC__C': np.linspace(0.001, 5, 20), 'SVC__decision_function_shape' : ['ovo', 'ovr'], 'SCL':[std_scaler, std_mm, None]}
gcv = GridSearchCV(pipe, param_grid = params, cv = kfold, scoring = 'neg_log_loss')
gcv.fit(X, y)
print(gcv.best_score_)
print(gcv.best_params_)

-0.931260449901816
{'SCL': None, 'SVC__C': 0.7903157894736842, 'SVC__decision_function_shape': 'ovo'}


In [5]:
#kernel = poly, SVC__decision_function_shape = ovo, ovr

svc1 = SVC(C= 0.1, kernel = 'poly', probability = True, random_state=24)

pipe = Pipeline([('SCL', None), ('SVC',svc1)])
params = {'SVC__C': np.linspace(0.001, 5, 20), 'SCL':[std_scaler, std_mm, None], 'SVC__degree' : [2,3], 'SVC__coef0': np.linspace(0, 3, 5), 'SVC__decision_function_shape' : ['ovo', 'ovr']}
gcv_poly = GridSearchCV(pipe, param_grid = params, cv = kfold, scoring = 'neg_log_loss', verbose = 2)
gcv_poly.fit(X, y)
print(gcv_poly.best_score_)
print(gcv_poly.best_params_)

Fitting 5 folds for each of 1200 candidates, totalling 6000 fits
[CV] END SCL=StandardScaler(), SVC__C=0.001, SVC__coef0=0.0, SVC__decision_function_shape=ovo, SVC__degree=2; total time=   0.0s
[CV] END SCL=StandardScaler(), SVC__C=0.001, SVC__coef0=0.0, SVC__decision_function_shape=ovo, SVC__degree=2; total time=   0.0s
[CV] END SCL=StandardScaler(), SVC__C=0.001, SVC__coef0=0.0, SVC__decision_function_shape=ovo, SVC__degree=2; total time=   0.0s
[CV] END SCL=StandardScaler(), SVC__C=0.001, SVC__coef0=0.0, SVC__decision_function_shape=ovo, SVC__degree=2; total time=   0.0s
[CV] END SCL=StandardScaler(), SVC__C=0.001, SVC__coef0=0.0, SVC__decision_function_shape=ovo, SVC__degree=2; total time=   0.0s
[CV] END SCL=StandardScaler(), SVC__C=0.001, SVC__coef0=0.0, SVC__decision_function_shape=ovo, SVC__degree=3; total time=   0.0s
[CV] END SCL=StandardScaler(), SVC__C=0.001, SVC__coef0=0.0, SVC__decision_function_shape=ovo, SVC__degree=3; total time=   0.0s
[CV] END SCL=StandardScaler(), S

In [6]:
#kernel = rbf

svc2 = SVC(C= 0.1, kernel = 'rbf', probability = True, random_state=24)

pipe = Pipeline([('SCL', None), ('SVC',svc2)])
params = {'SVC__C': np.linspace(0.001, 5, 20), 'SCL':[std_scaler, std_mm, None], 'SVC__degree' : [2,3], 'SVC__gamma': np.linspace(0.001, 5, 5), 'SVC__decision_function_shape' : ['ovo', 'ovr']}
gcv_rbf = GridSearchCV(pipe, param_grid = params, cv = kfold, scoring = 'neg_log_loss', verbose = 2)
gcv_rbf.fit(X, y)
print(gcv_rbf.best_score_)
print(gcv_rbf.best_params_)

Fitting 5 folds for each of 1200 candidates, totalling 6000 fits
[CV] END SCL=StandardScaler(), SVC__C=0.001, SVC__decision_function_shape=ovo, SVC__degree=2, SVC__gamma=0.001; total time=   0.0s
[CV] END SCL=StandardScaler(), SVC__C=0.001, SVC__decision_function_shape=ovo, SVC__degree=2, SVC__gamma=0.001; total time=   0.0s
[CV] END SCL=StandardScaler(), SVC__C=0.001, SVC__decision_function_shape=ovo, SVC__degree=2, SVC__gamma=0.001; total time=   0.0s
[CV] END SCL=StandardScaler(), SVC__C=0.001, SVC__decision_function_shape=ovo, SVC__degree=2, SVC__gamma=0.001; total time=   0.0s
[CV] END SCL=StandardScaler(), SVC__C=0.001, SVC__decision_function_shape=ovo, SVC__degree=2, SVC__gamma=0.001; total time=   0.0s
[CV] END SCL=StandardScaler(), SVC__C=0.001, SVC__decision_function_shape=ovo, SVC__degree=2, SVC__gamma=1.2507499999999998; total time=   0.0s
[CV] END SCL=StandardScaler(), SVC__C=0.001, SVC__decision_function_shape=ovo, SVC__degree=2, SVC__gamma=1.2507499999999998; total time=

In [7]:
pd_cv = pd.DataFrame(gcv.cv_results_)
pd_cv.head()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_SCL,param_SVC__C,param_SVC__decision_function_shape,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.021711,0.003296,0.003983,1.830707e-06,StandardScaler(),0.001,ovo,"{'SCL': StandardScaler(), 'SVC__C': 0.001, 'SV...",-0.975764,-1.101543,-1.076629,-1.013295,-1.015706,-1.036587,0.045827,115
1,0.018922,0.000891,0.003586,0.0004880524,StandardScaler(),0.001,ovr,"{'SCL': StandardScaler(), 'SVC__C': 0.001, 'SV...",-0.975764,-1.101543,-1.076629,-1.013295,-1.015706,-1.036587,0.045827,115
2,0.01932,0.000796,0.002989,1.37044e-06,StandardScaler(),0.264105,ovo,"{'SCL': StandardScaler(), 'SVC__C': 0.26410526...",-0.858117,-0.994997,-0.99026,-0.96384,-0.877114,-0.936866,0.057844,5
3,0.01952,0.000488,0.002988,6.468134e-07,StandardScaler(),0.264105,ovr,"{'SCL': StandardScaler(), 'SVC__C': 0.26410526...",-0.858117,-0.994997,-0.99026,-0.96384,-0.877114,-0.936866,0.057844,5
4,0.022797,0.003762,0.003386,0.0004880326,StandardScaler(),0.527211,ovo,"{'SCL': StandardScaler(), 'SVC__C': 0.52721052...",-0.890385,-0.985367,-1.019818,-0.965741,-0.850888,-0.94244,0.062393,13


In [8]:
pd_cv_rbf = pd.DataFrame(gcv_rbf.cv_results_)
pd_cv_rbf.head()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_SCL,param_SVC__C,param_SVC__decision_function_shape,param_SVC__degree,param_SVC__gamma,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.030476,0.003367,0.00478,0.000398,StandardScaler(),0.001,ovo,2,0.001,"{'SCL': StandardScaler(), 'SVC__C': 0.001, 'SV...",-1.10017,-1.204278,-1.258535,-1.054622,-1.161915,-1.155904,0.07249,1181
1,0.035852,0.003021,0.006374,0.003308,StandardScaler(),0.001,ovo,2,1.25075,"{'SCL': StandardScaler(), 'SVC__C': 0.001, 'SV...",-1.090952,-0.986803,-1.008263,-1.037847,-1.21234,-1.067241,0.080534,1001
2,0.037047,0.009156,0.005179,0.001464,StandardScaler(),0.001,ovo,2,2.5005,"{'SCL': StandardScaler(), 'SVC__C': 0.001, 'SV...",-1.178841,-1.111523,-1.131089,-1.122846,-1.32984,-1.174828,0.08083,1185
3,0.030873,0.001781,0.004581,0.000488,StandardScaler(),0.001,ovo,2,3.75025,"{'SCL': StandardScaler(), 'SVC__C': 0.001, 'SV...",-1.255958,-1.208715,-1.212581,-1.2122,-1.377784,-1.253447,0.064557,1193
4,0.030474,0.001847,0.004382,0.000488,StandardScaler(),0.001,ovo,2,5.0,"{'SCL': StandardScaler(), 'SVC__C': 0.001, 'SV...",-1.312888,-1.254172,-1.248718,-1.259876,-1.411238,-1.297379,0.061393,1197


In [9]:
pd_cv_poly = pd.DataFrame(gcv_poly.cv_results_)
pd_cv_poly.head()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_SCL,param_SVC__C,param_SVC__coef0,param_SVC__decision_function_shape,param_SVC__degree,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.025098,0.001464,0.004581,0.0007972958,StandardScaler(),0.001,0.0,ovo,2,"{'SCL': StandardScaler(), 'SVC__C': 0.001, 'SV...",-1.073965,-1.305335,-1.228776,-1.122554,-1.152366,-1.176599,0.081655,1197
1,0.026093,0.002702,0.004183,0.0003984213,StandardScaler(),0.001,0.0,ovo,3,"{'SCL': StandardScaler(), 'SVC__C': 0.001, 'SV...",-1.105826,-1.333956,-1.255901,-1.182749,-1.179757,-1.211638,0.077418,1199
2,0.030076,0.005988,0.00518,0.002389908,StandardScaler(),0.001,0.0,ovr,2,"{'SCL': StandardScaler(), 'SVC__C': 0.001, 'SV...",-1.073965,-1.305335,-1.228776,-1.122554,-1.152366,-1.176599,0.081655,1197
3,0.024898,0.001542,0.003983,5.519789e-07,StandardScaler(),0.001,0.0,ovr,3,"{'SCL': StandardScaler(), 'SVC__C': 0.001, 'SV...",-1.105826,-1.333956,-1.255901,-1.182749,-1.179757,-1.211638,0.077418,1199
4,0.024499,0.000795,0.003984,2.401291e-06,StandardScaler(),0.001,0.75,ovo,2,"{'SCL': StandardScaler(), 'SVC__C': 0.001, 'SV...",-0.969614,-1.116378,-1.117332,-1.017573,-1.028302,-1.04984,0.058178,785
