In [1]:
import pandas as pd
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import os
import numpy as np
from sklearn.pipeline import Pipeline 
from sklearn.preprocessing import StandardScaler, MinMaxScaler

#### Bankruptcy Dataset

In [3]:
# Work from the Bankruptcy case folder so its CSV can be opened by bare file name.
os.chdir(path=r"C:\CDAC\6_Practical_Machine_Learning\Cases\Bankruptcy")

In [4]:
# Read the bankruptcy data from the current working directory.
brupt = pd.read_csv(filepath_or_buffer="Bankruptcy.csv")

# 'D' is the target; 'NO' and 'YR' are left out of the feature matrix too.
y = brupt['D']
X = brupt.drop(columns=['NO', 'D', 'YR'])

In [5]:
# Baseline: linear-kernel SVM with C fixed at 2, scored by ROC AUC over
# five shuffled, stratified folds (seeded so the split is repeatable).
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=2023)
svm = SVC(C=2, kernel='linear')
results = cross_val_score(estimator=svm, X=X, y=y, scoring='roc_auc', cv=kfold)

# Report the average of the per-fold scores.
print(np.mean(results))

0.8840236686390532


In [6]:
## w/o Pipeline
# Tune C for a linear SVM fitted directly on the features (no scaling step).
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=2023)
svm = SVC(kernel='linear')

# 20 evenly spaced candidates for C between 0.1 and 10.
params = dict(C=np.linspace(0.1, 10, 20))

gcv = GridSearchCV(
    estimator=svm,
    param_grid=params,
    scoring='roc_auc',
    cv=kfold,
)
gcv.fit(X, y)

# Best candidate and its mean cross-validated ROC AUC, one per line.
print(gcv.best_params_, gcv.best_score_, sep='\n')

{'C': 0.6210526315789474}
0.8840236686390532


In [7]:
## with Pipeline
# Same search as without the pipeline, but a min-max scaling step now
# precedes the linear SVM.
scaler = MinMaxScaler()
svm = SVC(kernel='linear')
pipe_l = Pipeline(steps=[('SCL', scaler), ('SVM', svm)])

# The 'SVM__' prefix routes the parameter to the pipeline step named 'SVM'.
params = dict(SVM__C=np.linspace(0.1, 10, 20))
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=2023)

gcv = GridSearchCV(
    estimator=pipe_l,
    param_grid=params,
    scoring='roc_auc',
    cv=kfold,
)
gcv.fit(X, y)

# Best candidate and its mean cross-validated ROC AUC, one per line.
print(gcv.best_params_, gcv.best_score_, sep='\n')

{'SVM__C': 0.6210526315789474}
0.8846999154691462


In [8]:
######################### Polynomial #####################################
# Polynomial-kernel SVM behind min-max scaling; the search covers C, the
# polynomial degree and the kernel's coef0 term.
scaler = MinMaxScaler()
svm = SVC(kernel='poly')
pipe_l = Pipeline(steps=[('SCL', scaler), ('SVM', svm)])

params = dict(
    SVM__C=np.linspace(0.1, 10, 20),
    SVM__degree=[2, 3, 4],
    SVM__coef0=np.linspace(0, 10, 20),
)
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=2023)

gcv = GridSearchCV(
    estimator=pipe_l,
    param_grid=params,
    scoring='roc_auc',
    cv=kfold,
)
gcv.fit(X, y)

# Best combination and its mean cross-validated ROC AUC, one per line.
print(gcv.best_params_, gcv.best_score_, sep='\n')

{'SVM__C': 0.1, 'SVM__coef0': 1.5789473684210527, 'SVM__degree': 2}
0.8917159763313609


In [9]:
######################### Radial ##########################################
# RBF-kernel SVM behind min-max scaling, tuned over C and gamma with ROC AUC
# on five shuffled, stratified folds.
scaler = MinMaxScaler()
svm = SVC(kernel='rbf')
pipe_l = Pipeline([('SCL', scaler),('SVM', svm)])
# gamma has to be strictly positive: with gamma=0 the RBF kernel
# exp(-gamma * ||a - b||^2) is 1 for every pair of rows, so the model cannot
# separate the classes (and some scikit-learn releases raise on it).
# Slicing off the first linspace point removes the 0 while keeping the other
# 19 candidates exactly as they were.
params = {'SVM__C':np.linspace(0.1, 10, 20),
          'SVM__gamma':np.linspace(0, 10, 20)[1:]}
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=2023)
gcv = GridSearchCV(pipe_l, param_grid=params, cv=kfold, scoring='roc_auc')
gcv.fit(X, y)
# Best (C, gamma) pair and its mean cross-validated ROC AUC.
print(gcv.best_params_)
print(gcv.best_score_)

{'SVM__C': 0.1, 'SVM__gamma': 1.5789473684210527}
0.8693153000845311


#### Kyphosis Dataset

In [10]:
# Work from the Kyphosis case folder so its CSV can be opened by bare file name.
os.chdir(path=r"C:\CDAC\6_Practical_Machine_Learning\Cases\Kyphosis")

In [11]:
# Read the kyphosis data from the current working directory.
kyphosis = pd.read_csv(filepath_or_buffer="Kyphosis.csv")

# Dummy-encode the frame, dropping the first level of each encoded column.
dum_kyp = pd.get_dummies(data=kyphosis, drop_first=True)

In [12]:
# Target is the 'Kyphosis_present' dummy column; every other column is a feature.
y = dum_kyp['Kyphosis_present']
X = dum_kyp.drop(columns='Kyphosis_present')

In [13]:
# Linear-kernel SVM behind min-max scaling, tuned over C only.
scaler = MinMaxScaler()
svm = SVC(kernel='linear')
pipe_l = Pipeline(steps=[('SCL', scaler), ('SVM', svm)])

# 20 evenly spaced candidates for the SVM step's C between 0.1 and 10.
params = dict(SVM__C=np.linspace(0.1, 10, 20))
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=2023)

gcv = GridSearchCV(
    estimator=pipe_l,
    param_grid=params,
    scoring='roc_auc',
    cv=kfold,
)
gcv.fit(X, y)

# Best candidate and its mean cross-validated ROC AUC, one per line.
print(gcv.best_params_, gcv.best_score_, sep='\n')

{'SVM__C': 10.0}
0.8221153846153847


In [14]:
######################### Polynomial #####################################
# Polynomial-kernel SVM behind min-max scaling; the search covers C, the
# polynomial degree and the kernel's coef0 term.
scaler = MinMaxScaler()
svm = SVC(kernel='poly')
pipe_l = Pipeline(steps=[('SCL', scaler), ('SVM', svm)])

params = dict(
    SVM__C=np.linspace(0.1, 10, 20),
    SVM__degree=[2, 3, 4],
    SVM__coef0=np.linspace(0, 10, 20),
)
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=2023)

gcv = GridSearchCV(
    estimator=pipe_l,
    param_grid=params,
    scoring='roc_auc',
    cv=kfold,
)
gcv.fit(X, y)

# Best combination and its mean cross-validated ROC AUC, one per line.
print(gcv.best_params_, gcv.best_score_, sep='\n')

{'SVM__C': 0.6210526315789474, 'SVM__coef0': 4.2105263157894735, 'SVM__degree': 2}
0.871474358974359


In [15]:
######################### Radial ##########################################
# RBF-kernel SVM behind min-max scaling, tuned over C and gamma with ROC AUC
# on five shuffled, stratified folds.
scaler = MinMaxScaler()
svm = SVC(kernel='rbf')
pipe_l = Pipeline([('SCL', scaler),('SVM', svm)])
# gamma has to be strictly positive: with gamma=0 the RBF kernel
# exp(-gamma * ||a - b||^2) is 1 for every pair of rows, so the model cannot
# separate the classes (and some scikit-learn releases raise on it).
# Slicing off the first linspace point removes the 0 while keeping the other
# 19 candidates exactly as they were.
params = {'SVM__C':np.linspace(0.1, 10, 20),
          'SVM__gamma':np.linspace(0, 10, 20)[1:]}
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=2023)
gcv = GridSearchCV(pipe_l, param_grid=params, cv=kfold, scoring='roc_auc')
gcv.fit(X, y)
# Best (C, gamma) pair and its mean cross-validated ROC AUC.
print(gcv.best_params_)
print(gcv.best_score_)

{'SVM__C': 6.352631578947369, 'SVM__gamma': 0.5263157894736842}
0.8772435897435896


#### Wisconsin Dataset

In [16]:
# Work from the Wisconsin case folder so its CSV can be opened by bare file name.
os.chdir(path=r"C:\CDAC\6_Practical_Machine_Learning\Cases\Wisconsin")

In [17]:
# Read the breast-cancer data, using the first CSV column as the row index.
b_cancer = pd.read_csv(filepath_or_buffer="BreastCancer.csv", index_col=0)

# Dummy-encode the frame, dropping the first level of each encoded column.
dum_canc = pd.get_dummies(data=b_cancer, drop_first=True)

In [18]:
# Target is the 'Class_Malignant' dummy column; every other column is a feature.
y = dum_canc['Class_Malignant']
X = dum_canc.drop(columns='Class_Malignant')

In [19]:
######################### Linear ################################
# Linear-kernel SVM behind min-max scaling, tuned over C only.
scaler = MinMaxScaler()
svm = SVC(kernel='linear')
pipe_l = Pipeline(steps=[('SCL', scaler), ('SVM', svm)])

# Coarser grid here: 5 evenly spaced candidates for C between 0.1 and 10.
params = dict(SVM__C=np.linspace(0.1, 10, 5))
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=2023)

gcv = GridSearchCV(
    estimator=pipe_l,
    param_grid=params,
    scoring='roc_auc',
    cv=kfold,
)
gcv.fit(X, y)

# Best candidate and its mean cross-validated ROC AUC, one per line.
print(gcv.best_params_, gcv.best_score_, sep='\n')

{'SVM__C': 0.1}
0.9951600593653611
