In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_curve, roc_auc_score
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import log_loss
from sklearn.model_selection import StratifiedKFold, cross_val_score, KFold
from sklearn.metrics import r2_score
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV

In [8]:
df = pd.read_csv("C:/Users/Administrator.DAI-PC2/Desktop/ML/Day1/Bankruptcy.csv")

y = df["D"]
X = df.drop(["D","NO"], axis = 1)

svc = SVC(C= 0.1, kernel = 'linear', probability = True, random_state=24)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state=24, stratify=y)

kfold = StratifiedKFold(n_splits = 5, shuffle = True, random_state=24)

svc.fit(X_train, y_train)

y_pred = svc.predict(X_test)
print(accuracy_score(y_test, y_pred))

y_pred_prob = svc.predict_proba(X_test)
print(log_loss(y_test, y_pred_prob))

0.75
0.529799204969636


In [9]:
# SVC with scaling using pipeline, kernel = 'linear'

std_scaler = StandardScaler()
std_mm = MinMaxScaler()
pipe = Pipeline([('SCL', None), ('SVC',svc)])
params = {'SVC__C': np.linspace(0.001, 5, 20), 'SCL':[std_scaler, std_mm, None]}
gcv = GridSearchCV(pipe, param_grid = params, cv = kfold, scoring = 'neg_log_loss')
gcv.fit(X, y)
print(gcv.best_score_)
print(gcv.best_params_)

-0.46079507089206617
{'SCL': None, 'SVC__C': 0.26410526315789473}


In [10]:
#kernel = poly

svc1 = SVC(C= 0.1, kernel = 'poly', probability = True, random_state=24)

pipe = Pipeline([('SCL', None), ('SVC',svc1)])
params = {'SVC__C': np.linspace(0.001, 5, 20), 'SCL':[std_scaler, std_mm, None], 'SVC__degree' : [2,3], 'SVC__coef0': np.linspace(0, 3, 5)}
gcv_poly = GridSearchCV(pipe, param_grid = params, cv = kfold, scoring = 'neg_log_loss', verbose = 2)
gcv_poly.fit(X, y)
print(gcv_poly.best_score_)
print(gcv_poly.best_params_)

Fitting 5 folds for each of 600 candidates, totalling 3000 fits
[CV] END SCL=StandardScaler(), SVC__C=0.001, SVC__coef0=0.0, SVC__degree=2; total time=   0.0s
[CV] END SCL=StandardScaler(), SVC__C=0.001, SVC__coef0=0.0, SVC__degree=2; total time=   0.0s
[CV] END SCL=StandardScaler(), SVC__C=0.001, SVC__coef0=0.0, SVC__degree=2; total time=   0.0s
[CV] END SCL=StandardScaler(), SVC__C=0.001, SVC__coef0=0.0, SVC__degree=2; total time=   0.0s
[CV] END SCL=StandardScaler(), SVC__C=0.001, SVC__coef0=0.0, SVC__degree=2; total time=   0.0s
[CV] END SCL=StandardScaler(), SVC__C=0.001, SVC__coef0=0.0, SVC__degree=3; total time=   0.0s
[CV] END SCL=StandardScaler(), SVC__C=0.001, SVC__coef0=0.0, SVC__degree=3; total time=   0.0s
[CV] END SCL=StandardScaler(), SVC__C=0.001, SVC__coef0=0.0, SVC__degree=3; total time=   0.0s
[CV] END SCL=StandardScaler(), SVC__C=0.001, SVC__coef0=0.0, SVC__degree=3; total time=   0.0s
[CV] END SCL=StandardScaler(), SVC__C=0.001, SVC__coef0=0.0, SVC__degree=3; total

In [11]:
#kernel = rbf

svc2 = SVC(C= 0.1, kernel = 'rbf', probability = True, random_state=24)

pipe = Pipeline([('SCL', None), ('SVC',svc2)])
params = {'SVC__C': np.linspace(0.001, 5, 20), 'SCL':[std_scaler, std_mm, None], 'SVC__degree' : [2,3], 'SVC__gamma': np.linspace(0.001, 5, 5)}
gcv_rbf = GridSearchCV(pipe, param_grid = params, cv = kfold, scoring = 'neg_log_loss', verbose = 2)
gcv_rbf.fit(X, y)
print(gcv_rbf.best_score_)
print(gcv_rbf.best_params_)

Fitting 5 folds for each of 600 candidates, totalling 3000 fits
[CV] END SCL=StandardScaler(), SVC__C=0.001, SVC__degree=2, SVC__gamma=0.001; total time=   0.0s
[CV] END SCL=StandardScaler(), SVC__C=0.001, SVC__degree=2, SVC__gamma=0.001; total time=   0.0s
[CV] END SCL=StandardScaler(), SVC__C=0.001, SVC__degree=2, SVC__gamma=0.001; total time=   0.0s
[CV] END SCL=StandardScaler(), SVC__C=0.001, SVC__degree=2, SVC__gamma=0.001; total time=   0.0s
[CV] END SCL=StandardScaler(), SVC__C=0.001, SVC__degree=2, SVC__gamma=0.001; total time=   0.0s
[CV] END SCL=StandardScaler(), SVC__C=0.001, SVC__degree=2, SVC__gamma=1.2507499999999998; total time=   0.0s
[CV] END SCL=StandardScaler(), SVC__C=0.001, SVC__degree=2, SVC__gamma=1.2507499999999998; total time=   0.0s
[CV] END SCL=StandardScaler(), SVC__C=0.001, SVC__degree=2, SVC__gamma=1.2507499999999998; total time=   0.0s
[CV] END SCL=StandardScaler(), SVC__C=0.001, SVC__degree=2, SVC__gamma=1.2507499999999998; total time=   0.0s
[CV] END SC