In [7]:
import pandas as pd
import numpy as np


from sklearn.decomposition import PCA

from sklearn.metrics import accuracy_score, roc_auc_score, r2_score
from sklearn.metrics import r2_score

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.model_selection import train_test_split

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

In [8]:
# Load the bankruptcy dataset; the first CSV column becomes the row index.
# The path is a raw string so the Windows backslashes stay literal: sequences
# such as "\H", "\m", "\C" and "\B" are invalid escapes in a normal string and
# raise a warning on current Python versions (an error in a future one).
# NOTE(review): absolute, machine-specific path — consider a configurable data dir.
banka = pd.read_csv(r"C:\Hogwarts\machine_learning\Cases\Bankruptcy\Bankruptcy.csv",
                    index_col=0)

In [9]:
# Target: column 'D' (presumably the bankruptcy indicator — confirm against the data).
y = banka['D']
# Features: every remaining column except the target 'D' and the 'YR' column.
X = banka.drop(columns=['D', 'YR'])

In [10]:
# Hold out 30% of the rows; the split is stratified on y and seeded with 2022
# so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    train_size=0.7,
    stratify=y,
    random_state=2022,
)

In [11]:
# Standardisation step with default settings; used as the first pipeline stage below.
scaler = StandardScaler()

In [29]:
# PCA step. The seed is set explicitly so this cell matches the pipeline whose
# parameters are printed below (the recorded output shows PCA(random_state=2022));
# with a bare PCA() a fresh Restart & Run All would not reproduce that output.
prcomp = PCA(random_state=2022)

# Linear-kernel SVM. probability=True enables predict_proba; it relies on an
# internal randomised procedure, hence the fixed seed.
svm = SVC(probability=True, random_state=2022, kernel='linear')

In [24]:
# Chain the three stages: standardise -> PCA -> SVM.
# The step names ('STD', 'PCA', 'SVM') are the prefixes used in the
# '<step>__<param>' keys of the hyper-parameter grid.
pipe_pca_svm = Pipeline(
    steps=[
        ('STD', scaler),
        ('PCA', prcomp),
        ('SVM', svm),
    ]
)

In [25]:
# List every tunable parameter of the pipeline (including nested '<step>__<param>'
# names) to find the exact keys accepted by the grid search.
print(pipe_pca_svm.get_params())

{'memory': None, 'steps': [('STD', StandardScaler()), ('PCA', PCA(random_state=2022)), ('SVM', SVC(kernel='linear', probability=True, random_state=2022))], 'verbose': False, 'STD': StandardScaler(), 'PCA': PCA(random_state=2022), 'SVM': SVC(kernel='linear', probability=True, random_state=2022), 'STD__copy': True, 'STD__with_mean': True, 'STD__with_std': True, 'PCA__copy': True, 'PCA__iterated_power': 'auto', 'PCA__n_components': None, 'PCA__n_oversamples': 10, 'PCA__power_iteration_normalizer': 'auto', 'PCA__random_state': 2022, 'PCA__svd_solver': 'auto', 'PCA__tol': 0.0, 'PCA__whiten': False, 'SVM__C': 1.0, 'SVM__break_ties': False, 'SVM__cache_size': 200, 'SVM__class_weight': None, 'SVM__coef0': 0.0, 'SVM__decision_function_shape': 'ovr', 'SVM__degree': 3, 'SVM__gamma': 'scale', 'SVM__kernel': 'linear', 'SVM__max_iter': -1, 'SVM__probability': True, 'SVM__random_state': 2022, 'SVM__shrinking': True, 'SVM__tol': 0.001, 'SVM__verbose': False}


In [26]:
# Hyper-parameter grid: 5 PCA settings x 4 SVM settings = 20 candidates.
# Fractional n_components values are interpreted by PCA as the share of
# variance to retain; C is the SVM regularisation parameter.
params = dict(
    PCA__n_components=[0.75, 0.8, 0.85, 0.9, 0.95],
    SVM__C=[0.4, 1, 2, 2.5],
)
          

In [27]:
# 5-fold stratified cross-validation; rows are shuffled before splitting with a
# fixed seed so the folds are repeatable.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=2022)

In [31]:
# Exhaustive search over the PCA/SVM grid, scored by ROC AUC on the stratified folds.
# NOTE(review): the search is fitted on the full X, y; the X_train/X_test split
# created earlier is not used by this cell — confirm that is intended.
gcv = GridSearchCV(
    estimator=pipe_pca_svm,
    param_grid=params,
    scoring="roc_auc",
    cv=kfold,
    verbose=3,  # logs one line per (candidate, fold) fit
)

gcv.fit(X, y)

Fitting 5 folds for each of 20 candidates, totalling 100 fits
[CV 1/5] END PCA__n_components=0.75, SVM__C=0.4;, score=0.863 total time=   0.0s
[CV 2/5] END PCA__n_components=0.75, SVM__C=0.4;, score=0.835 total time=   0.0s
[CV 3/5] END PCA__n_components=0.75, SVM__C=0.4;, score=0.734 total time=   0.0s
[CV 4/5] END PCA__n_components=0.75, SVM__C=0.4;, score=0.970 total time=   0.0s
[CV 5/5] END PCA__n_components=0.75, SVM__C=0.4;, score=0.935 total time=   0.0s
[CV 1/5] END ..PCA__n_components=0.75, SVM__C=1;, score=0.868 total time=   0.0s
[CV 2/5] END ..PCA__n_components=0.75, SVM__C=1;, score=0.846 total time=   0.0s
[CV 3/5] END ..PCA__n_components=0.75, SVM__C=1;, score=0.722 total time=   0.0s
[CV 4/5] END ..PCA__n_components=0.75, SVM__C=1;, score=0.982 total time=   0.0s
[CV 5/5] END ..PCA__n_components=0.75, SVM__C=1;, score=0.929 total time=   0.0s
[CV 1/5] END ..PCA__n_components=0.75, SVM__C=2;, score=0.868 total time=   0.0s
[CV 2/5] END ..PCA__n_components=0.75, SVM__C=2

In [32]:
# Parameter combination that achieved the best mean cross-validated score.
print(gcv.best_params_)

{'PCA__n_components': 0.75, 'SVM__C': 1}


In [33]:
# Mean cross-validated score (ROC AUC, per the `scoring` setting) of the best candidate.
print(gcv.best_score_)

0.869484361792054
