In [14]:
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV,StratifiedKFold
from sklearn.preprocessing import OneHotEncoder,StandardScaler,MinMaxScaler
from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.compose import make_column_selector,make_column_transformer

In [2]:
comp_bankruptcy=pd.read_csv("data.csv")
comp_bankruptcy.head()

Unnamed: 0,Bankrupt?,ROA(C) before interest and depreciation before interest,ROA(A) before interest and % after tax,ROA(B) before interest and depreciation after tax,Operating Gross Margin,Realized Sales Gross Margin,Operating Profit Rate,Pre-tax net Interest Rate,After-tax net Interest Rate,Non-industry income and expenditure/revenue,...,Net Income to Total Assets,Total assets to GNP price,No-credit Interval,Gross Profit to Sales,Net Income to Stockholder's Equity,Liability to Equity,Degree of Financial Leverage (DFL),Interest Coverage Ratio (Interest expense to EBIT),Net Income Flag,Equity to Liability
0,1,0.370594,0.424389,0.40575,0.601457,0.601457,0.998969,0.796887,0.808809,0.302646,...,0.716845,0.009219,0.622879,0.601453,0.82789,0.290202,0.026601,0.56405,1,0.016469
1,1,0.464291,0.538214,0.51673,0.610235,0.610235,0.998946,0.79738,0.809301,0.303556,...,0.795297,0.008323,0.623652,0.610237,0.839969,0.283846,0.264577,0.570175,1,0.020794
2,1,0.426071,0.499019,0.472295,0.60145,0.601364,0.998857,0.796403,0.808388,0.302035,...,0.77467,0.040003,0.623841,0.601449,0.836774,0.290189,0.026555,0.563706,1,0.016474
3,1,0.399844,0.451265,0.457733,0.583541,0.583541,0.9987,0.796967,0.808966,0.30335,...,0.739555,0.003252,0.622929,0.583538,0.834697,0.281721,0.026697,0.564663,1,0.023982
4,1,0.465022,0.538432,0.522298,0.598783,0.598783,0.998973,0.797366,0.809304,0.303475,...,0.795016,0.003878,0.623521,0.598782,0.839973,0.278514,0.024752,0.575617,1,0.03549


In [4]:
X=comp_bankruptcy.drop('Bankrupt?',axis=1)
y=comp_bankruptcy['Bankrupt?']

In [23]:
prcomp=PCA()
scaler=MinMaxScaler()
svm=SVC(kernel='linear')
pipe_pca=Pipeline([('SCL',scaler),('PCA',prcomp),('SVM',svm)])

In [24]:
kfold=StratifiedKFold(n_splits=5,random_state=23,shuffle=True)
params={'PCA__n_components':[0.75,0.8,0.85,0.9],'SVM__C':np.linspace(0.001,3,5)}

In [26]:
gcv=GridSearchCV(pipe_pca,param_grid=params,cv=kfold,verbose=3)
gcv.fit(X,y)

Fitting 5 folds for each of 20 candidates, totalling 100 fits
[CV 1/5] END PCA__n_components=0.75, SVM__C=0.001;, score=0.968 total time=   0.2s
[CV 2/5] END PCA__n_components=0.75, SVM__C=0.001;, score=0.967 total time=   0.2s
[CV 3/5] END PCA__n_components=0.75, SVM__C=0.001;, score=0.968 total time=   0.3s
[CV 4/5] END PCA__n_components=0.75, SVM__C=0.001;, score=0.968 total time=   0.3s
[CV 5/5] END PCA__n_components=0.75, SVM__C=0.001;, score=0.968 total time=   0.2s
[CV 1/5] END PCA__n_components=0.75, SVM__C=0.75075;, score=0.968 total time=   0.2s
[CV 2/5] END PCA__n_components=0.75, SVM__C=0.75075;, score=0.968 total time=   0.3s
[CV 3/5] END PCA__n_components=0.75, SVM__C=0.75075;, score=0.968 total time=   0.3s
[CV 4/5] END PCA__n_components=0.75, SVM__C=0.75075;, score=0.968 total time=   0.3s
[CV 5/5] END PCA__n_components=0.75, SVM__C=0.75075;, score=0.968 total time=   0.2s
[CV 1/5] END PCA__n_components=0.75, SVM__C=1.5005;, score=0.968 total time=   0.3s
[CV 2/5] END P

[CV 4/5] END .PCA__n_components=0.9, SVM__C=3.0;, score=0.968 total time=   0.3s
[CV 5/5] END .PCA__n_components=0.9, SVM__C=3.0;, score=0.968 total time=   0.3s


In [27]:
print("Best Parameters: ",gcv.best_params_)
print("Best Score: ",gcv.best_score_)

Best Parameters:  {'PCA__n_components': 0.75, 'SVM__C': 0.75075}
Best Score:  0.9678838296581416
