In [1]:
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV,StratifiedKFold
from sklearn.preprocessing import OneHotEncoder,StandardScaler
from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.compose import make_column_selector,make_column_transformer

In [2]:
# Read the HR dataset from a CSV file in the current working directory.
hr = pd.read_csv(filepath_or_buffer="HR_comma_sep.csv")
# Show the first five rows as a quick sanity check of the loaded columns.
hr.head(n=5)

Unnamed: 0,satisfaction_level,last_evaluation,number_project,average_montly_hours,time_spend_company,Work_accident,left,promotion_last_5years,Department,salary
0,0.38,0.53,2,157,3,0,1,0,sales,low
1,0.8,0.86,5,262,6,0,1,0,sales,medium
2,0.1,0.77,6,247,4,0,1,0,sales,low
3,0.92,0.85,5,259,5,0,1,0,sales,low
4,0.89,1.0,5,224,5,0,1,0,sales,low


In [3]:
# Split the frame into the target (y) and the predictors (X).
# 'left' is the target column and must not appear among the predictors.
# The preview of `hr` also lists an 'Unnamed: 0' column, which looks like a
# row index that was written out with the CSV (TODO confirm against the raw
# file). A row index is not a real predictor, so it is removed as well;
# errors='ignore' makes that removal a no-op when the column is absent.
# y is taken first so that a missing 'left' column still raises KeyError.
y = hr['left']
X = hr.drop(columns=['left', 'Unnamed: 0'], errors='ignore')

In [4]:
# One-hot encoder for the categorical columns: dense output, with the first
# category of every encoded feature dropped (drop='first').
ohc = OneHotEncoder(drop='first', sparse_output=False)

# Object-dtype columns go through the encoder; int64/float64 columns are
# passed through unchanged. Columns of any other dtype are selected by
# neither branch.
ct_enc = make_column_transformer(
    (ohc, make_column_selector(dtype_include=object)),
    ('passthrough', make_column_selector(dtype_include=['int64', 'float64'])),
    verbose_feature_names_out=False,
)
# set_output returns the same transformer, now configured to emit a pandas
# DataFrame from transform / fit_transform.
ct_enc = ct_enc.set_output(transform='pandas')

In [5]:
# Individual estimators used as pipeline stages.
scaler = StandardScaler()
prcomp = PCA()  # n_components is not set here
svm = SVC(kernel='linear', random_state=23)

# Column transform -> standardise -> PCA -> linear-kernel SVC.
# The step names are the prefixes used to address step parameters,
# e.g. 'PCA__n_components' or 'SVC__C'.
pipe_svc = Pipeline(
    steps=[
        ('TRNF', ct_enc),
        ('SCL', scaler),
        ('PCA', prcomp),
        ('SVC', svm),
    ]
)

In [6]:
# Five stratified folds; rows are shuffled before splitting, with a fixed
# seed so the same splits are produced on every run.
kfold = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=23,
)

In [9]:
# Hyper-parameter grid, keyed as '<pipeline step name>__<parameter name>'.
params = {
    # candidate numbers of principal components: 2, 3, 4 and 5
    'PCA__n_components': list(range(2, 6)),
    # five evenly spaced values of C from 0.001 to 5 (both ends included)
    'SVC__C': np.linspace(start=0.001, stop=5, num=5),
}

In [10]:
# Exhaustive search over `params` with the stratified folds in `kfold`
# (20 candidates x 5 folds = 100 fits, each taking several seconds when run
# one after another).
# The individual fits are independent of each other, so n_jobs=-1 spreads them
# over all available CPU cores; the selected parameters and scores are the
# same as for a serial run.
# scoring is not set, so the search falls back to the pipeline's own `score`.
gcv = GridSearchCV(
    pipe_svc,
    param_grid=params,
    cv=kfold,
    n_jobs=-1,
    verbose=3,
)
gcv.fit(X, y)

Fitting 5 folds for each of 20 candidates, totalling 100 fits
[CV 1/5] END .PCA__n_components=2, SVC__C=0.001;, score=0.762 total time=   5.6s
[CV 2/5] END .PCA__n_components=2, SVC__C=0.001;, score=0.762 total time=   5.3s
[CV 3/5] END .PCA__n_components=2, SVC__C=0.001;, score=0.762 total time=   5.3s
[CV 4/5] END .PCA__n_components=2, SVC__C=0.001;, score=0.762 total time=   5.3s
[CV 5/5] END .PCA__n_components=2, SVC__C=0.001;, score=0.762 total time=   5.0s
[CV 1/5] END PCA__n_components=2, SVC__C=1.2507499999999998;, score=0.762 total time=   6.2s
[CV 2/5] END PCA__n_components=2, SVC__C=1.2507499999999998;, score=0.762 total time=   6.3s
[CV 3/5] END PCA__n_components=2, SVC__C=1.2507499999999998;, score=0.762 total time=   6.1s
[CV 4/5] END PCA__n_components=2, SVC__C=1.2507499999999998;, score=0.762 total time=   6.2s
[CV 5/5] END PCA__n_components=2, SVC__C=1.2507499999999998;, score=0.762 total time=   5.8s
[CV 1/5] END PCA__n_components=2, SVC__C=2.5004999999999997;, score=

[CV 1/5] END ...PCA__n_components=5, SVC__C=5.0;, score=0.762 total time=  10.0s
[CV 2/5] END ...PCA__n_components=5, SVC__C=5.0;, score=0.762 total time=   9.8s
[CV 3/5] END ...PCA__n_components=5, SVC__C=5.0;, score=0.762 total time=  10.0s
[CV 4/5] END ...PCA__n_components=5, SVC__C=5.0;, score=0.762 total time=  10.6s
[CV 5/5] END ...PCA__n_components=5, SVC__C=5.0;, score=0.762 total time=  10.4s


In [11]:
# Report the winning parameter combination and its cross-validated score.
# NOTE(review): every fold score visible in the fit log is 0.762, the same as
# the best score printed here. Compare it with the majority-class share of
# `y` before treating the result as evidence that the model learned anything.
for caption, result in (
    ("Best Parameters: ", gcv.best_params_),
    ("Best Score: ", gcv.best_score_),
):
    print(caption, result)

Best Parameters:  {'PCA__n_components': 2, 'SVC__C': 0.001}
Best Score:  0.7621207069023007
