In [1]:
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.metrics import roc_auc_score,accuracy_score
from sklearn.model_selection import train_test_split,StratifiedKFold,GridSearchCV
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import make_column_transformer 
from sklearn.compose import make_column_selector
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, StandardScaler

In [2]:
# Read the raw table from 'HR_comma_sep.csv' (relative path, resolved against
# the kernel's working directory) into a DataFrame.
hr = pd.read_csv(filepath_or_buffer='HR_comma_sep.csv')

In [3]:
# Preview the first five rows to check the column names and value types.
hr.head(n=5)

Unnamed: 0,satisfaction_level,last_evaluation,number_project,average_montly_hours,time_spend_company,Work_accident,left,promotion_last_5years,Department,salary
0,0.38,0.53,2,157,3,0,1,0,sales,low
1,0.8,0.86,5,262,6,0,1,0,sales,medium
2,0.1,0.77,6,247,4,0,1,0,sales,low
3,0.92,0.85,5,259,5,0,1,0,sales,low
4,0.89,1.0,5,224,5,0,1,0,sales,low


In [4]:
# Target vector: the 'left' column. Feature matrix: every other column of hr.
y = hr.loc[:, 'left']
X = hr.drop(columns=['left'])

In [5]:
# Hold out 30% of the rows as a test set. Stratifying on y keeps the class
# proportions the same in both partitions; the fixed seed makes the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=24,
)

In [6]:

# Dense one-hot encoder for the object-dtype columns; drop='first' omits the
# first level of every encoded feature. Output is kept as a pandas DataFrame.
ohe = OneHotEncoder(drop='first', sparse_output=False)
ohe = ohe.set_output(transform='pandas')

# Column transformer: non-object columns pass through untouched, object
# columns go through the encoder. verbose_feature_names_out=False keeps the
# output column names free of transformer-name prefixes.
ct = make_column_transformer(
    ('passthrough', make_column_selector(dtype_exclude=object)),
    (ohe, make_column_selector(dtype_include=object)),
    verbose_feature_names_out=False,
)
ct = ct.set_output(transform='pandas')

# Feature scalers. scaler_mm is the one plugged into the pipelines in the
# cells shown here; scaler_std is created alongside it but not used in them.
scaler_mm = MinMaxScaler()
scaler_std = StandardScaler()

In [7]:
# Linear kernel
# Baseline model: column transformer -> min-max scaling -> linear SVC, fitted
# on the training split and scored by accuracy on the held-out test split.
#
# probability=True is intentionally not set: it makes fit() run an additional
# internal 5-fold cross-validation to calibrate class probabilities, and this
# cell only needs the hard labels from predict(), which do not depend on that
# calibration.
svm = SVC(kernel='linear',random_state=24)
pipe = Pipeline([('CT',ct),('SCL',scaler_mm),('SVM',svm)])
pipe.fit(X_train,y_train)
y_pred = pipe.predict(X_test)
print(accuracy_score(y_test,y_pred))


0.7743943098466326


In [8]:
# Hyperparameter search with GridSearchCV

In [9]:

# Tune C for the linear-kernel SVC with 5-fold stratified cross-validation,
# ranking candidates by ROC AUC. The search is fitted on the full X, y (not
# only the training split), so best_score_ is a cross-validated estimate over
# all rows.
#
# probability=True is intentionally not set: the 'roc_auc' scorer ranks
# samples with decision_function when the estimator provides one, so the
# probability calibration (an extra internal 5-fold CV inside every fit)
# would only add run time without changing any score.
svm = SVC(kernel='linear',random_state=24)
pipe = Pipeline([('CT',ct),('SCL',scaler_mm),('SVM',svm)])
kfold = StratifiedKFold(n_splits=5,shuffle=True,random_state=24)
# Three evenly spaced candidates for C between 0.001 and 5.
params = {'SVM__C' : np.linspace(0.001,5,3)}
gcv = GridSearchCV(pipe,param_grid=params,cv=kfold,scoring='roc_auc',verbose=3)
gcv.fit(X,y)



Fitting 5 folds for each of 3 candidates, totalling 15 fits
[CV 1/5] END ......................SVM__C=0.001;, score=0.803 total time=  26.4s
[CV 2/5] END ......................SVM__C=0.001;, score=0.793 total time=  25.3s
[CV 3/5] END ......................SVM__C=0.001;, score=0.802 total time=  25.4s
[CV 4/5] END ......................SVM__C=0.001;, score=0.796 total time=  25.4s
[CV 5/5] END ......................SVM__C=0.001;, score=0.813 total time=  25.6s
[CV 1/5] END .........SVM__C=2.5004999999999997;, score=0.807 total time=  27.8s
[CV 2/5] END .........SVM__C=2.5004999999999997;, score=0.801 total time=  27.8s
[CV 3/5] END .........SVM__C=2.5004999999999997;, score=0.808 total time=  28.0s
[CV 4/5] END .........SVM__C=2.5004999999999997;, score=0.802 total time=  27.7s
[CV 5/5] END .........SVM__C=2.5004999999999997;, score=0.818 total time=  28.1s
[CV 1/5] END ........................SVM__C=5.0;, score=0.808 total time=  30.3s
[CV 2/5] END ........................SVM__C=5.0;,

In [10]:
# Report the winning parameter setting and its mean cross-validated score,
# one per line.
print(gcv.best_params_, gcv.best_score_, sep='\n')


{'SVM__C': 5.0}
0.80745383329788


In [11]:
# Radial (RBF) kernel: search C and gamma jointly on a 3 x 3 grid with 5-fold
# stratified cross-validation, ranking candidates by ROC AUC. The search is
# fitted on the full X, y.
# NOTE(review): probability=True is only needed if predict_proba is called on
# the fitted search afterwards; 'roc_auc' scoring itself does not appear to
# require it. Confirm, and drop it to shorten every fit.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=24)
svm = SVC(kernel='rbf', probability=True, random_state=24)
pipe = Pipeline(steps=[('CT', ct), ('SCL', scaler_mm), ('SVM', svm)])
params = {
    'SVM__C': np.linspace(0.001, 5, 3),
    'SVM__gamma': np.linspace(0.001, 5, 3),
}
gcv = GridSearchCV(
    estimator=pipe,
    param_grid=params,
    scoring='roc_auc',
    cv=kfold,
    verbose=3,
)
gcv.fit(X, y)



Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV 1/5] END ....SVM__C=0.001, SVM__gamma=0.001;, score=0.767 total time=  31.7s
[CV 2/5] END ....SVM__C=0.001, SVM__gamma=0.001;, score=0.771 total time=  32.8s
[CV 3/5] END ....SVM__C=0.001, SVM__gamma=0.001;, score=0.762 total time=  31.9s
[CV 4/5] END ....SVM__C=0.001, SVM__gamma=0.001;, score=0.761 total time=  33.8s
[CV 5/5] END ....SVM__C=0.001, SVM__gamma=0.001;, score=0.792 total time=  33.7s
[CV 1/5] END SVM__C=0.001, SVM__gamma=2.5004999999999997;, score=0.931 total time=  34.5s
[CV 2/5] END SVM__C=0.001, SVM__gamma=2.5004999999999997;, score=0.931 total time=  35.2s
[CV 3/5] END SVM__C=0.001, SVM__gamma=2.5004999999999997;, score=0.941 total time=  33.2s
[CV 4/5] END SVM__C=0.001, SVM__gamma=2.5004999999999997;, score=0.928 total time=  34.1s
[CV 5/5] END SVM__C=0.001, SVM__gamma=2.5004999999999997;, score=0.935 total time=  33.5s
[CV 1/5] END ......SVM__C=0.001, SVM__gamma=5.0;, score=0.936 total time=  36.8s
[CV 

In [13]:
# Report the winning (C, gamma) setting and its mean cross-validated score,
# one per line.
print(gcv.best_params_, gcv.best_score_, sep='\n')


{'SVM__C': 5.0, 'SVM__gamma': 5.0}
0.9814093756912985
