In [1]:
from sklearn.ensemble import VotingClassifier
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn import tree
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier,DecisionTreeRegressor
from sklearn.tree import plot_tree
from sklearn.metrics import roc_auc_score,accuracy_score,r2_score
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import make_column_transformer 
from sklearn.compose import make_column_selector
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
import warnings
warnings.filterwarnings("ignore")

In [2]:
hr = pd.read_csv('HR_comma_sep.csv')

In [3]:
X = hr.drop('left',axis=1)
y = hr['left']

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X,y, random_state=24, test_size=0.3, stratify=y)

In [5]:

ohe = OneHotEncoder(sparse_output=False,handle_unknown='ignore')
ct = make_column_transformer(('passthrough', make_column_selector(dtype_exclude=object)  ),
                             (ohe,make_column_selector(dtype_include=object)  ),
                             verbose_feature_names_out=False)


In [6]:
dtc = DecisionTreeClassifier(random_state=24, max_depth=3)
lr= LogisticRegression(random_state=24)
nb= GaussianNB()
voting = VotingClassifier([('DT',dtc), ('LR', lr),('NB', nb)], voting='soft')
pipe= Pipeline([('CT',ct),('VOTING', voting)])
pipe.get_params()
kfold = StratifiedKFold(n_splits=5,shuffle=True,random_state=24)
params = {  'VOTING__LR__C':np.linspace(0.001,3,5), 'VOTING__DT__min_samples_split':[2,10,20], 'VOTING__DT__min_samples_leaf':[1,10,20],'VOTING__DT__max_depth':[None,2,3]}
gcv = GridSearchCV(pipe,param_grid=params,cv=kfold,scoring='roc_auc',verbose=3)
gcv.fit(X,y)


Fitting 5 folds for each of 135 candidates, totalling 675 fits
[CV 1/5] END VOTING__DT__max_depth=None, VOTING__DT__min_samples_leaf=1, VOTING__DT__min_samples_split=2, VOTING__LR__C=0.001;, score=0.981 total time=   0.1s
[CV 2/5] END VOTING__DT__max_depth=None, VOTING__DT__min_samples_leaf=1, VOTING__DT__min_samples_split=2, VOTING__LR__C=0.001;, score=0.979 total time=   0.1s
[CV 3/5] END VOTING__DT__max_depth=None, VOTING__DT__min_samples_leaf=1, VOTING__DT__min_samples_split=2, VOTING__LR__C=0.001;, score=0.976 total time=   0.1s
[CV 4/5] END VOTING__DT__max_depth=None, VOTING__DT__min_samples_leaf=1, VOTING__DT__min_samples_split=2, VOTING__LR__C=0.001;, score=0.981 total time=   0.1s
[CV 5/5] END VOTING__DT__max_depth=None, VOTING__DT__min_samples_leaf=1, VOTING__DT__min_samples_split=2, VOTING__LR__C=0.001;, score=0.983 total time=   0.1s
[CV 1/5] END VOTING__DT__max_depth=None, VOTING__DT__min_samples_leaf=1, VOTING__DT__min_samples_split=2, VOTING__LR__C=0.75075;, score=0.971 

In [7]:
print(gcv.best_params_)
print(gcv.best_score_)

{'VOTING__DT__max_depth': None, 'VOTING__DT__min_samples_leaf': 1, 'VOTING__DT__min_samples_split': 2, 'VOTING__LR__C': 0.001}
0.980087913237963
