In [61]:
import pandas as pd
import numpy as np

from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.svm import SVC
from sklearn.metrics import r2_score

from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier

In [62]:
hr = pd.read_csv("C:\Hogwarts\machine_learning\Cases\human-resources-analytics\HR_comma_sep.csv")

In [63]:
hr.head()

Unnamed: 0,satisfaction_level,last_evaluation,number_project,average_montly_hours,time_spend_company,Work_accident,left,promotion_last_5years,Department,salary
0,0.38,0.53,2,157,3,0,1,0,sales,low
1,0.8,0.86,5,262,6,0,1,0,sales,medium
2,0.11,0.88,7,272,4,0,1,0,sales,medium
3,0.72,0.87,5,223,5,0,1,0,sales,low
4,0.37,0.52,2,159,3,0,1,0,sales,low


In [74]:
y = hr["left"]

X = hr.drop(["left", "Department", "salary"], axis = 1)

In [75]:
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    stratify = y,
                                                    random_state = 2022,
                                                    train_size = 0.7)

In [78]:
dtc = DecisionTreeClassifier(random_state = 2022)

scaler = StandardScaler()
svm = SVC(probability = True,
         random_state = 2022,
         kernel = "linear")

pipe_svm = Pipeline([("STD", scaler), ("SVM", svm)])
da = LinearDiscriminantAnalysis()

voting = VotingClassifier([("Tree", dtc),
                          ("SVM_P", pipe_svm),
                          ("LDA", da)],
                         voting = "soft")

In [79]:
voting.fit(X_train, y_train)

VotingClassifier(estimators=[('Tree',
                              DecisionTreeClassifier(random_state=2022)),
                             ('SVM_P',
                              Pipeline(steps=[('STD', StandardScaler()),
                                              ('SVM',
                                               SVC(kernel='linear',
                                                   probability=True,
                                                   random_state=2022))])),
                             ('LDA', LinearDiscriminantAnalysis())],
                 voting='soft')

In [81]:
y_pred = voting.predict(X_test)
print(accuracy_score(y_test, y_pred))

0.9148888888888889


In [82]:
y_pred_prob = voting.predict_proba(X_test)[:,1]
print(roc_auc_score(y_test, y_pred_prob))

0.9733257743653503


In [84]:
# Separately evaluating Classifier

dtc.fit(X_train, y_train)
y_pred_prob = dtc.predict_proba(X_test)[:,1]
roc_dtc = roc_auc_score(y_test, y_pred_prob)

In [86]:
pipe_svm.fit(X_train, y_train)
y_pred_prob = dtc.predict_proba(X_test)[:,1]
roc_svm = roc_auc_score(y_test, y_pred_prob)

In [87]:
da.fit(X_train, y_train)
y_pred_prob = dtc.predict_proba(X_test)[:,1]
roc_da = roc_auc_score(y_test, y_pred_prob)

In [89]:
# weighted

voting = VotingClassifier([("Tree", dtc),
                          ("SVM_P", pipe_svm),
                          ("LDA", da)],
                         voting = "soft",
                         weights = [roc_dtc, roc_svm, roc_da])

In [90]:
voting.fit(X_train, y_train)

VotingClassifier(estimators=[('Tree',
                              DecisionTreeClassifier(random_state=2022)),
                             ('SVM_P',
                              Pipeline(steps=[('STD', StandardScaler()),
                                              ('SVM',
                                               SVC(kernel='linear',
                                                   probability=True,
                                                   random_state=2022))])),
                             ('LDA', LinearDiscriminantAnalysis())],
                 voting='soft',
                 weights=[0.9704868046178324, 0.9704868046178324,
                          0.9704868046178324])

In [92]:
y_pred = voting.predict(X_test)
print(accuracy_score(y_test, y_pred))

0.9148888888888889


In [95]:
y_pred_prob = voting.predict_proba(X_test)[:,1]
print(roc_auc_score(y_pred, y_pred_prob))

1.0
