In [1]:
import numpy as np, pandas as pd, seaborn as sns, matplotlib.pyplot as plt
pd.pandas.set_option('display.max_columns',None)

In [2]:
from sklearn.ensemble import StackingClassifier, RandomForestClassifier,AdaBoostClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier
import lightgbm as lgb
from sklearn.tree import DecisionTreeClassifier

In [3]:
from sklearn.metrics import confusion_matrix,classification_report,accuracy_score,fbeta_score

In [4]:
rfc = RandomForestClassifier()
ada = AdaBoostClassifier()
gbc = GradientBoostingClassifier()
lr = LogisticRegression()
svc = SVC()
gnb = GaussianNB()
xgb = XGBClassifier()
dt = DecisionTreeClassifier()

In [5]:
X = pd.read_csv('Data/X_train_enc.csv')
y = pd.read_csv('Data/y_train_enc.csv')

X_test = pd.read_csv('Data/X_test_enc.csv')
y_test = pd.read_csv('Data/y_test_enc.csv')

In [6]:
from imblearn.over_sampling import ADASYN
X, y = ADASYN().fit_resample(X, y)
print("Shape: ",X.shape,y.shape)
print("\n\nValue counts: \n",y['Attrition'].value_counts())

Shape:  (1685, 43) (1685, 1)


Value counts: 
 0    863
1    822
Name: Attrition, dtype: int64


## RandomForest as meta

In [7]:
estimators = [
    ('ada',ada),
    
    ('gbc',gbc),
    ('lr',lr),
    ('svc',svc),
    
    ('gnb',gnb),
    
    ('xgb',xgb),
    
    ('dt',dt)
    #('rfc',rfc)
]

In [8]:
stk_classifier = StackingClassifier(estimators=estimators, 
                                    final_estimator=RandomForestClassifier(n_estimators=100),
                                    passthrough=True,
                                    cv=5,
                                    n_jobs=-1,
                                    verbose=3)

In [17]:
stk_classifier.fit(X,y['Attrition'])
y_pred = stk_classifier.predict(X_test)
print('Accuracy:',accuracy_score(y_test,y_pred))
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))

Accuracy: 0.8798185941043084
[[359  11]
 [ 42  29]]
              precision    recall  f1-score   support

           0       0.90      0.97      0.93       370
           1       0.72      0.41      0.52        71

    accuracy                           0.88       441
   macro avg       0.81      0.69      0.73       441
weighted avg       0.87      0.88      0.87       441



## Adaboost as meta

In [18]:
estimators = [
    #('ada',ada),
    
    ('gbc',gbc),
    ('lr',lr),
    ('svc',svc),
    
    ('gnb',gnb),
    
    ('xgb',xgb),
    
    ('dt',dt),
    ('rfc',rfc)
]

In [19]:
stk_classifier = StackingClassifier(estimators=estimators, 
                                    final_estimator=AdaBoostClassifier(),
                                    passthrough=True,
                                    cv=5,
                                    n_jobs=-1,
                                    verbose=3)

In [20]:
stk_classifier.fit(X,y['Attrition'])
y_pred = stk_classifier.predict(X_test)
print('Accuracy:',accuracy_score(y_test,y_pred))
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))

Accuracy: 0.8662131519274376
[[355  15]
 [ 44  27]]
              precision    recall  f1-score   support

           0       0.89      0.96      0.92       370
           1       0.64      0.38      0.48        71

    accuracy                           0.87       441
   macro avg       0.77      0.67      0.70       441
weighted avg       0.85      0.87      0.85       441



## Gradient Boost as meta

In [21]:
estimators = [
    ('ada',ada),
    
    #('gbc',gbc),
    ('lr',lr),
    ('svc',svc),
    
    ('gnb',gnb),
    
    ('xgb',xgb),
    
    ('dt',dt),
    ('rfc',rfc)
]

In [22]:
stk_classifier = StackingClassifier(estimators=estimators, 
                                    final_estimator=GradientBoostingClassifier(),
                                    passthrough=True,
                                    cv=5,
                                    n_jobs=-1,
                                    verbose=3)

In [31]:
stk_classifier.fit(X,y['Attrition'])
y_pred = stk_classifier.predict(X_test)
print('Accuracy:',accuracy_score(y_test,y_pred))
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))

Accuracy: 0.8616780045351474
[[358  12]
 [ 49  22]]
              precision    recall  f1-score   support

           0       0.88      0.97      0.92       370
           1       0.65      0.31      0.42        71

    accuracy                           0.86       441
   macro avg       0.76      0.64      0.67       441
weighted avg       0.84      0.86      0.84       441



## LR as meta

In [24]:
estimators = [
    ('ada',ada),
    
    ('gbc',gbc),
    #('lr',lr),
    ('svc',svc),
    
    ('gnb',gnb),
    
    ('xgb',xgb),
    
    ('dt',dt),
    ('rfc',rfc)
]

In [25]:
stk_classifier = StackingClassifier(estimators=estimators, 
                                    final_estimator=LogisticRegression(),
                                    passthrough=True,
                                    cv=5,
                                    n_jobs=-1,
                                    verbose=3)

In [26]:
stk_classifier.fit(X,y['Attrition'])
y_pred = stk_classifier.predict(X_test)
print('Accuracy:',accuracy_score(y_test,y_pred))
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))

Accuracy: 0.7165532879818595
[[275  95]
 [ 30  41]]
              precision    recall  f1-score   support

           0       0.90      0.74      0.81       370
           1       0.30      0.58      0.40        71

    accuracy                           0.72       441
   macro avg       0.60      0.66      0.61       441
weighted avg       0.81      0.72      0.75       441



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


## SVC as meta

In [27]:
estimators = [
    ('ada',ada),
    
    ('gbc',gbc),
    ('lr',lr),
    #('svc',svc),
    
    ('gnb',gnb),
    
    ('xgb',xgb),
    
    ('dt',dt),
    ('rfc',rfc)
]


stk_classifier = StackingClassifier(estimators=estimators, 
                                    final_estimator=SVC(),
                                    passthrough=True,
                                    cv=5,
                                    n_jobs=-1,
                                    verbose=3)


stk_classifier.fit(X,y['Attrition'])
y_pred = stk_classifier.predict(X_test)
print('Accuracy:',accuracy_score(y_test,y_pred))
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))

Accuracy: 0.5986394557823129
[[227 143]
 [ 34  37]]
              precision    recall  f1-score   support

           0       0.87      0.61      0.72       370
           1       0.21      0.52      0.29        71

    accuracy                           0.60       441
   macro avg       0.54      0.57      0.51       441
weighted avg       0.76      0.60      0.65       441



## Gaussian NB as meta

In [28]:
estimators = [
    ('ada',ada),
    
    ('gbc',gbc),
    ('lr',lr),
    ('svc',svc),
    
    #('gnb',gnb),
    
    ('xgb',xgb),
    
    ('dt',dt),
    ('rfc',rfc)
]


stk_classifier = StackingClassifier(estimators=estimators, 
                                    final_estimator=GaussianNB(),
                                    passthrough=True,
                                    cv=5,
                                    n_jobs=-1,
                                    verbose=3)


stk_classifier.fit(X,y['Attrition'])
y_pred = stk_classifier.predict(X_test)
print('Accuracy:',accuracy_score(y_test,y_pred))
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))

Accuracy: 0.8344671201814059
[[330  40]
 [ 33  38]]
              precision    recall  f1-score   support

           0       0.91      0.89      0.90       370
           1       0.49      0.54      0.51        71

    accuracy                           0.83       441
   macro avg       0.70      0.71      0.71       441
weighted avg       0.84      0.83      0.84       441



## XGboost as meta

In [29]:
estimators = [
    ('ada',ada),
    
    ('gbc',gbc),
    ('lr',lr),
    ('svc',svc),
    
    ('gnb',gnb),
    
    #('xgb',xgb),
    
    ('dt',dt),
    ('rfc',rfc)
]


stk_classifier = StackingClassifier(estimators=estimators, 
                                    final_estimator=XGBClassifier(),
                                    passthrough=True,
                                    cv=5,
                                    n_jobs=-1,
                                    verbose=3)


stk_classifier.fit(X,y['Attrition'])
y_pred = stk_classifier.predict(X_test)
print('Accuracy:',accuracy_score(y_test,y_pred))
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))



Accuracy: 0.8594104308390023
[[362   8]
 [ 54  17]]
              precision    recall  f1-score   support

           0       0.87      0.98      0.92       370
           1       0.68      0.24      0.35        71

    accuracy                           0.86       441
   macro avg       0.78      0.61      0.64       441
weighted avg       0.84      0.86      0.83       441



## DT as meta

In [30]:
estimators = [
    ('ada',ada),
    
    ('gbc',gbc),
    ('lr',lr),
    ('svc',svc),
    
    ('gnb',gnb),
    
    ('xgb',xgb),
    
    #('dt',dt),
    ('rfc',rfc)
]


stk_classifier = StackingClassifier(estimators=estimators, 
                                    final_estimator=DecisionTreeClassifier(max_depth=4),
                                    passthrough=True,
                                    cv=5,
                                    n_jobs=-1,
                                    verbose=3)


stk_classifier.fit(X,y['Attrition'])
y_pred = stk_classifier.predict(X_test)
print('Accuracy:',accuracy_score(y_test,y_pred))
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))

Accuracy: 0.8662131519274376
[[358  12]
 [ 47  24]]
              precision    recall  f1-score   support

           0       0.88      0.97      0.92       370
           1       0.67      0.34      0.45        71

    accuracy                           0.87       441
   macro avg       0.78      0.65      0.69       441
weighted avg       0.85      0.87      0.85       441

