In [1]:
import numpy as np, pandas as pd, seaborn as sns, matplotlib.pyplot as plt
pd.pandas.set_option('display.max_columns',None)

In [4]:
from sklearn.ensemble import StackingClassifier, RandomForestClassifier,AdaBoostClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier
import lightgbm as lgb
from sklearn.tree import DecisionTreeClassifier

In [9]:
from sklearn.metrics import confusion_matrix,classification_report,accuracy_score,fbeta_score

In [5]:
rfc = RandomForestClassifier()
ada = AdaBoostClassifier()
gbc = GradientBoostingClassifier()
lr = LogisticRegression()
svc = SVC()
gnb = GaussianNB()
xgb = XGBClassifier()
dt = DecisionTreeClassifier()

In [2]:
X = pd.read_csv('Data/X_train_enc.csv')
y = pd.read_csv('Data/y_train_enc.csv')

X_test = pd.read_csv('Data/X_test_enc.csv')
y_test = pd.read_csv('Data/y_test_enc.csv')

In [3]:
from imblearn.combine import SMOTEENN
X, y = SMOTEENN().fit_resample(X, y)
print("Shape: ",X.shape,y.shape)
print("\n\nValue counts: \n",y['Attrition'].value_counts())

Shape:  (1019, 43) (1019, 1)


Value counts: 
 1    597
0    422
Name: Attrition, dtype: int64


## RandomForest as meta

In [6]:
estimators = [
    ('ada',ada),
    
    ('gbc',gbc),
    ('lr',lr),
    ('svc',svc),
    
    ('gnb',gnb),
    
    ('xgb',xgb),
    
    ('dt',dt)
    #('rfc',rfc)
]

In [20]:
stk_classifier = StackingClassifier(estimators=estimators, 
                                    final_estimator=RandomForestClassifier(n_estimators=100),
                                    passthrough=True,
                                    cv=5,
                                    n_jobs=-1,
                                    verbose=3)

In [21]:
stk_classifier.fit(X,y['Attrition'])
y_pred = stk_classifier.predict(X_test)
print('Accuracy:',accuracy_score(y_test,y_pred))
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))

Accuracy: 0.8253968253968254
[[325  45]
 [ 32  39]]
              precision    recall  f1-score   support

           0       0.91      0.88      0.89       370
           1       0.46      0.55      0.50        71

    accuracy                           0.83       441
   macro avg       0.69      0.71      0.70       441
weighted avg       0.84      0.83      0.83       441



## Adaboost as meta

In [22]:
estimators = [
    #('ada',ada),
    
    ('gbc',gbc),
    ('lr',lr),
    ('svc',svc),
    
    ('gnb',gnb),
    
    ('xgb',xgb),
    
    ('dt',dt),
    ('rfc',rfc)
]

In [23]:
stk_classifier = StackingClassifier(estimators=estimators, 
                                    final_estimator=AdaBoostClassifier(),
                                    passthrough=True,
                                    cv=5,
                                    n_jobs=-1,
                                    verbose=3)

In [24]:
stk_classifier.fit(X,y['Attrition'])
y_pred = stk_classifier.predict(X_test)
print('Accuracy:',accuracy_score(y_test,y_pred))
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))

Accuracy: 0.8095238095238095
[[323  47]
 [ 37  34]]
              precision    recall  f1-score   support

           0       0.90      0.87      0.88       370
           1       0.42      0.48      0.45        71

    accuracy                           0.81       441
   macro avg       0.66      0.68      0.67       441
weighted avg       0.82      0.81      0.81       441



## Gradient Boost as meta

In [25]:
estimators = [
    ('ada',ada),
    
    #('gbc',gbc),
    ('lr',lr),
    ('svc',svc),
    
    ('gnb',gnb),
    
    ('xgb',xgb),
    
    ('dt',dt),
    ('rfc',rfc)
]

In [26]:
stk_classifier = StackingClassifier(estimators=estimators, 
                                    final_estimator=GradientBoostingClassifier(),
                                    passthrough=True,
                                    cv=5,
                                    n_jobs=-1,
                                    verbose=3)

In [27]:
stk_classifier.fit(X,y['Attrition'])
y_pred = stk_classifier.predict(X_test)
print('Accuracy:',accuracy_score(y_test,y_pred))
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))

Accuracy: 0.8140589569160998
[[327  43]
 [ 39  32]]
              precision    recall  f1-score   support

           0       0.89      0.88      0.89       370
           1       0.43      0.45      0.44        71

    accuracy                           0.81       441
   macro avg       0.66      0.67      0.66       441
weighted avg       0.82      0.81      0.82       441



## LR as meta

In [28]:
estimators = [
    ('ada',ada),
    
    ('gbc',gbc),
    #('lr',lr),
    ('svc',svc),
    
    ('gnb',gnb),
    
    ('xgb',xgb),
    
    ('dt',dt),
    ('rfc',rfc)
]

In [29]:
stk_classifier = StackingClassifier(estimators=estimators, 
                                    final_estimator=LogisticRegression(),
                                    passthrough=True,
                                    cv=5,
                                    n_jobs=-1,
                                    verbose=3)

In [30]:
stk_classifier.fit(X,y['Attrition'])
y_pred = stk_classifier.predict(X_test)
print('Accuracy:',accuracy_score(y_test,y_pred))
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))

Accuracy: 0.6190476190476191
[[225 145]
 [ 23  48]]
              precision    recall  f1-score   support

           0       0.91      0.61      0.73       370
           1       0.25      0.68      0.36        71

    accuracy                           0.62       441
   macro avg       0.58      0.64      0.55       441
weighted avg       0.80      0.62      0.67       441



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


## SVC as meta

In [31]:
estimators = [
    ('ada',ada),
    
    ('gbc',gbc),
    ('lr',lr),
    #('svc',svc),
    
    ('gnb',gnb),
    
    ('xgb',xgb),
    
    ('dt',dt),
    ('rfc',rfc)
]


stk_classifier = StackingClassifier(estimators=estimators, 
                                    final_estimator=SVC(),
                                    passthrough=True,
                                    cv=5,
                                    n_jobs=-1,
                                    verbose=3)


stk_classifier.fit(X,y['Attrition'])
y_pred = stk_classifier.predict(X_test)
print('Accuracy:',accuracy_score(y_test,y_pred))
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))

Accuracy: 0.41043083900226757
[[119 251]
 [  9  62]]
              precision    recall  f1-score   support

           0       0.93      0.32      0.48       370
           1       0.20      0.87      0.32        71

    accuracy                           0.41       441
   macro avg       0.56      0.60      0.40       441
weighted avg       0.81      0.41      0.45       441



## Gaussian NB as meta

In [32]:
estimators = [
    ('ada',ada),
    
    ('gbc',gbc),
    ('lr',lr),
    ('svc',svc),
    
    #('gnb',gnb),
    
    ('xgb',xgb),
    
    ('dt',dt),
    ('rfc',rfc)
]


stk_classifier = StackingClassifier(estimators=estimators, 
                                    final_estimator=GaussianNB(),
                                    passthrough=True,
                                    cv=5,
                                    n_jobs=-1,
                                    verbose=3)


stk_classifier.fit(X,y['Attrition'])
y_pred = stk_classifier.predict(X_test)
print('Accuracy:',accuracy_score(y_test,y_pred))
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))

Accuracy: 0.7845804988662132
[[302  68]
 [ 27  44]]
              precision    recall  f1-score   support

           0       0.92      0.82      0.86       370
           1       0.39      0.62      0.48        71

    accuracy                           0.78       441
   macro avg       0.66      0.72      0.67       441
weighted avg       0.83      0.78      0.80       441



## XGboost as meta

In [33]:
estimators = [
    ('ada',ada),
    
    ('gbc',gbc),
    ('lr',lr),
    ('svc',svc),
    
    ('gnb',gnb),
    
    #('xgb',xgb),
    
    ('dt',dt),
    ('rfc',rfc)
]


stk_classifier = StackingClassifier(estimators=estimators, 
                                    final_estimator=XGBClassifier(),
                                    passthrough=True,
                                    cv=5,
                                    n_jobs=-1,
                                    verbose=3)


stk_classifier.fit(X,y['Attrition'])
y_pred = stk_classifier.predict(X_test)
print('Accuracy:',accuracy_score(y_test,y_pred))
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))



Accuracy: 0.8344671201814059
[[333  37]
 [ 36  35]]
              precision    recall  f1-score   support

           0       0.90      0.90      0.90       370
           1       0.49      0.49      0.49        71

    accuracy                           0.83       441
   macro avg       0.69      0.70      0.70       441
weighted avg       0.84      0.83      0.83       441



## DT as meta

In [39]:
estimators = [
    ('ada',ada),
    
    ('gbc',gbc),
    ('lr',lr),
    ('svc',svc),
    
    ('gnb',gnb),
    
    ('xgb',xgb),
    
    #('dt',dt),
    ('rfc',rfc)
]


stk_classifier = StackingClassifier(estimators=estimators, 
                                    final_estimator=DecisionTreeClassifier(max_depth=4),
                                    passthrough=True,
                                    cv=5,
                                    n_jobs=-1,
                                    verbose=3)


stk_classifier.fit(X,y['Attrition'])
y_pred = stk_classifier.predict(X_test)
print('Accuracy:',accuracy_score(y_test,y_pred))
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))

Accuracy: 0.8412698412698413
[[334  36]
 [ 34  37]]
              precision    recall  f1-score   support

           0       0.91      0.90      0.91       370
           1       0.51      0.52      0.51        71

    accuracy                           0.84       441
   macro avg       0.71      0.71      0.71       441
weighted avg       0.84      0.84      0.84       441

