In [43]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from time import perf_counter
from sklearn.metrics import classification_report

In [44]:
df = pd.read_csv('BankChurners.csv')

In [45]:
for cols in df.columns:
    print(cols)

CLIENTNUM
Attrition_Flag
Customer_Age
Gender
Dependent_count
Education_Level
Marital_Status
Income_Category
Card_Category
Months_on_book
Total_Relationship_Count
Months_Inactive_12_mon
Contacts_Count_12_mon
Credit_Limit
Total_Revolving_Bal
Avg_Open_To_Buy
Total_Amt_Chng_Q4_Q1
Total_Trans_Amt
Total_Trans_Ct
Total_Ct_Chng_Q4_Q1
Avg_Utilization_Ratio
Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_1
Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_2


In [46]:
df = df.drop(['Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_1',
         'Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_2'], axis = 1)

In [48]:
column_transform = pd.get_dummies(df, columns = ['Gender', 'Education_Level', 'Marital_Status', 'Income_Category', 'Card_Category'])
df = df.merge(column_transform)
df.drop(['Gender', 'Education_Level', 'Marital_Status', 'Income_Category', 'Card_Category'], axis=1, inplace=True)

In [50]:
label_encoder = preprocessing.LabelEncoder()

df['Attrition_Flag'] = label_encoder.fit_transform(df['Attrition_Flag'])

In [51]:
X = df.drop('Attrition_Flag', axis = 1)
y = df['Attrition_Flag']

In [52]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=41)

In [53]:
rfg = RandomForestClassifier(class_weight='balanced', random_state=41)

In [54]:
rfg_gscv = GridSearchCV(rfg,
                   {
    'n_estimators': range(1, 50), 
    'max_depth': range(1, 20)
                   }, 
                    n_jobs=-1,
                    scoring='accuracy')

rfg_gscv.fit(X_train, y_train)
t_2 = perf_counter()
print("Best Parameters:", rfg_gscv.best_params_)

Best Parameters: {'max_depth': 10, 'n_estimators': 32}


In [56]:
rfg_optimal = rfg_gscv.best_estimator_
print(classification_report(y_test, rfg_optimal.predict(X_test)))


              precision    recall  f1-score   support

           0       0.83      0.81      0.82       497
           1       0.96      0.97      0.97      2542

    accuracy                           0.94      3039
   macro avg       0.90      0.89      0.89      3039
weighted avg       0.94      0.94      0.94      3039

