In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

In [2]:
def iteration_training_model(lst):
    df = pd.read_csv('credit_card_churn.csv')
    df = df.drop(['Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_1', 'Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_2', 'CLIENTNUM'], axis=1)
    ##### remove outlier
    df = df.loc[df['Customer_Age'] <= 66]
    df = df.loc[df['Total_Ct_Chng_Q4_Q1'] <= 1.6]
    df = df.loc[df['Months_Inactive_12_mon'] < 6]
    df = df.loc[df['Total_Amt_Chng_Q4_Q1'] <= 1.6]

    ##### correct skewed data
    df['Credit_Limit'] = np.log10(df['Credit_Limit'])
    fitted_data, fitted_lambda = stats.boxcox(df['Avg_Open_To_Buy'])
    df['Avg_Open_To_Buy'] = fitted_data
    df['Total_Trans_Amt'] = np.log10(df['Total_Trans_Amt'])
    df['Avg_Utilization_Ratio'] = (df['Avg_Utilization_Ratio'])**(1/2)

    df = df.drop(lst, axis = 1)

    x = df.drop(['Attrition_Flag'], axis=1)
    y = df['Attrition_Flag']
    y = y.replace(['Existing Customer', 'Attrited Customer'], [0, 1])
    x = pd.get_dummies(x)
    x = x.values
    y = y.values
    scaler = MinMaxScaler()
    x = scaler.fit_transform(x)
    
    x_train,x_test,y_train,y_test = train_test_split(x, y, test_size = 0.1, random_state = 1)
    parameters = {'C':[0.01, 0.1, 1, 10, 100],
                    'solver':('newton-cg', 'liblinear', 'sag', 'saga')}
    
    logisReg = LogisticRegression()
    clf = GridSearchCV(logisReg, parameters, scoring='f1', verbose=0, return_train_score=True, n_jobs=-1, cv = 10)
    clf.fit(x_train, y_train)
    
    y_pred = clf.predict(x_test)
    print(lst)
    print("Classification report \n=======================")
    print(classification_report(y_true=y_test, y_pred=y_pred))
    print("Confusion matrix \n=======================")
    print(confusion_matrix(y_true=y_test, y_pred=y_pred))

In [3]:
import itertools
cols = ['Customer_Age', 'Gender', 'Dependent_count',
       'Education_Level', 'Marital_Status', 'Income_Category', 'Card_Category',
       'Months_on_book', 'Total_Relationship_Count', 'Months_Inactive_12_mon',
       'Contacts_Count_12_mon', 'Credit_Limit', 'Total_Revolving_Bal',
       'Avg_Open_To_Buy', 'Total_Amt_Chng_Q4_Q1', 'Total_Trans_Amt',
       'Total_Trans_Ct', 'Total_Ct_Chng_Q4_Q1', 'Avg_Utilization_Ratio']
for i in range(1,3,1):
    comb = itertools.combinations(range(19), i)
    for val in comb:
        lst = []
        for j in range(i):
            lst.append(cols[val[j]])
        iteration_training_model(lst)

['Customer_Age']
Classification report 
              precision    recall  f1-score   support

           0       0.93      0.97      0.95       823
           1       0.83      0.66      0.74       165

    accuracy                           0.92       988
   macro avg       0.88      0.82      0.85       988
weighted avg       0.92      0.92      0.92       988

Confusion matrix 
[[801  22]
 [ 56 109]]
['Gender']
Classification report 
              precision    recall  f1-score   support

           0       0.93      0.97      0.95       823
           1       0.84      0.65      0.73       165

    accuracy                           0.92       988
   macro avg       0.89      0.81      0.84       988
weighted avg       0.92      0.92      0.92       988

Confusion matrix 
[[802  21]
 [ 57 108]]
['Dependent_count']
Classification report 
              precision    recall  f1-score   support

           0       0.93      0.97      0.95       823
           1       0.83      0.65     



['Months_Inactive_12_mon']
Classification report 
              precision    recall  f1-score   support

           0       0.93      0.98      0.95       823
           1       0.84      0.61      0.71       165

    accuracy                           0.92       988
   macro avg       0.88      0.79      0.83       988
weighted avg       0.91      0.92      0.91       988

Confusion matrix 
[[804  19]
 [ 64 101]]
['Contacts_Count_12_mon']
Classification report 
              precision    recall  f1-score   support

           0       0.93      0.97      0.95       823
           1       0.82      0.65      0.72       165

    accuracy                           0.92       988
   macro avg       0.87      0.81      0.84       988
weighted avg       0.91      0.92      0.91       988

Confusion matrix 
[[799  24]
 [ 58 107]]
['Credit_Limit']
Classification report 
              precision    recall  f1-score   support

           0       0.93      0.97      0.95       823
           1    



['Gender', 'Card_Category']
Classification report 
              precision    recall  f1-score   support

           0       0.93      0.97      0.95       823
           1       0.84      0.65      0.73       165

    accuracy                           0.92       988
   macro avg       0.88      0.81      0.84       988
weighted avg       0.92      0.92      0.92       988

Confusion matrix 
[[802  21]
 [ 58 107]]
['Gender', 'Months_on_book']
Classification report 
              precision    recall  f1-score   support

           0       0.93      0.98      0.96       823
           1       0.85      0.66      0.74       165

    accuracy                           0.92       988
   macro avg       0.89      0.82      0.85       988
weighted avg       0.92      0.92      0.92       988

Confusion matrix 
[[804  19]
 [ 56 109]]
['Gender', 'Total_Relationship_Count']
Classification report 
              precision    recall  f1-score   support

           0       0.93      0.96      0.95 



['Dependent_count', 'Total_Amt_Chng_Q4_Q1']
Classification report 
              precision    recall  f1-score   support

           0       0.93      0.97      0.95       823
           1       0.84      0.65      0.73       165

    accuracy                           0.92       988
   macro avg       0.88      0.81      0.84       988
weighted avg       0.92      0.92      0.92       988

Confusion matrix 
[[802  21]
 [ 58 107]]




['Dependent_count', 'Total_Trans_Amt']
Classification report 
              precision    recall  f1-score   support

           0       0.92      0.98      0.95       823
           1       0.83      0.57      0.68       165

    accuracy                           0.91       988
   macro avg       0.88      0.77      0.81       988
weighted avg       0.90      0.91      0.90       988

Confusion matrix 
[[804  19]
 [ 71  94]]
['Dependent_count', 'Total_Trans_Ct']
Classification report 
              precision    recall  f1-score   support

           0       0.90      0.98      0.94       823
           1       0.83      0.46      0.59       165

    accuracy                           0.89       988
   macro avg       0.86      0.72      0.77       988
weighted avg       0.89      0.89      0.88       988

Confusion matrix 
[[807  16]
 [ 89  76]]
['Dependent_count', 'Total_Ct_Chng_Q4_Q1']
Classification report 
              precision    recall  f1-score   support

           0       0



['Months_on_book', 'Months_Inactive_12_mon']
Classification report 
              precision    recall  f1-score   support

           0       0.93      0.98      0.95       823
           1       0.84      0.61      0.71       165

    accuracy                           0.92       988
   macro avg       0.88      0.79      0.83       988
weighted avg       0.91      0.92      0.91       988

Confusion matrix 
[[804  19]
 [ 64 101]]
['Months_on_book', 'Contacts_Count_12_mon']
Classification report 
              precision    recall  f1-score   support

           0       0.93      0.97      0.95       823
           1       0.82      0.65      0.73       165

    accuracy                           0.92       988
   macro avg       0.88      0.81      0.84       988
weighted avg       0.91      0.92      0.91       988

Confusion matrix 
[[799  24]
 [ 57 108]]
['Months_on_book', 'Credit_Limit']
Classification report 
              precision    recall  f1-score   support

           0    



['Total_Relationship_Count', 'Months_Inactive_12_mon']
Classification report 
              precision    recall  f1-score   support

           0       0.92      0.96      0.94       823
           1       0.76      0.57      0.65       165

    accuracy                           0.90       988
   macro avg       0.84      0.77      0.80       988
weighted avg       0.89      0.90      0.89       988

Confusion matrix 
[[794  29]
 [ 71  94]]
['Total_Relationship_Count', 'Contacts_Count_12_mon']
Classification report 
              precision    recall  f1-score   support

           0       0.93      0.96      0.95       823
           1       0.78      0.66      0.72       165

    accuracy                           0.91       988
   macro avg       0.86      0.81      0.83       988
weighted avg       0.91      0.91      0.91       988

Confusion matrix 
[[793  30]
 [ 56 109]]
['Total_Relationship_Count', 'Credit_Limit']
Classification report 
              precision    recall  f1-sco