# 5-Credit Card Churn Customers, accuracy 96%

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [None]:
# Load the raw churn dataset and keep a second, independent copy to work on,
# so the frame as read from disk stays available unmodified.
bank_churner_df = pd.read_csv("./data/bank_churner.csv")
bank_churner_df_org = bank_churner_df.copy()

In [None]:
# Re-read the CSV at the top of this cell. test_transform (defined below in
# this cell) drops 'cstno', so the working copy must still contain that
# column whenever the cell is executed.
bank_churner_df = pd.read_csv("./data/bank_churner.csv")
bank_churner_df_org = bank_churner_df.copy()

def test_transform(x_test):
    """Apply the basic preprocessing steps to a customer data frame.

    Steps:
      1. Drop the customer-number column ``cstno`` (not needed for modelling).
      2. Encode ``sex`` numerically: ``'F'`` -> 0, ``'M'`` -> 1.

    The columns suspected of multicollinearity (``mon_on_book``,
    ``mean_open_to_buy``, ``tot_trans_cnt_for_12m``) are kept in this variant.

    Args:
        x_test: DataFrame containing at least the ``cstno`` and ``sex`` columns.

    Returns:
        A new DataFrame; the frame passed in is not modified.

    Raises:
        KeyError: if ``cstno`` or ``sex`` is missing.
    """
    sex_codes = {'F': 0, 'M': 1}

    # `drop` returns a new frame, so the assignment below never touches the
    # caller's object.
    cleaned = x_test.drop(columns=['cstno'])
    cleaned['sex'] = cleaned['sex'].replace(sex_codes)

    return cleaned

# Replace the working copy with its preprocessed version.
bank_churner_df_org = test_transform(bank_churner_df_org)


In [None]:
# Show the preprocessed frame as the cell output.
bank_churner_df_org

In [None]:
# Pairwise correlations between the numeric columns, drawn as a heatmap.
# Numeric/bool columns are selected explicitly: pandas < 2.0 silently ignored
# non-numeric columns inside `corr()`, whereas pandas >= 2.0 raises if any
# column (e.g. a string-coded category) cannot be converted to float.
correlation = bank_churner_df_org.select_dtypes(include=['number', 'bool']).corr()
plt.figure(figsize=(12, 6))
sns.heatmap(correlation, cmap=sns.diverging_palette(20, 220, n=200), annot=False)
plt.show()

In [None]:
# Print the column dtypes and non-null counts of the preprocessed frame.
bank_churner_df_org.info()

In [None]:
# Scatter plot: credit_line (x) against mean_open_to_buy (y).
plt.figure(figsize=(12, 6))
plt.scatter(
    x=bank_churner_df_org.loc[:, 'credit_line'],
    y=bank_churner_df_org.loc[:, 'mean_open_to_buy'],
)
plt.xlabel(xlabel='credit_line')
plt.ylabel(ylabel='mean_open_to_buy')

In [None]:
# Scatter plot: tot_trans_amt_for_12m (x) against tot_trans_cnt_for_12m (y).
plt.figure(figsize=(12, 6))
plt.scatter(
    x=bank_churner_df_org.loc[:, 'tot_trans_amt_for_12m'],
    y=bank_churner_df_org.loc[:, 'tot_trans_cnt_for_12m'],
)
plt.xlabel(xlabel='tot_trans_amt_for_12m')
plt.ylabel(ylabel='tot_trans_cnt_for_12m')

In [None]:
# Scatter plot: mean_open_to_buy (x) against mean_util_pct (y).
plt.figure(figsize=(12, 6))
plt.scatter(
    x=bank_churner_df_org.loc[:, 'mean_open_to_buy'],
    y=bank_churner_df_org.loc[:, 'mean_util_pct'],
)
plt.xlabel(xlabel='mean_open_to_buy')
plt.ylabel(ylabel='mean_util_pct')

In [None]:
# Scatter plot: credit_line (x) against mean_util_pct (y).
plt.figure(figsize=(12, 6))
plt.scatter(
    x=bank_churner_df_org.loc[:, 'credit_line'],
    y=bank_churner_df_org.loc[:, 'mean_util_pct'],
)
plt.xlabel(xlabel='credit_line')
plt.ylabel(ylabel='mean_util_pct')


In [None]:
# Scatter plot: credit_line (x) against card_type (y).
plt.figure(figsize=(12, 6))
plt.scatter(
    x=bank_churner_df_org.loc[:, 'credit_line'],
    y=bank_churner_df_org.loc[:, 'card_type'],
)
plt.xlabel(xlabel='credit_line')
plt.ylabel(ylabel='card_type')


In [None]:
# Correlation heatmap of the numeric columns of the preprocessed frame.
# Numeric/bool columns are selected explicitly: pandas < 2.0 silently ignored
# non-numeric columns inside `corr()`, whereas pandas >= 2.0 raises if any
# column (e.g. a string-coded category) cannot be converted to float.
correlation1 = bank_churner_df_org.select_dtypes(include=['number', 'bool']).corr()
plt.figure(figsize=(12, 6))
sns.heatmap(correlation1, cmap=sns.diverging_palette(20, 220, n=200), annot=False)
plt.show()

# 예측 단계

## Null 처리

In [None]:
# Keep `mean_util_pct` (the column drop on the next line stays disabled) and
# handle missing values by removing every row that contains one.
#bank_churner_df_org.drop(columns = ['mean_util_pct'], inplace=True)
bank_churner_df_org.dropna(axis=0, inplace=True)

## 표준화, 스케일링

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Classify the columns by dtype *before* encoding: anything that is not
# int64/float64 is treated as categorical.
cat1 = bank_churner_df_org.select_dtypes(exclude=['int64', 'float64']).columns
int1 = bank_churner_df_org.select_dtypes(include=['int64']).columns
# Canonical lowercase dtype name, consistent with the `exclude` list above
# ('Float64' is the name of pandas' nullable extension dtype and is not
# accepted by every pandas/numpy combination).
float1 = bank_churner_df_org.select_dtypes(include=['float64']).columns

# One-hot encode the categorical columns.
bank_churner_df_org = pd.get_dummies(bank_churner_df_org, columns=cat1)

# Rescale every numeric feature to [0, 1], one column at a time.
# The target label `is_churned` is skipped so it keeps its original class
# values instead of being turned into a scaled float.
# NOTE(review): the scalers are fitted on all rows, i.e. before the
# train/test split performed later in the notebook - confirm this is acceptable.
for col in list(float1) + list(int1):
    if col == 'is_churned':
        continue
    bank_churner_df_org[col] = MinMaxScaler().fit_transform(bank_churner_df_org[[col]])

print('New Number of Features: %d'%(bank_churner_df_org.shape[1]))

## 중요 Feature 설정

In [None]:
# Correlation of every column with the churn label, sorted from the most
# positive to the most negative.
corr1 = bank_churner_df_org.corr()
corr1.sort_values(["is_churned"], ascending=False, inplace=True)
correlations2 = corr1["is_churned"]

# Keep the columns whose correlation with the label is above 0.1 or below -0.1.
# `a` also contains `is_churned` itself (self-correlation of 1.0).
a = correlations2[correlations2 > 0.1]
b = correlations2[correlations2 < -0.1]

# `Series.append` was removed in pandas 2.0; `pd.concat` yields the same
# result (positives first, then negatives) on every pandas version.
top_corr_features = pd.concat([a, b])

print(top_corr_features)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

## 예측 데이터 생성

In [None]:
# Model inputs: a hand-picked subset of eight feature columns.
x = bank_churner_df_org.loc[:, [
    'contact_cnt_for_12m',
    'months_inact_for_12m',
    'tot_amt_ratio_q4_q1',
    'tot_product_count',
    'tot_trans_amt_for_12m',
    'tot_revol_balance',
    'tot_cnt_ratio_q4_q1',
    'tot_trans_cnt_for_12m',
]]
# Target: the churn label.
y = bank_churner_df_org.loc[:, 'is_churned']

In [None]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# identical between runs.
x_train, x_test,y_train,y_test=train_test_split(x,y,test_size=0.3,random_state=42)

In [None]:
# Random forest with 100 trees. The seed is fixed so the fitted model (and
# therefore the reported metrics) is reproducible, in line with the seeded
# train/test split.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(x_train, y_train)

In [None]:
# Predicted churn labels for the held-out rows.
model_pred=model.predict(x_test)

In [None]:
from sklearn.metrics import classification_report

In [None]:
# Per-class precision / recall / F1 of the random forest on the held-out rows.
print(classification_report(y_test,model_pred))

## 회귀분석

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Logistic regression with default settings, fitted on the same training split.
model2=LogisticRegression().fit(x_train,y_train)

In [None]:
# Predicted churn labels of the logistic regression for the held-out rows.
y_test_pred=model2.predict(x_test)

In [None]:
# Per-class precision / recall / F1 of the logistic regression on the held-out rows.
print(classification_report(y_test,y_test_pred))

# 다중 공선성 제거후 재수행

## 데이터 로딩

In [None]:
# Second run: start again from the raw CSV with a fresh working copy.
bank_churner_df = pd.read_csv("./data/bank_churner.csv")
bank_churner_df_org = bank_churner_df.copy()

def test_transform(x_test):
    """Preprocess a customer data frame and remove multicollinear columns.

    Steps:
      1. Drop the customer-number column ``cstno`` (not needed for modelling).
      2. Encode ``sex`` numerically: ``'F'`` -> 0, ``'M'`` -> 1.
      3. Drop the columns treated as multicollinear: ``mon_on_book``,
         ``mean_open_to_buy`` and ``tot_trans_cnt_for_12m``.

    Args:
        x_test: DataFrame containing all of the columns named above.

    Returns:
        A new DataFrame; the frame passed in is not modified.

    Raises:
        KeyError: if any of the referenced columns is missing.
    """
    sex_codes = {'F': 0, 'M': 1}
    collinear_cols = ['mon_on_book', 'mean_open_to_buy', 'tot_trans_cnt_for_12m']

    # `drop` returns a new frame, so the assignment below never touches the
    # caller's object.
    cleaned = x_test.drop(columns=['cstno'])
    cleaned['sex'] = cleaned['sex'].replace(sex_codes)

    return cleaned.drop(columns=collinear_cols)

# Replace the working copy with its preprocessed version.
bank_churner_df_org = test_transform(bank_churner_df_org)


## Null 처리

In [None]:
# In this run `mean_util_pct` is removed entirely; afterwards every row that
# still contains a missing value is dropped.
bank_churner_df_org.drop(columns = ['mean_util_pct'], inplace=True)
bank_churner_df_org.dropna(axis=0, inplace=True)

## 표준화, 스케일링

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Classify the columns by dtype *before* encoding: anything that is not
# int64/float64 is treated as categorical.
cat1 = bank_churner_df_org.select_dtypes(exclude=['int64', 'float64']).columns
int1 = bank_churner_df_org.select_dtypes(include=['int64']).columns
# Canonical lowercase dtype name, consistent with the `exclude` list above
# ('Float64' is the name of pandas' nullable extension dtype and is not
# accepted by every pandas/numpy combination).
float1 = bank_churner_df_org.select_dtypes(include=['float64']).columns

# One-hot encode the categorical columns.
bank_churner_df_org = pd.get_dummies(bank_churner_df_org, columns=cat1)

# Rescale every numeric feature to [0, 1], one column at a time.
# The target label `is_churned` is skipped so it keeps its original class
# values instead of being turned into a scaled float.
# NOTE(review): the scalers are fitted on all rows, i.e. before the
# train/test split performed later in the notebook - confirm this is acceptable.
for col in list(float1) + list(int1):
    if col == 'is_churned':
        continue
    bank_churner_df_org[col] = MinMaxScaler().fit_transform(bank_churner_df_org[[col]])

print('New Number of Features: %d'%(bank_churner_df_org.shape[1]))

## 중요 Feature 설정

In [None]:
# Correlation of every column with the churn label, sorted from the most
# positive to the most negative.
corr1 = bank_churner_df_org.corr()
corr1.sort_values(["is_churned"], ascending=False, inplace=True)
correlations2 = corr1["is_churned"]

# Keep the columns whose correlation with the label is above 0.1 or below -0.1.
# `a` also contains `is_churned` itself (self-correlation of 1.0).
a = correlations2[correlations2 > 0.1]
b = correlations2[correlations2 < -0.1]

# `Series.append` was removed in pandas 2.0; `pd.concat` yields the same
# result (positives first, then negatives) on every pandas version.
top_corr_features = pd.concat([a, b])

print(top_corr_features)

## 예측 데이터 생성

In [None]:
# Model inputs: a hand-picked subset of seven feature columns
# (`tot_trans_cnt_for_12m` is no longer available in this run).
x = bank_churner_df_org.loc[:, [
    'contact_cnt_for_12m',
    'months_inact_for_12m',
    'tot_amt_ratio_q4_q1',
    'tot_product_count',
    'tot_trans_amt_for_12m',
    'tot_revol_balance',
    'tot_cnt_ratio_q4_q1',
]]
# Target: the churn label.
y = bank_churner_df_org.loc[:, 'is_churned']

In [None]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# identical between runs.
x_train, x_test,y_train,y_test=train_test_split(x,y,test_size=0.3,random_state=42)

In [None]:
# Random forest with 100 trees. The seed is fixed so the fitted model (and
# therefore the reported metrics) is reproducible, in line with the seeded
# train/test split.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(x_train, y_train)

In [None]:
# Predicted churn labels for the held-out rows.
model_pred=model.predict(x_test)

In [None]:
# Per-class precision / recall / F1 of the random forest on the held-out rows.
print(classification_report(y_test,model_pred))

## 회귀분석

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Logistic regression with default settings, fitted on the same training split.
model2=LogisticRegression().fit(x_train,y_train)

In [None]:
# Predicted churn labels of the logistic regression for the held-out rows.
y_test_pred=model2.predict(x_test)

In [None]:
# Per-class precision / recall / F1 of the logistic regression on the held-out rows.
print(classification_report(y_test,y_test_pred))