# 실습2 금융 상품 이탈률 예측 (Customer Churn Prediction)

금융상품 이탈률 예측(Customer Churn Prediction)을 위한 실습 코드를 준비했습니다. Kaggle에서 찾은 적절한 데이터셋은 "Credit Card Customers" 입니다.

데이터셋 링크: https://www.kaggle.com/sakshigoyal7/credit-card-customers

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix


In [7]:
# 데이터 로드
data = pd.read_csv('BankChurners.csv')

# 필요한 열 선택
data = data[['Customer_Age', 'Gender', 'Dependent_count', 'Education_Level', 'Marital_Status', 'Income_Category', 'Card_Category', 'Months_on_book', 'Total_Relationship_Count', 'Months_Inactive_12_mon', 'Contacts_Count_12_mon', 'Credit_Limit', 'Total_Revolving_Bal', 'Avg_Open_To_Buy', 'Total_Amt_Chng_Q4_Q1', 'Total_Trans_Amt', 'Total_Trans_Ct', 'Total_Ct_Chng_Q4_Q1', 'Avg_Utilization_Ratio', 'Attrition_Flag']]

# 카테고리형 변수를 숫자로 매핑
data['Gender'] = data['Gender'].map({'M': 0, 'F': 1})
data['Education_Level'] = data['Education_Level'].map({'Unknown': 0, 'Uneducated': 1, 'High School': 2, 'College': 3, 'Graduate': 4, 'Post-Graduate': 5, 'Doctorate': 6})
data['Marital_Status'] = data['Marital_Status'].map({'Unknown': 0, 'Single': 1, 'Married': 2, 'Divorced': 3})
data['Income_Category'] = data['Income_Category'].map({'Unknown': 0, 'Less than $40K': 1, '$40K - $60K': 2, '$60K - $80K': 3, '$80K - $120K': 4, '$120K +': 5})
data['Card_Category'] = data['Card_Category'].map({'Blue': 0, 'Silver': 1, 'Gold': 2, 'Platinum': 3})

# Attrition_Flag 열을 매핑해보세요 'Existing Customer'-> 0, 'Attrited Customer' -> 1
data['Attrition_Flag'] = data['Attrition_Flag'].map({'Existing Customer': 0, 'Attrited Customer': 1})

# 위 모든 feature를 사용해서, Attrition_Flag 예측
X = data.drop('Attrition_Flag', axis=1)
y = data['Attrition_Flag']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=40)
data

Unnamed: 0,Customer_Age,Gender,Dependent_count,Education_Level,Marital_Status,Income_Category,Card_Category,Months_on_book,Total_Relationship_Count,Months_Inactive_12_mon,Contacts_Count_12_mon,Credit_Limit,Total_Revolving_Bal,Avg_Open_To_Buy,Total_Amt_Chng_Q4_Q1,Total_Trans_Amt,Total_Trans_Ct,Total_Ct_Chng_Q4_Q1,Avg_Utilization_Ratio,Attrition_Flag
0,45,0,3,2,2,3,0,39,5,1,3,12691.0,777,11914.0,1.335,1144,42,1.625,0.061,0
1,49,1,5,4,1,1,0,44,6,1,2,8256.0,864,7392.0,1.541,1291,33,3.714,0.105,0
2,51,0,3,4,2,4,0,36,4,1,0,3418.0,0,3418.0,2.594,1887,20,2.333,0.000,0
3,40,1,4,2,0,1,0,34,3,4,1,3313.0,2517,796.0,1.405,1171,20,2.333,0.760,0
4,40,0,3,1,2,3,0,21,5,1,0,4716.0,0,4716.0,2.175,816,28,2.500,0.000,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10122,50,0,2,4,1,2,0,40,3,2,3,4003.0,1851,2152.0,0.703,15476,117,0.857,0.462,0
10123,41,0,2,0,3,2,0,25,4,2,3,4277.0,2186,2091.0,0.804,8764,69,0.683,0.511,1
10124,44,1,1,2,2,1,0,36,5,3,4,5409.0,0,5409.0,0.819,10291,60,0.818,0.000,1
10125,30,0,2,4,0,2,0,36,4,3,3,5281.0,0,5281.0,0.535,8395,62,0.722,0.000,1


In [8]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.metrics import Precision, Recall


# 모델 선택 및 학습
model = Sequential([
    Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy', Precision(), Recall()])
model.fit(X_train, y_train, epochs=1000, batch_size=256, validation_data=(X_test, y_test))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/1000
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 14ms/step - accuracy: 0.7567 - loss: 43.8606 - precision: 0.2320 - recall: 0.1989 - val_accuracy: 0.8258 - val_loss: 4.2130 - val_precision: 0.4412 - val_recall: 0.4259
Epoch 2/1000
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8046 - loss: 5.0945 - precision: 0.3733 - recall: 0.3250 - val_accuracy: 0.8435 - val_loss: 10.7643 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 3/1000
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8059 - loss: 8.6441 - precision: 0.4276 - recall: 0.2158 - val_accuracy: 0.6194 - val_loss: 6.7305 - val_precision: 0.2472 - val_recall: 0.7003
Epoch 4/1000
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7672 - loss: 8.5601 - precision: 0.2531 - recall: 0.2245 - val_accuracy: 0.6984 - val_loss: 3.7961 - val_precision: 0.3008 - val_recall: 0.7003
Epoch 5/1000


<keras.src.callbacks.history.History at 0x14e00072a70>

In [9]:
# Feature Importance 시각화
feat_importances = pd.Series(model.feature_importances_, index=X.columns)
feat_importances.nlargest(10).plot(kind='barh', figsize=(10, 6))
plt.title('Feature Importance')
plt.xlabel('Importance Score')
plt.ylabel('Features')
plt.show()

# 상관관계 히트맵
plt.figure(figsize=(12, 10))
sns.heatmap(data.corr(), annot=True, cmap='coolwarm', linewidths=0.5)
plt.title('Correlation Heatmap')
plt.show()

AttributeError: 'Sequential' object has no attribute 'feature_importances_'