# Santander 은행 데이터를 이용해 고객 만족 여부를 예측하는 모델 구축 목적 프로젝트

## 0. 라이브러리 불러오기

In [86]:
# 1. 데이터 핸들링 및 연산
import pandas as pd
import numpy as np

# 2. 데이터 시각화
import matplotlib.pyplot as plt
import seaborn as sns

# 경고 메시지 비활성화 (선택 사항)
import warnings
warnings.filterwarnings('ignore')

# 3. 데이터 전처리 및 피처 엔지니어링
from sklearn.preprocessing import StandardScaler

# 4. 모델 훈련을 위한 데이터 분할
from sklearn.model_selection import train_test_split

# 5. 머신러닝 모델
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

# 6. 모델 성능 평가
from sklearn.metrics import accuracy_score, roc_auc_score

# 7. 하이퍼파라미터 튜닝
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from scipy.stats import uniform

## 1. 데이터 로드

In [30]:
test_df= pd.read_csv('./data/test.csv')
train_df= pd.read_csv('./data/train.csv')

In [31]:
train_df

Unnamed: 0,ID,var3,var15,imp_ent_var16_ult1,imp_op_var39_comer_ult1,imp_op_var39_comer_ult3,imp_op_var40_comer_ult1,imp_op_var40_comer_ult3,imp_op_var40_efect_ult1,imp_op_var40_efect_ult3,...,saldo_medio_var33_hace2,saldo_medio_var33_hace3,saldo_medio_var33_ult1,saldo_medio_var33_ult3,saldo_medio_var44_hace2,saldo_medio_var44_hace3,saldo_medio_var44_ult1,saldo_medio_var44_ult3,var38,TARGET
0,1,2,23,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,39205.170000,0
1,3,2,34,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,49278.030000,0
2,4,2,23,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,67333.770000,0
3,8,2,37,0.0,195.0,195.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,64007.970000,0
4,10,2,39,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,117310.979016,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76015,151829,2,48,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,60926.490000,0
76016,151830,2,39,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,118634.520000,0
76017,151835,2,23,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,74028.150000,0
76018,151836,2,25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,84278.160000,0


In [32]:
test_df

Unnamed: 0,ID,var3,var15,imp_ent_var16_ult1,imp_op_var39_comer_ult1,imp_op_var39_comer_ult3,imp_op_var40_comer_ult1,imp_op_var40_comer_ult3,imp_op_var40_efect_ult1,imp_op_var40_efect_ult3,...,saldo_medio_var29_ult3,saldo_medio_var33_hace2,saldo_medio_var33_hace3,saldo_medio_var33_ult1,saldo_medio_var33_ult3,saldo_medio_var44_hace2,saldo_medio_var44_hace3,saldo_medio_var44_ult1,saldo_medio_var44_ult3,var38
0,2,2,32,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,40532.100000
1,5,2,35,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,45486.720000
2,6,2,23,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,46993.950000
3,7,2,24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,187898.610000
4,9,2,23,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,73649.730000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75813,151831,2,23,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,40243.200000
75814,151832,2,26,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,146961.300000
75815,151833,2,24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,167299.770000
75816,151834,2,40,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,117310.979016


# 2. EDA

In [33]:
test_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 75818 entries, 0 to 75817
Columns: 370 entries, ID to var38
dtypes: float64(110), int64(260)
memory usage: 214.0 MB


In [34]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 76020 entries, 0 to 76019
Columns: 371 entries, ID to TARGET
dtypes: float64(111), int64(260)
memory usage: 215.2 MB


In [35]:
train_df.describe()

Unnamed: 0,ID,var3,var15,imp_ent_var16_ult1,imp_op_var39_comer_ult1,imp_op_var39_comer_ult3,imp_op_var40_comer_ult1,imp_op_var40_comer_ult3,imp_op_var40_efect_ult1,imp_op_var40_efect_ult3,...,saldo_medio_var33_hace2,saldo_medio_var33_hace3,saldo_medio_var33_ult1,saldo_medio_var33_ult3,saldo_medio_var44_hace2,saldo_medio_var44_hace3,saldo_medio_var44_ult1,saldo_medio_var44_ult3,var38,TARGET
count,76020.0,76020.0,76020.0,76020.0,76020.0,76020.0,76020.0,76020.0,76020.0,76020.0,...,76020.0,76020.0,76020.0,76020.0,76020.0,76020.0,76020.0,76020.0,76020.0,76020.0
mean,75964.050723,-1523.199277,33.212865,86.208265,72.363067,119.529632,3.55913,6.472698,0.412946,0.567352,...,7.935824,1.365146,12.21558,8.784074,31.505324,1.858575,76.026165,56.614351,117235.8,0.039569
std,43781.947379,39033.462364,12.956486,1614.757313,339.315831,546.266294,93.155749,153.737066,30.604864,36.513513,...,455.887218,113.959637,783.207399,538.439211,2013.125393,147.786584,4040.337842,2852.579397,182664.6,0.194945
min,1.0,-999999.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5163.75,0.0
25%,38104.75,2.0,23.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,67870.61,0.0
50%,76043.0,2.0,28.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,106409.2,0.0
75%,113748.75,2.0,40.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,118756.3,0.0
max,151838.0,238.0,105.0,210000.0,12888.03,21024.81,8237.82,11073.57,6600.0,6600.0,...,50003.88,20385.72,138831.63,91778.73,438329.22,24650.01,681462.9,397884.3,22034740.0,1.0


In [36]:
test_df.describe()

Unnamed: 0,ID,var3,var15,imp_ent_var16_ult1,imp_op_var39_comer_ult1,imp_op_var39_comer_ult3,imp_op_var40_comer_ult1,imp_op_var40_comer_ult3,imp_op_var40_efect_ult1,imp_op_var40_efect_ult3,...,saldo_medio_var29_ult3,saldo_medio_var33_hace2,saldo_medio_var33_hace3,saldo_medio_var33_ult1,saldo_medio_var33_ult3,saldo_medio_var44_hace2,saldo_medio_var44_hace3,saldo_medio_var44_ult1,saldo_medio_var44_ult3,var38
count,75818.0,75818.0,75818.0,75818.0,75818.0,75818.0,75818.0,75818.0,75818.0,75818.0,...,75818.0,75818.0,75818.0,75818.0,75818.0,75818.0,75818.0,75818.0,75818.0,75818.0
mean,75874.830581,-1579.955011,33.138832,83.164329,74.312894,123.136448,4.578517,7.666855,0.470645,0.672444,...,0.160595,12.438559,1.327408,17.469991,12.674349,63.597839,11.404505,95.973025,70.504319,117386.3
std,43882.370827,39752.473358,12.932,1694.873886,364.211245,606.431562,133.383326,239.701416,34.028605,42.336668,...,32.441677,958.651673,170.449935,1252.618781,895.165516,3754.668953,1061.858193,4658.871575,3318.527783,247938.4
min,2.0,-999999.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,-0.6,-0.6,0.0,0.0,0.0,0.0,1202.73
25%,37840.25,2.0,23.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,67549.61
50%,75810.0,2.0,27.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,105764.3
75%,113996.5,2.0,39.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,118315.9
max,151837.0,238.0,105.0,240000.0,21093.96,47943.96,21093.96,47943.96,6600.0,6600.0,...,8030.16,146605.32,40080.6,162355.8,138054.96,453893.4,217762.23,496198.08,354260.73,28894400.0


In [40]:
train_df.isnull().sum()

ID                         0
var3                       0
var15                      0
imp_ent_var16_ult1         0
imp_op_var39_comer_ult1    0
                          ..
saldo_medio_var44_hace3    0
saldo_medio_var44_ult1     0
saldo_medio_var44_ult3     0
var38                      0
TARGET                     0
Length: 371, dtype: int64

In [41]:
test_df.isnull().sum()

ID                         0
var3                       0
var15                      0
imp_ent_var16_ult1         0
imp_op_var39_comer_ult1    0
                          ..
saldo_medio_var44_hace2    0
saldo_medio_var44_hace3    0
saldo_medio_var44_ult1     0
saldo_medio_var44_ult3     0
var38                      0
Length: 370, dtype: int64

In [42]:
print(train_df['var3'].value_counts())

var3
 2         74165
 8           138
-999999      116
 9           110
 3           108
           ...  
 63            1
 194           1
 40            1
 57            1
 87            1
Name: count, Length: 208, dtype: int64


In [43]:
print(test_df['var3'].value_counts())

var3
 2         73962
-999999      120
 8           116
 9           108
 3           107
           ...  
 127           1
 51            1
 194           1
 87            1
 225           1
Name: count, Length: 199, dtype: int64


In [39]:
train_df['TARGET'].value_counts() # 고객만족 0, 고객불만족 1

TARGET
0    73012
1     3008
Name: count, dtype: int64

## 3. 데이터 전처리

In [47]:
# var3의 이상치를 결측치로 변환 
train_df['var3'].replace(-999999, np.nan, inplace=True)
test_df['var3'].replace(-999999, np.nan, inplace=True)

In [56]:
# var3의 결측치를 최빈값으로 대체 
mode_var3 = train_df['var3'].mode()[0]

In [58]:
# var3의 결측치를 최빈값으로 대체
train_df['var3'].fillna(mode_var3, inplace=True)
test_df['var3'].fillna(mode_var3, inplace=True)

In [None]:
# Train_df에 중복 데이터 처리
df_T = train_df.T
duplicated_cols = df_T[df_T.duplicated()].index.tolist()
train_df.drop(labels=duplicated_cols, axis=1, inplace=True)

In [None]:
# Test_df에 중복 데이터 처리
df_T = test_df.T
duplicated_cols = df_T[df_T.duplicated()].index.tolist()
test_df.drop(labels=duplicated_cols, axis=1, inplace=True)

In [66]:
X = train_df.drop(['ID', 'TARGET'], axis=1)

y = train_df['TARGET']

In [67]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [72]:
# 스케일링
scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)

X_val_scaled = scaler.transform(X_val)

X_train_scaled = pd.DataFrame(X_train_scaled, columns=X.columns)
X_val_scaled = pd.DataFrame(X_val_scaled, columns=X.columns)

In [71]:
# 스케일링 결과 확인
print("--- 스케일링 완료된 훈련 데이터 (상위 5개) ---")
print(X_train_scaled.head())

print("\n--- 스케일링 완료된 검증 데이터 (상위 5개) ---")
print(X_val_scaled.head())

print("\n--- 데이터 형태 확인 ---")
print("훈련 데이터 형태:", X_train_scaled.shape)
print("검증 데이터 형태:", X_val_scaled.shape)

--- 스케일링 완료된 훈련 데이터 (상위 5개) ---
       var3     var15  imp_ent_var16_ult1  imp_op_var39_comer_ult1  \
0 -0.075029 -0.866496           -0.059556                -0.213537   
1 -0.075029  3.535981            0.017418                -0.213537   
2 -0.075029 -0.325841           -0.059556                -0.213537   
3 -0.075029 -0.325841           -0.059556                -0.213537   
4 -0.075029 -0.634786           -0.059556                -0.213537   

   imp_op_var39_comer_ult3  imp_op_var40_comer_ult1  imp_op_var40_comer_ult3  \
0                -0.218957                -0.036688                -0.040435   
1                -0.218957                -0.036688                -0.040435   
2                -0.218957                -0.036688                -0.040435   
3                -0.218957                -0.036688                -0.040435   
4                -0.218957                -0.036688                -0.040435   

   imp_op_var40_efect_ult1  imp_op_var40_efect_ult3  imp_op_var40_

## 4. 모델 평가

acc : 정확도<br>
roc_acu :  불만족 고객을 판별하는 모델의 종합 성능 지표

### 모델 : LogisticRegression

In [76]:
LR_m = LogisticRegression(random_state=11)
LR_m.fit(X_train_scaled, y_train)
pred = LR_m.predict(X_val_scaled)
pred_proba = LR_m.predict_proba(X_val_scaled)[:, 1] # AUC 계산을 위해 확률 예측
acc =accuracy_score(y_val, pred)
roc_auc = roc_auc_score(y_val, pred_proba)
print(f'정확도 :{acc}')
print(f'판별성능 : {roc_auc}')

정확도 :0.9598132070507761
판별성능 : 0.8031254308675687


LogisticRegression 모델 결과<br>
해당 모델은 정확도는 약96% 이지만,<br>
만족/불만족 클래스를 분류하는 성능이 AUC 0.803으로 측정되었습니다.<br>
<h3>총평 : 정확도는 약96%에 달하지만,<br>
핵심성능지표인 AUC는 0.803으로, 양호한 수준의 분류 모델능력을 갖춘 것으로 판단 됩니다.<h3>

### 모델 : DecisionTreeClassifier

In [80]:
dt_clf2 = DecisionTreeClassifier(random_state=121)
params = {'max_depth':[1,2,3], 'min_samples_split':[2,3]}
grid_dtree = GridSearchCV(dt_clf2, param_grid=params, cv=3, refit=True)
gd = grid_dtree.fit(X_train_scaled,y_train)
pred_dt = gd.predict(X_val_scaled)
pred_proba_dt = gd.predict_proba(X_val_scaled)[:, 1]
dt_acc = accuracy_score(y_val, pred_dt)
dt_roc_auc = roc_auc_score(y_val, pred_proba_dt)
print(f'정확도 :{dt_acc}')
print(f'판별성능 : {dt_roc_auc}')

정확도 :0.9604051565377533
판별성능 : 0.6858729132358422


DecisionTreeClassifier 모델 결과<br>
해당 모델은 정확도는 96% 이지만,<br>
만족/불만족 클래스를 분류하는 성능이 AUC 0.685으로 측정되었습니다.<br>
<h3>총평 : 정확도는 96%에 달하지만,<br>
핵심성능지표인 AUC는 0.685으로, 평범한 수준의 분류 모델능력을 갖춘 것으로 판단 됩니다.<h3>

### 모델 : RandomForestClassifier

In [81]:
rf_clf = RandomForestClassifier(random_state=0, max_depth=8)
rf_clf.fit(X_train_scaled, y_train)
pred_rf = rf_clf.predict(X_val_scaled)
pred_proba_rf = rf_clf.predict_proba(X_val_scaled)[:, 1]
rf_acc = accuracy_score(y_val, pred_rf)
rf_rou_auc = roc_auc_score(y_val, pred_proba_rf)
print(f'정확도 :{rf_acc}')
print(f'판별성능 : {rf_rou_auc}')

정확도 :0.9604051565377533
판별성능 : 0.8140497296825038


RandomForestClassifier 모델 결과<br>
해당 모델은 정확도는 96% 이지만,<br>
만족/불만족 클래스를 분류하는 성능이 AUC 0.814으로 측정되었습니다.<br>
<h3>총평 : 정확도는 96%에 달하지만,<br>
핵심성능지표인 AUC는 0.814으로, 양호한 수준의 분류 모델능력을 갖춘 것으로 판단 됩니다.<h3>

### 모델 : XGBoost

In [84]:
xgb_clf = XGBClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=11, eval_metric='logloss')

xgb_clf.fit(X_train_scaled, y_train)
pred_xgb = xgb_clf.predict(X_val_scaled)
pred_proba_xgb = xgb_clf.predict_proba(X_val_scaled)[:,1]
xgb_acc = accuracy_score(y_val, pred_xgb)
xgb_roc_auc = roc_auc_score(y_val, pred_proba_xgb)
print(f'정확도 :{xgb_acc}')
print(f'판별성능 : {xgb_roc_auc}')

정확도 :0.9604051565377533
판별성능 : 0.8465663239141227


XGBClassifier 모델 결과<br>
해당 모델은 정확도는 96% 이지만,<br>
만족/불만족 클래스를 분류하는 성능이 AUC 0.846으로 측정되었습니다.<br>
<h3>총평 : 정확도는 96%에 달하지만,<br>
핵심성능지표인 AUC는 0.846으로, 우수한 수준의 분류 모델능력을 갖춘 것으로 판단 됩니다.<h3>

## 5. 최적의 파라미터 탐색

In [87]:
# 1. 모델별 파라미터 탐색 범위 설정
param_dists = {
    'LogisticRegression': {
        'C': uniform(loc=0, scale=4),  # 0~4 사이에서 실수 균등분포로 추출
        'penalty': ['l2'] # l1은 solver 지원 문제로 l2만 사용
    },
    'DecisionTree': {
        'max_depth': range(2, 11), # 2부터 10까지
        'min_samples_split': range(2, 11),
        'min_samples_leaf': range(1, 11)
    },
    'RandomForest': {
        'n_estimators': range(50, 201, 50), # 50, 100, 150, 200
        'max_depth': range(5, 16), # 5부터 15까지
        'min_samples_split': range(2, 11),
        'min_samples_leaf': range(1, 11)
    },
    'XGBoost': {
        'n_estimators': range(50, 201, 50),
        'learning_rate': [0.01, 0.05, 0.1, 0.2],
        'max_depth': range(3, 8), # 3부터 7까지
        'subsample': [0.6, 0.7, 0.8, 0.9, 1.0],
        'colsample_bytree': [0.6, 0.7, 0.8, 0.9, 1.0]
    }
}

# 2. 모델 딕셔너리 생성
models = {
    'LogisticRegression': LogisticRegression(random_state=11, solver='liblinear'),
    'DecisionTree': DecisionTreeClassifier(random_state=11),
    'RandomForest': RandomForestClassifier(random_state=11),
    'XGBoost': XGBClassifier(random_state=11, eval_metric='logloss')
}

# 3. RandomizedSearchCV 실행
for model_name, model in models.items():
    print(f"--- {model_name} Hyperparameter Tuning Start ---")
    
    # RandomizedSearchCV 설정
    # n_iter: 지정된 범위에서 20개의 파라미터 조합을 무작위로 테스트
    # scoring='roc_auc': AUC 점수를 기준으로 최적의 파라미터를 찾음
    # n_jobs=-1: 모든 CPU 코어를 사용하여 속도 향상
    random_search = RandomizedSearchCV(model, 
                                       param_distributions=param_dists[model_name],
                                       n_iter=20, 
                                       cv=3, 
                                       scoring='roc_auc', 
                                       random_state=11,
                                       n_jobs=-1)
    
    # 훈련 데이터로 탐색 시작
    random_search.fit(X_train_scaled, y_train)
    
    # 결과 출력
    print(f"Best Parameters: {random_search.best_params_}")
    print(f"Best CV Score (AUC): {random_search.best_score_:.4f}\n")

--- LogisticRegression Hyperparameter Tuning Start ---
Best Parameters: {'C': np.float64(0.7210787555070768), 'penalty': 'l2'}
Best CV Score (AUC): 0.7911

--- DecisionTree Hyperparameter Tuning Start ---
Best Parameters: {'min_samples_split': 9, 'min_samples_leaf': 8, 'max_depth': 6}
Best CV Score (AUC): 0.8092

--- RandomForest Hyperparameter Tuning Start ---
Best Parameters: {'n_estimators': 100, 'min_samples_split': 7, 'min_samples_leaf': 7, 'max_depth': 15}
Best CV Score (AUC): 0.8156

--- XGBoost Hyperparameter Tuning Start ---
Best Parameters: {'subsample': 0.7, 'n_estimators': 50, 'max_depth': 4, 'learning_rate': 0.1, 'colsample_bytree': 0.7}
Best CV Score (AUC): 0.8345



## 6. 결론
Stantander 은행 데이터를 이용해 고객 만족 여부를 예측하는 모델 구축을 목적으로 본 프로젝트를 진행했습니다<br>
로지스틱 회귀, 의사결정나무, 랜덤 포레스트, XGBoost 모델의 성능을 비교하고,<br>
 RandomizedSearchCV를 통해 모델별 최적의 하이퍼파라미터를 탐색했습니다.<br>
클래스 불균형을 고려해 AUC를 핵심 평가지표로 삼았으며,<br>
튜닝 결과 XGBoost 모델이 CrossValidation에서 약 0.835dml AUC로 가장 높은 판별 성능을 보여 최종 모델로 선정되었습니다<Br>
분석된 최적의 파라미터는 subsample : 0.7, n_estimators : 50, max_depth : 4, learning_rate : 0.1, colsample_bytree : 0.7로 확인되었습니다.

### 최종적으로 XGBoost 모델이 고객 만족여부 예측에 가장 적합한 모델임을 확인하였습니다 