In [1]:
import pandas as pd

# Load the preprocessed dataset; the path is relative to the notebook's working directory.
DATA_PATH = r"../../DL_data/dataset/re_log_model_preprocessed.csv"
df = pd.read_csv(DATA_PATH)

In [4]:
# Baseline model: an MLP with fixed hyperparameters, used as the reference
# point for the tuned model later in the notebook.

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report, precision_score, recall_score, f1_score
import pandas as pd

# 1. Split features / target and hold out 20% of the rows as the test set.
X = df.drop('churn', axis=1)
y = df['churn']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 2. Scaling: standardize only the continuous columns.
#    The scaled values are written into separate copies so that X_train / X_test
#    stay raw. Assigning into the split frames directly can trigger pandas'
#    SettingWithCopyWarning, and it silently hands already-scaled data to the
#    later grid-search cells, whose Pipeline applies its own StandardScaler
#    (double scaling, with statistics taken from the whole training set
#    instead of from each CV training fold).
scaler = StandardScaler()
num_cols = ['bill_avg_log', 'download_avg_log', 'upload_avg_log', 'service_failure_count']

X_train_scaled = X_train.copy()
X_test_scaled = X_test.copy()
X_train_scaled[num_cols] = scaler.fit_transform(X_train[num_cols])  # fit on train only
X_test_scaled[num_cols] = scaler.transform(X_test[num_cols])        # reuse train statistics


# 3. Define the MLP model
mlp = MLPClassifier(hidden_layer_sizes=(64, 32),  # two hidden layers: 64 units, then 32 units
                    activation='relu',  # ReLU activation
                    solver='adam',      # Adam optimizer
                    max_iter=500,       # at most 500 training iterations
                    random_state=42)

# 4. Train
mlp.fit(X_train_scaled, y_train)

# 5. Predict on the held-out test set
y_pred = mlp.predict(X_test_scaled)

print("\nbaseline_MLPClassifier_result")
print(classification_report(y_test, y_pred))


baseline_MLPClassifier_result
              precision    recall  f1-score   support

           0       0.91      0.95      0.93      6327
           1       0.96      0.92      0.94      8052

    accuracy                           0.93     14379
   macro avg       0.93      0.94      0.93     14379
weighted avg       0.94      0.93      0.93     14379



In [5]:
# Hyperparameter tuning: GridSearch
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report
import pandas as pd

# Scaler + MLP pipeline used as the grid-search estimator.
# The StandardScaler step is applied to every feature column passed to the pipeline.
# NOTE(review): the original comment called the scaler a placeholder that the
# grid search replaces, but the parameter grid visible in this notebook only
# tunes 'mlp__*' settings - confirm whether a scaler search was intended.
pipe = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('mlp', MLPClassifier(
        random_state=42,
        early_stopping=True,
        max_iter=500,
        solver='adam',
    )),
])


In [6]:

# Grid-search space for the scaler + MLP pipeline.
# Keys follow the '<step>__<parameter>' convention, so every entry targets the
# 'mlp' step; the scaler step itself is not tuned here.
# 3 architectures x 2 activations x 3 learning rates = 18 candidates.
param_grid = dict(
    mlp__hidden_layer_sizes=[(128, 64, 32), (64, 32), (64, 32, 16)],
    mlp__activation=['relu', 'tanh'],
    mlp__learning_rate_init=[0.001, 0.01, 0.1],
)


In [7]:
# 5-fold stratified cross-validation, shuffled with a fixed seed.
cv = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)

# Exhaustive search over param_grid, scored by F1, using all available cores.
grid = GridSearchCV(
    pipe,
    param_grid,
    cv=cv,
    scoring='f1',
    verbose=1,
    n_jobs=-1,
)


In [8]:
# Train: run the grid search on the training split.
grid.fit(X_train, y_train)

print("[Best params]", grid.best_params_)
print("[CV Best F1]:", round(grid.best_score_, 4))

# Test: evaluate the best pipeline found by the search on the held-out split.
best_model = grid.best_estimator_
y_prob = best_model.predict_proba(X_test)[:, 1]  # second column of predict_proba
y_pred = best_model.predict(X_test)

print(f"\ntuning_MLPClassifier_result")
print(classification_report(y_test, y_pred))

Fitting 5 folds for each of 18 candidates, totalling 90 fits
[Best params] {'mlp__activation': 'tanh', 'mlp__hidden_layer_sizes': (128, 64, 32), 'mlp__learning_rate_init': 0.001}
[CV Best F1]: 0.9426

tuning_MLPClassifier_result
              precision    recall  f1-score   support

           0       0.91      0.94      0.93      6327
           1       0.95      0.93      0.94      8052

    accuracy                           0.94     14379
   macro avg       0.93      0.94      0.93     14379
weighted avg       0.94      0.94      0.94     14379



In [9]:
from sklearn.metrics import average_precision_score, roc_auc_score

# Threshold-free metrics computed from the predicted probabilities in y_prob.
roc_auc = roc_auc_score(y_test, y_prob)
pr_auc = average_precision_score(y_test, y_prob)

print(f"ROC-AUC : {roc_auc:.4f} PR-AUC  : {pr_auc:.4f}")

ROC-AUC : 0.9696 PR-AUC  : 0.9804
