In [None]:
# TensorFlow 설치
%pip install tensorflow

# PyTorch 설치 (기본 CPU 버전)
%pip install torch torchvision torchaudio

# PyTorch로 만든 모델을 scikit-learn처럼 편하게 학습/평가/튜닝할 때 사용
%pip install skorch
# TensorFlow 설치
%pip install tensorflow

# PyTorch 설치 (기본 CPU 버전)
# %pip install torch torchvision torchaudio

# NumPy, Pandas, scikit-learn 설치
%pip install numpy pandas scikit-learn

In [None]:
%pip install matplotlib

In [None]:
%pip uninstall torch torchvision torchaudio -y

In [None]:
%pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu130


In [None]:
import torch
print(torch.version.cuda)          # CUDA 버전 확인
print(torch.cuda.is_available())   # True = GPU 사용 가능
print(torch.cuda.get_device_name(0)) # GPU 이름 출력


In [None]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("Using device:", device)

#### 데이터 불러오기

In [None]:
# # 데이터 불러오기 1 : 기존 수치형 데이터
# import pandas as pd
# url = "C:\\Users\\Playdata2\\Downloads\\tree_model_preprocessed.csv"
# df = pd.read_csv(url)
# df.head()

In [1]:
# 데이터 불러오기 2 : 로그변환 수치형 데이터
import pandas as pd
url = "../../DL_data/dataset/re_log_model_preprocessed.csv"
df = pd.read_csv(url)
df.head()

Unnamed: 0,subscription_age,service_failure_count,download_over_limit,churn,contract_type_active,contract_type_expired,contract_type_no_contract,sub_both,sub_movie,sub_none,sub_tv,bill_avg_log,download_avg_log,upload_avg_log
0,11.95,0,0,0,1,0,0,0,0,0,1,3.258097,2.24071,1.193922
1,8.22,0,0,1,0,0,1,0,0,1,0,0.0,0.0,0.0
2,8.91,0,0,1,0,1,0,0,0,0,1,2.833213,2.687847,0.641854
3,6.87,1,0,1,0,0,1,0,0,1,0,3.091042,0.0,0.0
4,6.39,0,0,1,0,0,1,0,0,1,0,0.0,0.0,0.0


In [None]:
# 랜덤서치 3-150
import torch
import torch.nn as nn
import torch.optim as optim
from skorch import NeuralNetClassifier
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.pipeline import Pipeline
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import numpy as np
from skorch.callbacks import EarlyStopping,EpochScoring

# GPU 자동 선택
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("Using device:", device)

# 데이터 준비
X = df.drop('churn', axis=1).values.astype(np.float32)
y = df['churn'].values.astype(np.float32)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# PyTorch 모델 정의 (BCEWithLogitsLoss용)
class ChurnModel(nn.Module):
    def __init__(self, layers=[64,32,16], activation=nn.ReLU):
        super().__init__()
        layer_list = []
        input_dim = X_train.shape[1]
        for l in layers:
            layer_list.append(nn.Linear(input_dim, l))
            layer_list.append(activation())
            input_dim = l
        layer_list.append(nn.Linear(input_dim, 1))  # 마지막 Linear
        self.network = nn.Sequential(*layer_list)
    
    def forward(self, X):
        return self.network(X).squeeze(1)

# skorch 래퍼
net = NeuralNetClassifier(
    module=ChurnModel,
    max_epochs=10,
    lr=0.001,
    batch_size=32,
    optimizer=optim.Adam,
    criterion=nn.BCEWithLogitsLoss,
    iterator_train__shuffle=True,
    device=device,
    callbacks=[EarlyStopping(patience=5), # 개선 없으면 학습 중단
        EpochScoring('accuracy', on_train=True, name='train_acc'),
        EpochScoring('accuracy', on_train=False, name='valid_acc')
    ],
    verbose=1,
)

# 파이프라인 생성
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('net', net)
])

# 랜덤서치할 파라미터
param_dist = {
    'net__module__layers': [[16, 8], [16, 8, 4], [16, 8, 16], [16, 32, 16], [64, 32], [64, 32, 16], [64, 32, 64], [128, 64, 32]],
    'net__module__activation': [nn.ReLU, nn.Tanh],
    'net__optimizer': [optim.Adam, optim.SGD],
    'net__lr': [0.001, 0.01, 0.1],
    'net__optimizer__weight_decay': [0.0, 0.0001, 0.001],
    'net__batch_size': [32, 64, 128],
    'net__max_epochs': [10, 20, 30]
}

random_search = RandomizedSearchCV(
    estimator=pipeline,
    param_distributions=param_dist,
    n_iter=150,
    cv=3,
    n_jobs=-1,
    verbose=1,
    random_state=42
)

# RandomizedSearchCV 학습
random_result = random_search.fit(X_train, y_train)

# 최적 모델로 예측
best_model = random_result.best_estimator_
y_pred = best_model.predict(X_test)

# classification metrics
print("Accuracy: {:.4f}".format(accuracy_score(y_test, y_pred)))
print("Precision: {:.4f}".format(precision_score(y_test, y_pred)))
print("Recall: {:.4f}".format(recall_score(y_test, y_pred)))
print("F1 Score: {:.4f}".format(f1_score(y_test, y_pred)))
print("\nClassification Report:\n", classification_report(y_test, y_pred, digits=4))

# 최적 하이퍼파라미터 확인
print("Best Parameters Found:")
print(random_result.best_params_)


Using device: cuda
Fitting 3 folds for each of 150 candidates, totalling 450 fits
  epoch    train_acc    train_loss    valid_acc    valid_loss     dur
-------  -----------  ------------  -----------  ------------  ------
      1       [36m0.9312[0m        [32m0.2247[0m       [35m0.9350[0m        [31m0.2045[0m  1.6397
      2       0.9339        [32m0.2086[0m       [35m0.9345[0m        [31m0.2027[0m  1.5303
      3       0.9339        [32m0.2055[0m       0.9352        [31m0.1994[0m  1.6467
      4       0.9348        [32m0.2036[0m       0.9363        [31m0.1971[0m  1.7901
      5       0.9349        [32m0.2023[0m       0.9365        [31m0.1950[0m  1.8105
      6       0.9353        [32m0.2014[0m       0.9365        0.1952  1.8300
      7       0.9360        [32m0.2004[0m       0.9377        [31m0.1941[0m  1.8279
      8       0.9362        [32m0.1992[0m       0.9382        0.1949  1.8041
      9       0.9367        [32m0.1988[0m       0.9384        

In [None]:
# 랜덤서치 3-300
import torch
import torch.nn as nn
import torch.optim as optim
from skorch import NeuralNetClassifier
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.pipeline import Pipeline
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import numpy as np
from skorch.callbacks import EarlyStopping,EpochScoring

# GPU 자동 선택
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("Using device:", device)

# 데이터 준비
X = df.drop('churn', axis=1).values.astype(np.float32)
y = df['churn'].values.astype(np.float32)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# PyTorch 모델 정의 (BCEWithLogitsLoss용)
class ChurnModel(nn.Module):
    def __init__(self, layers=[64,32,16], activation=nn.ReLU):
        super().__init__()
        layer_list = []
        input_dim = X_train.shape[1]
        for l in layers:
            layer_list.append(nn.Linear(input_dim, l))
            layer_list.append(activation())
            input_dim = l
        layer_list.append(nn.Linear(input_dim, 1))  # 마지막 Linear
        self.network = nn.Sequential(*layer_list)
    
    def forward(self, X):
        return self.network(X).squeeze(1)

# skorch 래퍼
net = NeuralNetClassifier(
    module=ChurnModel,
    max_epochs=10,
    lr=0.001,
    batch_size=32,
    optimizer=optim.Adam,
    criterion=nn.BCEWithLogitsLoss,
    iterator_train__shuffle=True,
    device=device,
    callbacks=[EarlyStopping(patience=5), # 개선 없으면 학습 중단
        EpochScoring('accuracy', on_train=True, name='train_acc'),
        EpochScoring('accuracy', on_train=False, name='valid_acc')
    ],
    verbose=1,
)

# 파이프라인 생성
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('net', net)
])

# 랜덤서치할 파라미터
param_dist = {
    'net__module__layers': [[16, 8], [16, 8, 4], [16, 8, 16], [16, 32, 16], [64, 32], [64, 32, 16], [64, 32, 64], [128, 64, 32]],
    'net__module__activation': [nn.ReLU, nn.Tanh],
    'net__optimizer': [optim.Adam, optim.SGD],
    'net__lr': [0.001, 0.01, 0.1],
    'net__optimizer__weight_decay': [0.0, 0.0001, 0.001],
    'net__batch_size': [32, 64, 128],
    'net__max_epochs': [10, 20, 30]
}

random_search = RandomizedSearchCV(
    estimator=pipeline,
    param_distributions=param_dist,
    n_iter= 300,
    cv=3,
    n_jobs=-1,
    verbose=1,
    random_state=42
)

# RandomizedSearchCV 학습
random_result = random_search.fit(X_train, y_train)

# 최적 모델로 예측
best_model = random_result.best_estimator_
y_pred = best_model.predict(X_test)

# classification metrics
print("Accuracy: {:.4f}".format(accuracy_score(y_test, y_pred)))
print("Precision: {:.4f}".format(precision_score(y_test, y_pred)))
print("Recall: {:.4f}".format(recall_score(y_test, y_pred)))
print("F1 Score: {:.4f}".format(f1_score(y_test, y_pred)))
print("\nClassification Report:\n", classification_report(y_test, y_pred, digits=4))

# 최적 하이퍼파라미터 확인
print("Best Parameters Found:")
print(random_result.best_params_)


Using device: cuda
Fitting 3 folds for each of 300 candidates, totalling 900 fits
  epoch    train_acc    train_loss    valid_acc    valid_loss     dur
-------  -----------  ------------  -----------  ------------  ------
      1       [36m0.9212[0m        [32m0.2531[0m       [35m0.9352[0m        [31m0.2024[0m  0.9982
      2       0.9341        [32m0.2063[0m       0.9365        [31m0.1998[0m  1.0180
      3       0.9348        [32m0.2038[0m       0.9371        [31m0.1983[0m  1.0128
      4       0.9351        [32m0.2025[0m       0.9368        0.1983  1.0089
      5       0.9355        [32m0.2014[0m       0.9371        [31m0.1967[0m  1.0188
      6       0.9358        [32m0.2010[0m       0.9371        0.1969  1.0305
      7       0.9358        [32m0.2008[0m       0.9373        [31m0.1949[0m  1.0179
      8       0.9356        [32m0.2003[0m       0.9366        0.1955  1.0047
      9       0.9362        [32m0.1997[0m       0.9365        0.1968  1.0390
    

In [None]:
# 랜덤서치 2-600
import torch
import torch.nn as nn
import torch.optim as optim
from skorch import NeuralNetClassifier
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.pipeline import Pipeline
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import numpy as np
from skorch.callbacks import EarlyStopping,EpochScoring

# GPU 자동 선택
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("Using device:", device)

# 데이터 준비
X = df.drop('churn', axis=1).values.astype(np.float32)
y = df['churn'].values.astype(np.float32)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# PyTorch 모델 정의 (BCEWithLogitsLoss용)
class ChurnModel(nn.Module):
    def __init__(self, layers=[64,32,16], activation=nn.ReLU):
        super().__init__()
        layer_list = []
        input_dim = X_train.shape[1]
        for l in layers:
            layer_list.append(nn.Linear(input_dim, l))
            layer_list.append(activation())
            input_dim = l
        layer_list.append(nn.Linear(input_dim, 1))  # 마지막 Linear
        self.network = nn.Sequential(*layer_list)
    
    def forward(self, X):
        return self.network(X).squeeze(1)

# skorch 래퍼
net = NeuralNetClassifier(
    module=ChurnModel,
    max_epochs=10,
    lr=0.001,
    batch_size=32,
    optimizer=optim.Adam,
    criterion=nn.BCEWithLogitsLoss,
    iterator_train__shuffle=True,
    device=device,
    callbacks=[EarlyStopping(patience=5), # 개선 없으면 학습 중단
        EpochScoring('accuracy', on_train=True, name='train_acc'),
        EpochScoring('accuracy', on_train=False, name='valid_acc')
    ],
    verbose=1,
)

# 파이프라인 생성
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('net', net)
])

# 랜덤서치할 파라미터
param_dist = {
    'net__module__layers': [[16, 8], [16, 8, 4], [16, 8, 16], [16, 32, 16], [64, 32], [64, 32, 16], [64, 32, 64], [128, 64, 32]],
    'net__module__activation': [nn.ReLU, nn.Tanh],
    'net__optimizer': [optim.Adam, optim.SGD],
    'net__lr': [0.001, 0.01, 0.1],
    'net__optimizer__weight_decay': [0.0, 0.0001, 0.001],
    'net__batch_size': [256, 64, 128],
    'net__max_epochs': [10, 20, 30]
}

random_search = RandomizedSearchCV(
    estimator=pipeline,
    param_distributions=param_dist,
    n_iter= 600,
    cv=2,
    n_jobs=-1,
    verbose=1,
    random_state=42
)

# RandomizedSearchCV 학습
random_result = random_search.fit(X_train, y_train)

# 최적 모델로 예측
best_model = random_result.best_estimator_
y_pred = best_model.predict(X_test)

# classification metrics
print("Accuracy: {:.4f}".format(accuracy_score(y_test, y_pred)))
print("Precision: {:.4f}".format(precision_score(y_test, y_pred)))
print("Recall: {:.4f}".format(recall_score(y_test, y_pred)))
print("F1 Score: {:.4f}".format(f1_score(y_test, y_pred)))
print("\nClassification Report:\n", classification_report(y_test, y_pred, digits=4))

# 최적 하이퍼파라미터 확인
print("Best Parameters Found:")
print(random_result.best_params_)


Using device: cuda
Fitting 2 folds for each of 600 candidates, totalling 1200 fits
  epoch    train_acc    train_loss    valid_acc    valid_loss     dur
-------  -----------  ------------  -----------  ------------  ------
      1       [36m0.9275[0m        [32m0.2238[0m       [35m0.9356[0m        [31m0.2006[0m  1.5231
      2       0.9341        [32m0.2059[0m       [35m0.9350[0m        [31m0.1998[0m  1.4516
      3       0.9344        [32m0.2054[0m       0.9362        [31m0.1970[0m  1.3742
      4       0.9353        [32m0.2037[0m       0.9361        [31m0.1948[0m  1.3873
      5       0.9356        [32m0.2022[0m       0.9369        0.1955  1.3822
      6       0.9356        [32m0.2015[0m       0.9371        0.1964  1.4182
      7       0.9365        [32m0.2001[0m       0.9359        0.1970  1.3885
      8       0.9356        0.2003       0.9365        0.1962  1.3784
Stopping since valid_loss has not improved in the last 5 epochs.
Accuracy: 0.9350
Precision

In [None]:

# 랜덤서치 2 150
import torch
import torch.nn as nn
import torch.optim as optim
from skorch import NeuralNetClassifier
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.pipeline import Pipeline
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import numpy as np
from skorch.callbacks import EarlyStopping,EpochScoring

# GPU 자동 선택
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("Using device:", device)

# 데이터 준비
X = df.drop('churn', axis=1).values.astype(np.float32)
y = df['churn'].values.astype(np.float32)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# PyTorch 모델 정의 (BCEWithLogitsLoss용)
class ChurnModel(nn.Module):
    def __init__(self, layers=[64,32,16], activation=nn.ReLU):
        super().__init__()
        layer_list = []
        input_dim = X_train.shape[1]
        for l in layers:
            layer_list.append(nn.Linear(input_dim, l))
            layer_list.append(activation())
            input_dim = l
        layer_list.append(nn.Linear(input_dim, 1))  # 마지막 Linear
        self.network = nn.Sequential(*layer_list)
    
    def forward(self, X):
        return self.network(X).squeeze(1)

# skorch net
net = NeuralNetClassifier(
    module=ChurnModel,
    max_epochs=20,         # 최대 epoch 줄임
    lr=0.001,
    batch_size=64,         # 배치 크기 늘림
    optimizer=optim.Adam,
    criterion=nn.BCEWithLogitsLoss,
    iterator_train__shuffle=True,
    device=device,
    callbacks=[EarlyStopping(patience=5), # 개선 없으면 학습 중단
        EpochScoring('accuracy', on_train=True, name='train_acc'),
        EpochScoring('accuracy', on_train=False, name='valid_acc')
    ],
    verbose=1,
)

# 파이프라인 생성
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('net', net)
])

# 랜덤서치할 파라미터
param_dist = {
    'net__module__layers': [[16, 8], [16, 8, 4], [16, 8, 16], [16, 32, 16], [64, 32], [64, 32, 16], [64, 32, 64], [128, 64, 32]],
    'net__module__activation': [nn.ReLU, nn.Tanh],
    'net__optimizer': [optim.Adam, optim.SGD],
    'net__lr': [0.001, 0.01, 0.1],
    'net__optimizer__weight_decay': [0.0, 0.0001, 0.001],
    'net__batch_size': [256, 64, 128],
    'net__max_epochs': [10, 20, 30]
}

# RandomizedSearchCV
random_search = RandomizedSearchCV(
    estimator=pipeline,
    param_distributions=param_dist,
    n_iter=150,           # 500 -> 150으로 줄임
    cv=2,                 # 3-fold -> 2-fold
    n_jobs=-1,
    verbose=1,
    random_state=42
)

# RandomizedSearchCV 학습
random_result = random_search.fit(X_train, y_train)

# 최적 모델로 예측
best_model = random_result.best_estimator_
y_pred = best_model.predict(X_test)

# classification metrics
print("Accuracy: {:.4f}".format(accuracy_score(y_test, y_pred)))
print("Precision: {:.4f}".format(precision_score(y_test, y_pred)))
print("Recall: {:.4f}".format(recall_score(y_test, y_pred)))
print("F1 Score: {:.4f}".format(f1_score(y_test, y_pred)))
print("\nClassification Report:\n", classification_report(y_test, y_pred, digits=4))

# 최적 하이퍼파라미터 확인
print("Best Parameters Found:")
print(random_result.best_params_)

Using device: cuda
Fitting 2 folds for each of 150 candidates, totalling 300 fits


286 fits failed out of a total of 300.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
2 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\USER\Desktop\프로젝트2\.venv\Lib\site-packages\sklearn\model_selection\_validation.py", line 859, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\USER\Desktop\프로젝트2\.venv\Lib\site-packages\sklearn\base.py", line 1365, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "c:\Users\USER\Desktop\프로젝트2\.venv\Lib\site-packages\sklearn\pipeline.py", line 663, in fit
    self._final_estimator.fit(Xt, y, **last_step_params["fit"])
    ~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^

  epoch    train_acc    train_loss    valid_acc    valid_loss     dur
-------  -----------  ------------  -----------  ------------  ------
      1       [36m0.9307[0m        [32m0.2192[0m       [35m0.9356[0m        [31m0.2000[0m  0.9607
      2       0.9338        [32m0.2083[0m       0.9363        [31m0.1990[0m  0.8904
      3       0.9339        [32m0.2067[0m       0.9359        0.2000  0.9135
      4       0.9344        [32m0.2058[0m       [35m0.9352[0m        0.2013  0.9010
      5       0.9344        0.2063       [35m0.9346[0m        0.1998  0.9279
      6       0.9341        [32m0.2048[0m       0.9371        [31m0.1980[0m  0.9048
      7       0.9343        0.2055       0.9350        0.2040  0.9126
      8       0.9346        [32m0.2042[0m       0.9370        0.1995  0.9302
      9       0.9345        0.2042       0.9368        [31m0.1955[0m  0.8867
     10       0.9348        [32m0.2036[0m       0.9366        0.1990  0.9257
Accuracy: 0.9337
Precisio

In [None]:
# 시각화
import matplotlib.pyplot as plt
# best_model 가져오기
best_model = random_result.best_estimator_

# skorch history 접근
history = best_model.named_steps['net'].history

# epoch별 train/valid loss, accuracy
train_loss = [h['train_loss'] for h in history]
valid_loss = [h['valid_loss'] for h in history]
train_acc = [h['train_acc'] for h in history if 'train_acc' in h]
valid_acc = [h['valid_acc'] for h in history if 'valid_acc' in h]
epochs = range(1, len(train_loss)+1)

# 손실값 시각화
plt.figure(figsize=(10,5))
plt.plot(epochs, train_loss, label='Train Loss')
plt.plot(epochs, valid_loss, label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training & Validation Loss')
plt.legend()
plt.show()

# 정확도 시각화
if train_acc and valid_acc:
    plt.figure(figsize=(10,5))
    plt.plot(epochs, train_acc, label='Train Accuracy')
    plt.plot(epochs, valid_acc, label='Validation Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.title('Training & Validation Accuracy')
    plt.legend()
    plt.show()