In [None]:
import pandas as pd
from google.colab import files

# 파일 업로드
uploaded = files.upload()  # 파일 업로드 창이 뜹니다

# 업로드된 파일의 이름을 가져옵니다
file_path = list(uploaded.keys())[0]

# 데이터 로드 (엑셀 파일)
df = pd.read_excel(file_path)

Saving processed_data.xlsx to processed_data.xlsx


In [None]:
# 단계 1: 데이터 준비 및 전처리
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# 1.2 결측값 처리
df = df.fillna(0)  # 간단히 결측값을 0으로 대체

# 1.3 특성과 타깃 변수 분리
features = df.drop(columns=['Admission1'])  # 타깃 열 제외
target = df['Admission1']

# 1.4 데이터 스케일링 (LSTM 입력 안정화)
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)

# 1.5 시계열 데이터 생성
def create_sequences(data, target, sequence_length):
    sequences = []
    labels = []
    for i in range(len(data) - sequence_length):
        seq = data[i:i + sequence_length]
        label = target[i + sequence_length]
        sequences.append(seq)
        labels.append(label)
    return np.array(sequences), np.array(labels)

sequence_length = 10  # 시계열 길이
X, y = create_sequences(scaled_features, target.values, sequence_length)

# 1.6 학습 및 테스트 데이터 분리
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 데이터를 텐서로 변환
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

In [None]:
# 단계 2: LSTM 모델 정의
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        _, (hidden, _) = self.lstm(x)  # LSTM 출력 중 hidden 상태만 사용
        out = self.fc(hidden[-1])  # 마지막 LSTM 레이어의 출력 사용
        return self.sigmoid(out)

# 모델 파라미터 설정
input_size = X_train.shape[2]  # 특성 수
hidden_size = 64  # LSTM의 은닉 상태 크기
num_layers = 2  # LSTM 레이어 수
output_size = 1  # 이진 분류

model = LSTMModel(input_size, hidden_size, num_layers, output_size)


In [None]:
# 단계 3: 모델 학습 및 평가
# 손실 함수와 최적화 알고리즘
criterion = nn.BCELoss()  # Binary Cross Entropy Loss
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# 학습 루프
epochs = 20
batch_size = 32

for epoch in range(epochs):
    model.train()
    permutation = torch.randperm(X_train.size(0))
    epoch_loss = 0

    for i in range(0, X_train.size(0), batch_size):
        indices = permutation[i:i + batch_size]
        batch_X, batch_y = X_train[indices], y_train[indices]

        # Forward pass
        outputs = model(batch_X)
        loss = criterion(outputs.squeeze(), batch_y)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    print(f"Epoch {epoch + 1}/{epochs}, Loss: {epoch_loss / len(X_train)}")

# 평가
model.eval()
with torch.no_grad():
    test_outputs = model(X_test)
    test_predictions = (test_outputs.squeeze() > 0.5).float()
    accuracy = (test_predictions == y_test).float().mean()
    print(f"Test Accuracy: {accuracy.item() * 100:.2f}%")


ValueError: Using a target size (torch.Size([1])) that is different to the input size (torch.Size([])) is deprecated. Please ensure they have the same size.

In [None]:
# 단계 4: 모델 평가
model.eval()
with torch.no_grad():
    test_outputs = model(X_test)
    test_predictions = (test_outputs.squeeze() > 0.5).float()  # 0.5 기준으로 분류
    accuracy = (test_predictions == y_test).float().mean()
    print(f"Test Accuracy on 500 Samples: {accuracy.item() * 100:.2f}%")


Test Accuracy on 500 Samples: 63.51%


In [None]:
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
import torch
import torch.nn as nn
import numpy as np

# 1. PyTorch 모델 정의
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        _, (hidden, _) = self.lstm(x)
        out = self.fc(hidden[-1])
        return self.sigmoid(out)

# 2. Scikit-learn 래퍼 클래스 정의
class PyTorchLSTM(BaseEstimator, ClassifierMixin):
    def __init__(self, input_size, hidden_size=64, num_layers=1, learning_rate=0.001, epochs=10):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model = None

    def fit(self, X, y):
        # Initialize model, loss, and optimizer
        self.model = LSTMModel(self.input_size, self.hidden_size, self.num_layers, 1).to(self.device)
        criterion = nn.BCELoss()
        optimizer = torch.optim.Adam(self.model.parameters(), lr=self.learning_rate)

        # Prepare data
        X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)
        y_tensor = torch.tensor(y, dtype=torch.float32).to(self.device)

        # Training loop
        for epoch in range(self.epochs):
            self.model.train()
            optimizer.zero_grad()
            outputs = self.model(X_tensor)
            loss = criterion(outputs.squeeze(), y_tensor)
            loss.backward()
            optimizer.step()

        return self

    def predict(self, X):
        self.model.eval()
        with torch.no_grad():
            X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)
            outputs = self.model(X_tensor)
            predictions = (outputs.squeeze().cpu().numpy() > 0.5).astype(int)
        return predictions

    def score(self, X, y):
        predictions = self.predict(X)
        return accuracy_score(y, predictions)

# 3. GridSearchCV를 사용한 하이퍼파라미터 탐색
# 입력 데이터 (X_train, y_train)과 테스트 데이터 (X_test, y_test)는 이전 코드에서 생성된 데이터 사용

param_grid = {
    'hidden_size': [32, 64, 128],
    'num_layers': [1, 2, 3],
    'learning_rate': [0.01, 0.001, 0.0001],
    'epochs': [10, 20]
}

grid_search = GridSearchCV(
    estimator=PyTorchLSTM(input_size=X_train.shape[2]),
    param_grid=param_grid,
    scoring='accuracy',
    cv=3  # 3-fold 교차 검증
)

grid_search.fit(X_train.numpy(), y_train.numpy())

# 최적의 하이퍼파라미터 출력
print("Best Parameters:", grid_search.best_params_)

# 테스트 데이터로 평가
best_model = grid_search.best_estimator_
test_accuracy = best_model.score(X_test.numpy(), y_test.numpy())
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")


Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_validation.py", line 971, in _score
    scores = scorer(estimator, X_test, y_test, **score_params)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_scorer.py", line 279, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_scorer.py", line 371, in _score
    y_pred = method_caller(
  File "/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_scorer.py", line 89, in _cached_call
    result, _ = _get_response_values(
  File "/usr/local/lib/python3.10/dist-packages/sklearn/utils/_response.py", line 199, in _get_response_values
    classes = estimator.classes_
AttributeError: 'PyTorchLSTM' object has no attribute 'classes_'

 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 

Best Parameters: {'epochs': 10, 'hidden_size': 32, 'learning_rate': 0.01, 'num_layers': 1}
Test Accuracy: 68.56%
