In [13]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from sklearn.preprocessing import *
from sklearn.model_selection import *
from sklearn.metrics import *

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
cuda = True if torch.cuda.is_available() else False
Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor

In [11]:
train_data = pd.read_csv("C:/Users/user/Desktop/deep-learning-programing-project/train_data.csv")
print(data.dtypes)

LABEL          int64
FLUX.1       float64
FLUX.2       float64
FLUX.3       float64
FLUX.4       float64
              ...   
FLUX.3193    float64
FLUX.3194    float64
FLUX.3195    float64
FLUX.3196    float64
FLUX.3197    float64
Length: 3198, dtype: object


In [None]:
# 데이터 전처리
# 라벨을 제외한 플럭스 데이터만 선택
flux_columns = data.columns[1:]  # 첫 번째 열은 LABEL이므로 제외
X = data[flux_columns]

# 라벨 데이터 선택
y = data['LABEL']

# StandardScaler를 사용하여 데이터 정규화
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 데이터 분할 (검증 데이터 비율 0.2)
X_train, X_val, y_train, y_val = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# 시계열 데이터의 형태로 변환 (샘플 수, 시간 단계, 특징 수)
X_train_reshaped = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_val_reshaped = X_val.reshape((X_val.shape[0], X_val.shape[1], 1))

# 레이블이 0부터 시작하도록 조정
y_train = y_train - 1
y_val = y_val - 1

X_train_reshaped.shape

In [2]:
# LSTM 모델 정의
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
        super(LSTMModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).to(device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).to(device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

In [None]:
# 모델 파라미터 설정
input_dim = 1  # 특징 수 (시계열 데이터의 경우 보통 1)
hidden_dim = 64
output_dim = 2  # 레이블 수
num_layers = 2

In [None]:
# 모델 생성 및 전송
model = LSTMModel(input_dim, hidden_dim, output_dim, num_layers).to(device)

(4069, 3197, 1)

In [4]:
# 데이터 텐서로 변환 및 전송 (정수형 레이블 사용)
X_train_tensor = torch.tensor(X_train_reshaped, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long).to(device)
X_val_tensor = torch.tensor(X_val_reshaped, dtype=torch.float32).to(device)
y_val_tensor = torch.tensor(y_val.values, dtype=torch.long).to(device)

X_train_tensor

tensor([[[-0.0068],
         [-0.0061],
         [-0.0069],
         ...,
         [ 0.0299],
         [ 0.0259],
         [ 0.0199]],

        [[-0.0088],
         [-0.0068],
         [-0.0059],
         ...,
         [ 0.0344],
         [ 0.0219],
         [ 0.0048]],

        [[-0.3390],
         [-0.6628],
         [-0.2338],
         ...,
         [ 0.4679],
         [ 0.3186],
         [-0.2473]],

        ...,

        [[-0.0069],
         [-0.0068],
         [-0.0072],
         ...,
         [ 0.0312],
         [ 0.0264],
         [ 0.0222]],

        [[-0.0063],
         [-0.0061],
         [-0.0065],
         ...,
         [ 0.0307],
         [ 0.0269],
         [ 0.0208]],

        [[-0.0120],
         [-0.0152],
         [-0.0121],
         ...,
         [ 0.0243],
         [ 0.0164],
         [ 0.0227]]], device='cuda:0')

In [8]:
# 손실 함수와 최적화 함수 설정
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

In [9]:
# 학습 및 검증 함수 정의
def train(model, criterion, optimizer, X_train, y_train, batch_size):
    model.train()
    total_loss = 0
    for i in range(0, len(X_train), batch_size):
        X_batch = X_train[i:i+batch_size]
        y_batch = y_train[i:i+batch_size]
        
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
    
    return total_loss / len(X_train)

def evaluate(model, criterion, X_val, y_val, batch_size):
    model.eval()
    total_loss = 0
    all_preds = []
    with torch.no_grad():
        for i in range(0, len(X_val), batch_size):
            X_batch = X_val[i:i+batch_size]
            y_batch = y_val[i:i+batch_size]
            
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            total_loss += loss.item()
            
            preds = torch.argmax(outputs, dim=1)
            all_preds.extend(preds.cpu().numpy())
    
    accuracy = accuracy_score(y_val.cpu(), all_preds)
    return total_loss / len(X_val), accuracy

# 학습 및 검증
num_epochs = 10
batch_size = 32

for epoch in range(num_epochs):
    train_loss = train(model, criterion, optimizer, X_train_tensor, y_train_tensor, batch_size)
    val_loss, val_accuracy = evaluate(model, criterion, X_val_tensor, y_val_tensor, batch_size)
    
    print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}')

Epoch 1/10, Train Loss: 0.0014, Val Loss: 0.0014, Val Accuracy: 0.9921
Epoch 2/10, Train Loss: 0.0014, Val Loss: 0.0014, Val Accuracy: 0.9921
Epoch 3/10, Train Loss: 0.0014, Val Loss: 0.0014, Val Accuracy: 0.9921
Epoch 4/10, Train Loss: 0.0014, Val Loss: 0.0014, Val Accuracy: 0.9921
Epoch 5/10, Train Loss: 0.0014, Val Loss: 0.0014, Val Accuracy: 0.9921
Epoch 6/10, Train Loss: 0.0014, Val Loss: 0.0014, Val Accuracy: 0.9921
Epoch 7/10, Train Loss: 0.0014, Val Loss: 0.0014, Val Accuracy: 0.9921
Epoch 8/10, Train Loss: 0.0014, Val Loss: 0.0017, Val Accuracy: 0.9921
Epoch 9/10, Train Loss: 0.0014, Val Loss: 0.0014, Val Accuracy: 0.9921
Epoch 10/10, Train Loss: 0.0014, Val Loss: 0.0014, Val Accuracy: 0.9921
