In [None]:
import numpy as np

import torch
import torch.nn as nn

# Choose the compute backend in priority order: CUDA GPU, then Apple MPS,
# then plain CPU as the fallback. The MPS check is only evaluated when CUDA
# is unavailable.
device = (
    "cuda" if torch.cuda.is_available()
    else "mps" if torch.backends.mps.is_available()
    else "cpu"
)

print(device)

In [2]:
filename = "KRW-XRP-1m-full"

# Read the preprocessed training inputs and targets from their .npy files.
X = np.load(f"../preprocessed/{filename}-train-X.npy")
y = np.load(f"../preprocessed/{filename}-train-y.npy")

# Wrap the NumPy arrays as float32 tensors and place them on the selected device.
X_train = torch.from_numpy(X).float().to(device)
y_train = torch.from_numpy(y).float().to(device)

In [3]:
class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear layer applied to the last time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of each LSTM layer's hidden state.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of values produced per input sequence.
        device: Device label supplied by the caller. It is stored on the
            instance for backward compatibility but is not used by
            ``forward``; computation follows the device of the input.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size, device):
        super().__init__()

        self.device = device
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # Recurrent layers; batch_first=True means inputs are (batch, seq, feature).
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        # Output projection from the hidden state to the prediction.
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run the LSTM over ``x`` and project the final time step.

        Args:
            x: Tensor of shape (batch, seq_len, input_size).

        Returns:
            Tensor of shape (batch, output_size).
        """
        # No explicit (h0, c0) is passed: nn.LSTM then starts from zero states
        # created on the same device and with the same dtype as the input.
        # Building them by hand from the device label captured at construction
        # breaks as soon as the module is moved or cast afterwards.
        out, _ = self.lstm(x)

        # Only the output at the last time step feeds the linear head.
        out = self.fc(out[:, -1, :])
        return out

In [None]:
# Model dimensions: the feature count is read from the last axis of the
# 3-D training tensor.
input_size = X_train.shape[2]  # 5
hidden_size = 50
num_layers = 2
output_size = 1

# Optimisation settings.
learning_rate = 0.001
num_epochs = 100
batch_size = 64  # number of samples per mini-batch

# Echo the settings that identify this run.
print(f"hidden_size: {hidden_size}")
print(f"num_layers: {num_layers}")
print(f"num_epochs: {num_epochs}")
print(f"batch_size: {batch_size}")

In [5]:
# Build the network on the selected device, together with the MSE loss and
# an Adam optimizer over its parameters.
model = LSTMModel(input_size, hidden_size, num_layers, output_size, device).to(device)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Optional GPU diagnostics (uncomment to inspect):
# print("Available GPUs:", torch.cuda.device_count())

# for i in range(torch.cuda.device_count()):
#     print(f"GPU {i}:")
#     print("  Name:", torch.cuda.get_device_name(i))
#     print("  Allocated memory:", torch.cuda.memory_allocated(i) / 1e9, "GB")
#     print("  Cached memory:", torch.cuda.memory_reserved(i) / 1e9, "GB")
#     print("  Utilization:", torch.cuda.max_memory_allocated(i) / torch.cuda.get_device_properties(i).total_memory * 100, "%")


# print("Current GPU in use:", torch.cuda.current_device())

# Replicate the model across GPUs only when more than one is present.
if torch.cuda.device_count() > 1:
    model = torch.nn.DataParallel(model)
# Keep the model on the device chosen earlier. A hard-coded 'cuda' here would
# raise on CPU-only or MPS machines.
model.to(device)

current_device = next(model.parameters()).device
# print("Current device:", current_device)

In [None]:
import time

# Release cached GPU memory before training starts (CUDA only).
if torch.cuda.is_available():
    torch.cuda.empty_cache()

# Send each batch to wherever the model's parameters actually live, so the
# loop does not depend on the `device` global matching the model's placement.
model_device = next(model.parameters()).device
num_samples = len(X_train)

try:
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0.0
        start_time = time.time()  # wall-clock start of this epoch

        # Sequential mini-batches; the last one may be shorter than batch_size.
        for i in range(0, num_samples, batch_size):
            X_batch = X_train[i:i + batch_size].to(model_device)
            y_batch = y_train[i:i + batch_size].to(model_device)

            # Forward pass; targets are reshaped to a column to match the output.
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch.view(-1, 1))

            optimizer.zero_grad()  # clear gradients from the previous step
            loss.backward()  # backpropagate
            optimizer.step()  # update the weights

            # criterion returns the batch mean, so weight it by the batch size;
            # a short final batch is then not over-represented in the average.
            total_loss += loss.item() * X_batch.size(0)

        # Per-epoch report: elapsed time and the mean loss over all samples.
        elapsed_time = time.time() - start_time
        avg_loss = total_loss / max(num_samples, 1)  # guard against an empty dataset
        print(f'Epoch [{epoch + 1}/{num_epochs}], Average Loss: {avg_loss:.8f}, Time: {elapsed_time:.2f} sec')

        # Optionally drop cached GPU memory between epochs (CUDA only).
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

except Exception as e:
    print(f"Training failed: {e}")
    # Re-raise so the notebook stops here instead of continuing on to save a
    # partially trained model, and so the full traceback stays visible.
    raise

In [None]:
# Checkpoint file name: the dataset name followed by the hyperparameters that
# identify this run.
model_filename = (
    f'{filename}'
    f'-lr{learning_rate}_bs{batch_size}'
    f'-epochs{num_epochs}'
    f'-hs{hidden_size}_nl{num_layers}'
    '.pth'
)

In [8]:
# `model` is wrapped in DataParallel only when several GPUs are present, so
# `.module` exists only in that case. Unwrap conditionally so saving also works
# on single-GPU, MPS and CPU runs, and so the saved keys never carry the
# "module." prefix.
unwrapped_model = model.module if isinstance(model, torch.nn.DataParallel) else model
torch.save(unwrapped_model.state_dict(), f"../model/{model_filename}")