## simple RNN
- dataset : https://www.kaggle.com/datasets/iveeaten3223times/massive-yahoo-finance-dataset
- 순차(sequential) 데이터(시계열, 문장 등) 처리에 유리

## 데이터 관련

In [None]:
# Load the stock-details CSV from the Kaggle input directory into a DataFrame.
import pandas as pd
df_stock_all = pd.read_csv('/kaggle/input/massive-yahoo-finance-dataset/stock_details_5_years.csv')
# Last expression of the cell: display the first 3 rows.
df_stock_all.head(3)

In [None]:
# Extract the rows of a single company from the full table.
# The result is copied so that later column assignments on df_stock
# do not touch df_stock_all.
selected_company = 'AAPL'
is_selected = df_stock_all['Company'] == selected_company
df_stock = df_stock_all.loc[is_selected].copy()
df_stock.head()

#### 데이터 전처리

In [None]:
# Parse the Date column into datetimes; utc=True yields timezone-aware UTC values.
df_stock['Date'] = pd.to_datetime(df_stock['Date'], utc=True)
# Last expression of the cell: display the resulting dtype.
df_stock['Date'].dtype

In [None]:
# Print the DataFrame summary (column dtypes, non-null counts, memory usage).
df_stock.info()

In [None]:
# Standardize the price and volume columns in place with StandardScaler.
# NOTE(review): the scaler is fitted on every row of df_stock; if a
# train/test split is introduced later, fit it on the training rows only.
from sklearn.preprocessing import StandardScaler

scaled_columns = ['Open', 'High', 'Low', 'Close', 'Volume']
scaler = StandardScaler()
df_stock[scaled_columns] = scaler.fit_transform(df_stock[scaled_columns])
df_stock

## 모델 관련

In [None]:
# Build sliding-window sequences from the time series.
import numpy as np
import torch
sequence_length = 5   # window length: 5 consecutive rows (5-day units)

from tqdm import tqdm
def create_sequences(data, seq_length):
    """Slice `data` into sliding windows paired with their next element.

    For every start index ``i`` in ``range(len(data) - seq_length)`` the
    window is ``data[i:i + seq_length]`` and its target is
    ``data[i + seq_length]``. A tqdm progress bar is shown while iterating.

    Args:
        data: Sequence supporting ``len``, slicing and integer indexing.
        seq_length: Number of consecutive elements in each window.

    Returns:
        Tuple ``(windows, targets)`` of NumPy arrays built from the
        collected slices and elements. Both arrays are empty when
        ``len(data) <= seq_length``.
    """
    windows = []
    targets = []
    for start in tqdm(range(len(data) - seq_length), desc='Generating Sequences'):
        stop = start + seq_length
        windows.append(data[start:stop])
        # The element right after the window is what the model should predict.
        targets.append(data[stop])
    return np.array(windows), np.array(targets)

# Windows of Close values become the inputs; the value following each window is its target.
features, label = create_sequences(df_stock['Close'].values, sequence_length)
# Last expression of the cell: display both array shapes.
features.shape, label.shape

In [None]:
# Display the first three input windows.
features[:3]

PyTorch 모델에 입력하려면 NumPy 배열을 tensor로 변환해야 함

In [None]:
# Convert the NumPy arrays to float32 tensors.
# unsqueeze(-1) appends a trailing size-1 dimension to both inputs and targets.
features_tensor = torch.tensor(features, dtype=torch.float32).unsqueeze(-1)
label_tensor = torch.tensor(label, dtype=torch.float32).unsqueeze(-1)
# Last expression of the cell: display both tensor shapes.
features_tensor.shape, label_tensor.shape


In [None]:
# RNN 모델 정의
import torch.nn as nn
class SimpleRNN(nn.Module):
    """Elman RNN (``nn.RNN``) followed by a linear head on the last time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Number of features in the RNN hidden state.
        output_size: Number of values produced per input sequence.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run the RNN over ``x`` and project its final time step.

        Args:
            x: Batched input of shape ``(batch, seq_len, input_size)``
                (the RNN is built with ``batch_first=True``).

        Returns:
            Tensor of shape ``(batch, output_size)``.
        """
        # Build the zero initial state from the module's own configuration and
        # from x, rather than from a module-level `hidden_size` variable, so
        # the model works for any hidden size and on any device/dtype.
        h0 = torch.zeros(
            self.rnn.num_layers,
            x.size(0),
            self.rnn.hidden_size,
            dtype=x.dtype,
            device=x.device,
        )
        out, _ = self.rnn(x, h0)
        # Only the output at the last time step feeds the linear head.
        return self.fc(out[:, -1, :])

input_size = 1  # features per time step (each step of a window is a single value)
hidden_size = 5  # number of features in the RNN hidden state (not the number of layers)
output_size = 1  # one predicted value per input window

model = SimpleRNN(input_size, hidden_size, output_size)
# Last expression of the cell: display the module structure.
model

In [None]:
# Train the model: one full-batch AdamW update on the MSE loss per epoch.
import torch.optim as optim

epochs = 100

criterion = nn.MSELoss()
optimizer = optim.AdamW(model.parameters(), lr=0.01)

for epoch in tqdm(range(epochs), desc='learning Model'):
    model.train()

    # Clear stale gradients, then forward / backward / update.
    optimizer.zero_grad()
    outputs = model(features_tensor)
    loss = criterion(outputs, label_tensor)
    loss.backward()
    optimizer.step()

    # Report the loss once every 10 epochs.
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch+1} / {epochs}], Loss : {loss.item():.5f}')

In [None]:
# Display the model's parameter tensors by name.
model.state_dict()

## 평가 관련

In [None]:
# Switch the model to evaluation mode.
model.eval()

In [None]:
# Predict the first five windows with gradient tracking disabled, then
# display the predictions next to their targets.
# NOTE(review): these windows are part of the data the model was trained on,
# so this is a sanity check rather than a held-out evaluation.
with torch.no_grad():
    predicted = model(features_tensor[:5])
predicted, label_tensor[:5]