In [None]:
import pandas as pd
from pytorch_forecasting import TimeSeriesDataSet, GroupNormalizer
import os

In [None]:
# Location of the preprocessed daily data file.
data_dir = "/app/bucket/data/processed_data"
filedir = os.path.join(data_dir, "prep_D_data.csv")

# Read the CSV, parse the 'ds' column as dates, and use it as a sorted index.
data_df = (
    pd.read_csv(filedir)
    .assign(ds=lambda frame: pd.to_datetime(frame['ds'], format='%Y-%m-%d'))
    .set_index('ds')
    .sort_index()
)

# Add the columns the time-series dataset needs:
#   time_idx    - consecutive integer position of each row after sorting
#   group       - constant series identifier (single series)
#   day_of_week - weekday number of the index, stored as a string
data_df = data_df.assign(
    time_idx=range(len(data_df)),
    group='SPY',
    day_of_week=data_df.index.dayofweek,
).astype({'day_of_week': str})

print(data_df.shape)
data_df.head()

In [None]:
# Hold out the last 120 time steps: only rows with time_idx up to the cut-off
# are used for training.
# NOTE(review): this cell only builds the training split; no validation
# dataset is created from the held-out rows here.
validation_cut_off = data_df['time_idx'].max() - 120

training_data = TimeSeriesDataSet(
    data_df[lambda x: x.time_idx <= validation_cut_off],
    time_idx="time_idx",
    target="spy_close",
    group_ids=["group"],
    max_encoder_length=60,    # look-back window length
    max_prediction_length=7,  # forecast horizon length

    # [Variable type definitions]
    # "group" has the same value on every row, so it is declared as a static
    # categorical rather than a time-varying one.
    static_categoricals=["group"],
    time_varying_known_categoricals=["day_of_week"],
    time_varying_known_reals=["time_idx"],
    # Variables not known in advance (including the prediction target).
    # Each column is listed exactly once: a repeated name would feed the same
    # column to the model twice.
    time_varying_unknown_reals=[
        "spy_close",
        "SMA_60",
        "SMA_120",
        "BBL_20_2.0_2.0",
        "BBM_20_2.0_2.0",
        "BBU_20_2.0_2.0",
        "BBB_20_2.0_2.0",
        "BBP_20_2.0_2.0",
        "MACD_12_26_9",
        "MACDh_12_26_9",
        "MACDs_12_26_9",
        "RSI_14",
        "spy_volume_lag_1D",
        "CLI_lag_1MS",
    ],

    # [Normalisation]
    target_normalizer=GroupNormalizer(groups=["group"], transformation="softplus"),
    add_target_scales=True,
    add_encoder_length=True,
)

# Create the training data loader.
batch_size = 64
train_dataloader = training_data.to_dataloader(train=True, batch_size=batch_size, num_workers=0)

In [None]:
from pytorch_forecasting.models import TemporalFusionTransformer
from pytorch_forecasting.metrics import QuantileLoss
import lightning.pytorch as pl

# Fix the random seeds before the model is built and trained so that weight
# initialisation and batch shuffling are repeatable across runs.
pl.seed_everything(42, workers=True)

# 1. Define the TFT model; its input layout is derived from the training dataset.
tft = TemporalFusionTransformer.from_dataset(
    training_data,
    loss=QuantileLoss(),  # standard loss function (quantile loss)
    hidden_size=16,
    attention_head_size=1,
)

# 2. Run training (training loader only; no validation loader is passed).
trainer = pl.Trainer(max_epochs=10, accelerator="auto")
trainer.fit(tft, train_dataloaders=train_dataloader)

In [None]:
# 1. Prepare the data frame used for prediction: the most recent encoder
#    window followed by placeholder rows for the steps to forecast.
encoder_length = training_data.max_encoder_length
prediction_length = training_data.max_prediction_length

# last_data is the history the encoder reads (the last `encoder_length` rows).
last_data = data_df.iloc[-encoder_length:]

# Future rows: repeat the last observed row so every column is present, then
# overwrite the features that are known in advance (date, time_idx, day_of_week).
# The remaining columns are carried-forward placeholders.
last_row = data_df.iloc[[-1]]
last_time_idx = int(last_row["time_idx"].iloc[0])
# Business days are used so that only weekday values appear in day_of_week.
# NOTE(review): market holidays are not excluded here - confirm this is acceptable.
future_dates = pd.bdate_range(
    start=last_row.index[0] + pd.offsets.BDay(1),
    periods=prediction_length,
    name=data_df.index.name,
)
decoder_data = pd.concat([last_row] * prediction_length)
decoder_data.index = future_dates
decoder_data["time_idx"] = range(last_time_idx + 1, last_time_idx + prediction_length + 1)
decoder_data["day_of_week"] = future_dates.dayofweek.astype(str)

future_data = pd.concat([last_data, decoder_data])

# 2. Run the prediction. mode="raw" keeps the full network output (all
#    quantiles), which is what plot_prediction expects.
raw_predictions = tft.predict(
    future_data,
    mode="raw",
    return_x=True,
    trainer_kwargs=dict(accelerator="cpu"),
)

# 3. Point forecast for each step of the horizon, derived from the quantile
#    output through the model's loss (per pytorch-forecasting, QuantileLoss
#    uses the median quantile).
predictions = tft.to_prediction(raw_predictions.output)

# To visualise the forecast:
# tft.plot_prediction(raw_predictions.x, raw_predictions.output, idx=0, show_future_observed=False)