In [18]:
import sys
import os
import argparse
import shutil
import random
from pathlib import Path

import pandas as pd
import numpy as np
import torch
import pytorch_lightning as pl

from pytorch_forecasting.data import (
    TimeSeriesDataSet,
    GroupNormalizer
)
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import (
    ModelCheckpoint,
    EarlyStopping,
    LearningRateMonitor
)
from pytorch_forecasting.metrics import SMAPE
from pytorch_forecasting.models import TemporalFusionTransformer

In [3]:
base_path = "/home/jinjinjara1022/"

weather_forecast_data_1 = pd.read_csv(base_path + "OBIC/data/기상예측데이터_1.csv")
jeju_power_market_day_ahead = pd.read_csv(base_path + "OBIC/data/제주전력시장_시장전기가격_하루전가격.csv")

In [4]:
weather_forecast_data_1 = weather_forecast_data_1[weather_forecast_data_1['location'] != 'location']
weather_forecast_data_1['ts'] =  pd.to_datetime(weather_forecast_data_1['ts'], unit='s')
weather_forecast_data_1['base_ts'] = pd.to_datetime(weather_forecast_data_1['base_ts'], unit='s')

columns_to_average = [
    'temp', 'real_feel_temp', 'wet_bulb_temp', 'dew_point', 
    'wind_dir', 'wind_spd', 'wind_gust_spd', 'rel_hum', 'vis', 
    'ceiling', 'uv_idx', 'precip_prob', 'rain_prob', 'snow_prob', 
    'ice_prob', 'total_liq', 'rain', 'snow', 'ice', 'cld_cvr'
]

weather_forecast_data_1[columns_to_average] = weather_forecast_data_1[columns_to_average].apply(pd.to_numeric, errors='coerce')


average_df = weather_forecast_data_1.groupby('ts', as_index=False)[columns_to_average].mean()
weather_df = average_df

In [5]:
ahead_df = jeju_power_market_day_ahead
ahead_df['ts'] = pd.to_datetime(ahead_df['ts'], unit='s')

In [6]:
df = pd.merge(weather_df, ahead_df, on='ts')

In [7]:
df['hour'] = df['ts'].dt.hour
df['hour_sin'] = np.sin(2 * np.pi * df['hour'] / 24)
df['hour_cos'] = np.cos(2 * np.pi * df['hour'] / 24)

df['month'] = df['ts'].dt.month
df['month_sin'] = np.sin(2 * np.pi * df['month'] / 12)
df['month_cos'] = np.cos(2 * np.pi * df['month'] / 12)

df['day_of_week'] = df['ts'].dt.dayofweek
df['day_of_week_sin'] = np.sin(2 * np.pi * df['day_of_week'] / 7)
df['day_of_week_cos'] = np.cos(2 * np.pi * df['day_of_week'] / 7)

df = df.drop(['hour', 'month', 'day_of_week'], axis=1)
df = df[[col for col in df.columns if col != '하루전가격(원/kWh)'] + ['하루전가격(원/kWh)']]

df.rename(columns={
    '하루전가격(원/kWh)': 'previous_price_kwh'
}, inplace=True)

df['previous_day_price'] = df['previous_price_kwh'].shift(24)
df['previous_2day_price'] = df['previous_price_kwh'].shift(48)
df['previous_3day_price'] = df['previous_price_kwh'].shift(72)

In [8]:
df['time_idx'] =  df['ts'].astype(int) // 10**9
df = df.drop(['ts'], axis=1)

In [9]:
train = df[72:int(len(df)*0.85)]
test = df[int(len(df)*0.85):]

In [10]:
prediction_steps = test['time_idx'].nunique()

In [11]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4722 entries, 72 to 4793
Data columns (total 31 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   temp                 4722 non-null   float64
 1   real_feel_temp       4722 non-null   float64
 2   wet_bulb_temp        4722 non-null   float64
 3   dew_point            4722 non-null   float64
 4   wind_dir             4722 non-null   float64
 5   wind_spd             4722 non-null   float64
 6   wind_gust_spd        4722 non-null   float64
 7   rel_hum              4722 non-null   float64
 8   vis                  4722 non-null   float64
 9   ceiling              4722 non-null   float64
 10  uv_idx               4722 non-null   float64
 11  precip_prob          4722 non-null   float64
 12  rain_prob            4722 non-null   float64
 13  snow_prob            4722 non-null   float64
 14  ice_prob             4722 non-null   float64
 15  total_liq            4722 non-null   

In [12]:
max_prediction_length = 6
max_encoder_length = 24
training_cutoff = train["time_idx"].max() - max_prediction_length

training = TimeSeriesDataSet(
    train[lambda x: x["time_idx"] <= training_cutoff],
    time_idx="time_idx",  # 시간 인덱스 값
    target="previous_price_kwh",  # 타겟 변수
    group_ids=['ice'],  # 그룹이 필요하다면 주석 해제하여 사용
    min_encoder_length=0,  
    max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=max_prediction_length,
    
    # 시간이 지나도 변하지 않는 범주형 변수 목록
    static_categoricals=[],  # 예시에서 정적 범주형 변수가 없다면 빈 리스트 유지
    
    # 시간이 지나도 변하지 않는 연속형 변수 목록
    static_reals=[],  # 정적 연속형 변수가 없다면 빈 리스트 유지
    
    # 시간이 지남에 따라 변하고 미래에 알려진 범주형 변수 목록
    time_varying_known_categoricals=[],  
    
    # 시간이 지남에 따라 변하고 미래에 알려진 연속형 변수 목록
    time_varying_known_reals=["temp", "real_feel_temp", "wet_bulb_temp", "dew_point", 
                               "wind_dir", "wind_spd", "wind_gust_spd", 
                               "rel_hum", "vis", "ceiling", "uv_idx", 
                               "precip_prob", "rain_prob", "snow_prob", 
                               "ice_prob", "total_liq", "rain", "snow", 
                               "ice", "cld_cvr", "hour_sin", "hour_cos", 
                               "month_sin", "month_cos", "day_of_week_sin", 
                               "day_of_week_cos", "previous_day_price", 
                                 "previous_2day_price", "previous_3day_price"],  # 예측에 영향을 미칠 수 있는 기상 관련 변수들
    
    # 시간이 지남에 따라 변하고 미래에 알려지지 않은 연속형 변수 목록
    time_varying_unknown_reals=["previous_price_kwh", ],  # 과거 가격 변수들 포함
    
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
    allow_missing_timesteps=True    
)

In [17]:
import pandas as pd
from pytorch_forecasting.data.examples import get_stallion_data

stallion_data = get_stallion_data()

HTTPError: HTTP Error 404: Not Found

In [None]:
# Ensure df has the same columns as stallion_data
df.columns = stallion_data.columns

# Assign it to the variable 'data'
data = df

AttributeError: 'function' object has no attribute 'columns'

In [57]:
# create validation set (predict=True) which means to predict the last max_prediction_length points in time
# for each series
validation = TimeSeriesDataSet.from_dataset(train, df, predict=True, stop_randomization=True)

# create dataloaders for model
batch_size = 128  # set this between 32 to 128
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=0)

AttributeError: 'DataFrame' object has no attribute 'get_parameters'