In [8]:
import pandas as pd
import numpy as np
from gluonts.dataset.pandas import PandasDataset
from gluonts.torch.model.deepar import DeepAREstimator
from gluonts.dataset.split import split
from gluonts.evaluation import make_evaluation_predictions, Evaluator
from pytorch_lightning import Trainer
import torch
from gluonts.torch.distributions import StudentTOutput

import time # time 라이브러리 import

In [35]:
start = time.time() # 시작

# 2) 데이터 로드 및 전처리
logistics = pd.read_csv("logistics_dong2.csv", encoding="euc-kr", parse_dates=["배송년월일"])
target_area = '논현1동'
target_category = '식품r'

logistics['배송년월일'] = pd.to_datetime(logistics['배송년월일'], format='%Y%m%d')
data = logistics.loc[logistics.행정동명==target_area][['배송년월일', target_category]]
data.columns = ['date', 'count']
data.reset_index(inplace=True, drop=True)
df = data[['date', 'count']]

# 2) Train/Test 날짜 기준으로 직접 분할
cutoff_date = pd.Timestamp("2023-01-01")

train_df = df[df["date"] < cutoff_date].copy()
test_df  = df[df["date"] >= cutoff_date].copy()

# 3) PandasDataset 생성
train_dataset = PandasDataset(
    train_df, 
    target="count", 
    timestamp="date", 
    freq="D"
)

# test_dataset = PandasDataset(
#     test_df, 
#     target="count", 
#     timestamp="date", 
#     freq="D"
# )

# 4) DeepAREstimator 생성
estimator = DeepAREstimator(
    freq="D",  # 데이터의 시간 단위
    prediction_length=7,  # 예측 길이: 7일
    context_length=30,  # 문맥 길이: 30일
    num_layers=3,  # RNN 계층 수
    hidden_size=50,  # RNN 셀 크기
    lr=0.001,  # 학습률
    dropout_rate=0.1,  # 드롭아웃 비율
    patience=5,
    num_feat_dynamic_real=0 # len(feat_dynamic),  # 동적 실수형 특성의 수
    num_feat_static_cat=0,  # 정적 범주형 특성의 수
    scaling=True,  # 스케일링 활성화
    batch_size=64,  # 배치 크기
    num_parallel_samples=200,  # 병렬 샘플 수
    trainer_kwargs={
        "max_epochs": 5,  # 최대 에포크 수
        "accelerator": "gpu" if torch.cuda.is_available() else "cpu",  # CPU 또는 GPU 선택
        "devices": 1 if torch.cuda.is_available() else None  # 사용할 장치 수 설정
    }
)

# 5) 학습
predictor = estimator.train(train_dataset)

# ======================================================================
# (E) "2023-01-01"만 포함된 더미 test dataset 으로 1년 예측
# ======================================================================
one_shot_test_df = pd.DataFrame({
    "date":  [pd.Timestamp("2023-01-01")],
    "count": [0]  # 의미 없는 placeholder
})

test_dataset = PandasDataset(
    one_shot_test_df,
    target="count",
    timestamp="date",
    freq="D"
)

forecasts = list(predictor.predict(test_dataset))
if len(forecasts) == 0:
    raise RuntimeError("예측 생성 실패: test_dataset이 잘못되었거나 모델 예측에 문제가 있습니다.")

forecast = forecasts[0]  # 단일 시계열 → 1개 forecast
# forecast.start_date = 2023-01-01 (기본적으로)
# 예측 길이 = 365일

# 중앙값(quantile=0.5)으로 시계열 얻기
pred_series = forecast.quantile(0.5)
pred_dates = [forecast.start_date + pd.Timedelta(days=i) for i in range(len(pred_series))]

df_pred = pd.DataFrame({"date": pred_dates, "predict": pred_series})
df_pred["date"] = df_pred["date"].dt.to_timestamp()

# ======================================================================
# (F) 실제값 (2023년)과 예측값 머지
# ======================================================================
# test_df(2023-01-01~2023-12-31)에서 실제 count를 가져옴
df_2023 = test_df.copy()  # ['date','count']

# date 기준으로 머지
final_df = pd.merge(df_2023, df_pred, on="date", how="left")
# 만약 test_df에 2023-12-31까지, df_pred도 2023-12-31까지 있을 것
# right join 하므로 예측 날짜가 우선 -> 실제값이 없는 날짜면 NaN이 들어감

final_df = final_df[["date", "count", "predict"]].sort_values("date").reset_index(drop=True)

print("\n=== (F) final_df 미리보기 ===")
print(final_df.head(10))
print(final_df.tail(10))


print(f"{time.time()-start:.4f} sec") # 종료와 함께 수행시간 출력

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
C:\Users\PC\anaconda3\envs\llm\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type        | Params | Mode  | In sizes                                                          | Out sizes    
----------------------------------------------------------------------------------------------------------------------------------
0 | model | DeepARModel | 25.9 K | train | [[1, 1], [1, 1], [1, 1122, 4], [1, 1122], [1, 1122], [1, 365, 4]] | [1, 100, 365]
----------------------------------------------------------------------------------------------------------------------------------
25.9 K    Trainable params
0         Non-trainable params
25.9 K    Total params
0.104     Total estimated model params size (MB)
11        Mo

Training: |                                                                                      | 0/? [00:00<…

Epoch 0, global step 50: 'train_loss' reached 7.68664 (best 7.68664), saving model to 'C:\\Users\\PC\\Desktop\\demand_prediction\\lightning_logs\\version_22\\checkpoints\\epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 7.23511 (best 7.23511), saving model to 'C:\\Users\\PC\\Desktop\\demand_prediction\\lightning_logs\\version_22\\checkpoints\\epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 6.53479 (best 6.53479), saving model to 'C:\\Users\\PC\\Desktop\\demand_prediction\\lightning_logs\\version_22\\checkpoints\\epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 6.25116 (best 6.25116), saving model to 'C:\\Users\\PC\\Desktop\\demand_prediction\\lightning_logs\\version_22\\checkpoints\\epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 6.07980 (best 6.07980), saving model to 'C:\\Users\\PC\\Desktop\\demand_prediction\\lightning_logs\\version_22\\checkpoints\\epoch=4-step=2


=== (F) final_df 미리보기 ===
        date  count       predict
0 2023-01-01     97           NaN
1 2023-01-02   1316 -4.494333e-11
2 2023-01-03   1020 -3.401029e-11
3 2023-01-04   1025 -2.877042e-11
4 2023-01-05    972 -4.804887e-11
5 2023-01-06    921 -4.388564e-11
6 2023-01-07    212 -4.866495e-11
7 2023-01-08    246 -4.508920e-11
8 2023-01-09   1777 -3.954440e-11
9 2023-01-10   1548 -3.231463e-11
          date  count       predict
355 2023-12-22    747 -3.977127e-11
356 2023-12-23    119 -4.428961e-11
357 2023-12-24     22 -4.883894e-11
358 2023-12-25    219 -4.219178e-11
359 2023-12-26   1442 -4.189639e-11
360 2023-12-27    966 -4.195521e-11
361 2023-12-28    822 -4.485171e-11
362 2023-12-29    717 -6.883579e-11
363 2023-12-30     88 -3.757549e-11
364 2023-12-31     21 -4.093062e-11
285.8772 sec


In [None]:
def parse_to_2023(input_date_str: str) -> pd.Timestamp:
    """
    입력 날짜가 어떤 연도든, '월/일'만 유지하고 연도는 2023으로 치환.
    예) '2025-01-20' -> 2023-01-20
    """
    parsed = pd.to_datetime(input_date_str, errors="raise")
    forced_2023 = parsed.replace(year=2023)  # 월일 그대로, 연도=2023으로 변경
    return forced_2023

def get_7day_forecast(input_date_str: str, df_2023: pd.DataFrame):
    """
    - input_date_str: 사용자 입력 (연도 무관, 예: '2025-01-20', '03/05' 등)
    - df_2023: 'date', 'count', 'predict' (2023년 1년치) 데이터프레임
    
    returns (target_date(2023), actual_value, predict_value)
     * target_date = (입력일을 2023년으로 치환) + 7일
     * actual_value = 그 날짜의 실제 수요(df_2023.count) / 없으면 None
     * predict_value = 그 날짜의 예측 수요(df_2023.predict) / 없으면 None
    """
    base_date_2023 = parse_to_2023(input_date_str)
    target_date = base_date_2023 + timedelta(days=7)

    row = df_2023.loc[df_2023["date"] == target_date]
    if row.empty:
        return (target_date, None, None)

    actual_value = row["count"].values[0]   # 실제
    predict_value = row["predict"].values[0]  # 예측
    return (target_date, actual_value, predict_value)

# ======================================================================

# ======================================================================
input_examples = ["2023-03-05", "2025-01-20", "2025/07/01", "12-25"]
for d_str in input_examples:
    td, act, pred = get_7day_forecast(d_str, final_df)
    print("\n입력:", d_str, "-> 2023로 치환 후 +7일 =", td.date())
    print("실제수요 =", act, "/ 예측수요 =", pred)

In [None]:
# 6. 예측 및 평가
forecast_it, ts_it = make_evaluation_predictions(
    dataset=test_dataset,
    predictor=predictor,
    num_samples=100
)

forecasts = list(forecast_it)
tss = list(ts_it)

In [None]:
# 7. 평가 지표 확인
evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
agg_metrics, item_metrics = evaluator(tss, forecasts, num_series=len(test_dataset))