In [2]:
import pandas as pd
import numpy as np
import random
from statsmodels.tsa.statespace.sarimax import SARIMAX
import warnings
warnings.filterwarnings("ignore")

# =========================================================
# 0) Seed Í≥†Ï†ï
# =========================================================
def set_seed(seed=42):
    """Seed Python's and NumPy's global random number generators.

    Args:
        seed: Value handed to both ``random.seed`` and ``np.random.seed``.
    """
    for seeder in (random.seed, np.random.seed):
        seeder(seed)


# Seed once, before any model code runs.
set_seed(42)

# =========================================================
# 1) ÌèâÍ∞Ä ÏßÄÌëú
# =========================================================
def smape(a, f):
    """Symmetric mean absolute percentage error, in percent.

    Pairs where both the actual and the forecast are zero have a zero
    denominator and are left out of the average.

    Args:
        a: Actual values (array-like).
        f: Forecast values (array-like, same length as ``a``).

    Returns:
        Mean of ``|a - f| / ((|a| + |f|) / 2)`` over the retained pairs,
        multiplied by 100. Returns 0.0 when no pair is retained (empty
        input, or every pair is (0, 0)).
    """
    a = np.asarray(a, dtype=float)
    f = np.asarray(f, dtype=float)
    denom = (np.abs(a) + np.abs(f)) / 2
    mask = denom != 0
    if not mask.any():
        # Nothing to average: np.mean over an empty selection would return
        # NaN (with a RuntimeWarning) and poison the reported metric.
        return 0.0
    return np.mean(np.abs(a[mask] - f[mask]) / denom[mask]) * 100

def evaluate(actual, pred):
    """Score a forecast against the actual values.

    Args:
        actual: Observed values (array-like).
        pred: Predicted values (array-like, same length).

    Returns:
        Tuple ``(mae, rmse, smape)``; SMAPE comes from ``smape`` and is in percent.
    """
    y_true = np.array(actual)
    y_hat = np.array(pred)
    residual = y_true - y_hat
    mae = np.abs(residual).mean()
    rmse = np.sqrt((residual ** 2).mean())
    return mae, rmse, smape(y_true, y_hat)

# =========================================================
# 2) Autoregressive SARIMAX ÏòàÏ∏°
# =========================================================
def autoregressive_sarimax(train, test, exog_cols):
    """Recursive one-step-ahead SARIMAX forecast over the rows of ``test``.

    For every test row a SARIMAX(1,1,1)x(1,1,1,7) model is refit on the
    current history and asked for the next value. The prediction (clipped at
    zero) is appended to the history in place of the unknown actual before
    the next step.

    Args:
        train: DataFrame holding the target column and every column in
            ``exog_cols``.
        test: DataFrame holding every column in ``exog_cols``; rows are
            consumed in order. Its index may be arbitrary.
        exog_cols: Names of the exogenous regressor columns.

    Returns:
        List with one non-negative float prediction per row of ``test``.
    """
    target_col = 'ÏùºÎß§Ï∂ú'

    # Start from a 0..n-1 index so the first iteration sees the same kind of
    # index as later ones (the concat below always uses ignore_index=True).
    history = train.reset_index(drop=True)
    preds = []

    for i in range(len(test)):

        # Select the i-th row by position. A label lookup (.loc[[i]]) only
        # works when `test` happens to carry a default RangeIndex.
        exog_row = test.iloc[[i]][exog_cols].fillna(0)

        model = SARIMAX(
            history[target_col],
            exog=history[exog_cols],
            order=(1,1,1),
            seasonal_order=(1,1,1,7)
        )

        fit = model.fit(disp=False)

        # One step past the end of the history.
        forecast = fit.predict(
            start=len(history),
            end=len(history),
            exog=exog_row
        )

        # Negative forecasts are clipped to zero.
        pred = max(float(np.asarray(forecast)[0]), 0.0)
        preds.append(pred)

        # Feed the prediction back as if it had been observed.
        new_row = test.iloc[i].copy()
        new_row[target_col] = pred
        history = pd.concat([history, pd.DataFrame([new_row])], ignore_index=True)

    return preds

# =========================================================
# 3) Îç∞Ïù¥ÌÑ∞ Î°úÎìú
# =========================================================
# Read the train/validation and test POS tables from hard-coded absolute paths.
# NOTE(review): the /content/drive prefix looks like a Colab Drive mount — confirm.
train = pd.read_csv('/content/drive/MyDrive/·ÑÄ·Öµ·ÑÄ·Ö®·Ñí·Ö°·Ü®·Ñâ·Ö≥·Ü∏/·Ñê·Öµ·Ü∑·Ñë·Ö≥·ÜØ/Data/POS_train_val.csv')
test  = pd.read_csv('/content/drive/MyDrive/·ÑÄ·Öµ·ÑÄ·Ö®·Ñí·Ö°·Ü®·Ñâ·Ö≥·Ü∏/·Ñê·Öµ·Ü∑·Ñë·Ö≥·ÜØ/Data/POS_test.csv')

# Parse the date column (the merge key used below).
train['ÏòÅÏóÖÏùºÏûê'] = pd.to_datetime(train['ÏòÅÏóÖÏùºÏûê'])
test ['ÏòÅÏóÖÏùºÏûê'] = pd.to_datetime(test ['ÏòÅÏóÖÏùºÏûê'])

# The target column may contain thousands separators: strip commas, then cast to float.
train['ÏùºÎß§Ï∂ú'] = train['ÏùºÎß§Ï∂ú'].astype(str).str.replace(",", "").astype(float)
test ['ÏùºÎß§Ï∂ú'] = test ['ÏùºÎß§Ï∂ú'].astype(str).str.replace(",", "").astype(float)

# =========================================================
# 4) Merge the academic calendar
# =========================================================
academic = pd.read_csv('/content/drive/MyDrive/·ÑÄ·Öµ·ÑÄ·Ö®·Ñí·Ö°·Ü®·Ñâ·Ö≥·Ü∏/·Ñê·Öµ·Ü∑·Ñë·Ö≥·ÜØ/Data/·Ñí·Ö°·Ü®·Ñâ·Ö°·Ñã·Öµ·ÜØ·Ñå·Ö•·Üº_·Ñå·Ö•·Üº·ÑÖ·Öµ(2325).csv')

# Rename the calendar's `date` column to the POS date column so the frames can be joined on it.
academic['ÏòÅÏóÖÏùºÏûê'] = pd.to_datetime(academic['date'])
academic = academic.drop(columns=['date'])

# Encode weekday abbreviations as 0 (Mon) .. 6 (Sun); values missing from the map become NaN.
weekday_map = {'mon':0,'tue':1,'wed':2,'thu':3,'fri':4,'sat':5,'sun':6}
academic['acad_weekday'] = academic['weekday'].map(weekday_map)
academic = academic.drop(columns=['weekday'])

# Prefix the calendar columns with `acad_`.
academic = academic.rename(columns={
    'weekend':'acad_weekend',
    'holiday':'acad_holiday',
    'semester':'acad_semester',
    'seasonal':'acad_seasonal',
    'exam':'acad_exam',
    'ceremony':'acad_ceremony',
    'dormitory':'acad_dormitory'
})

# Left join: POS rows without a calendar entry keep NaN in the acad_* columns.
# NOTE(review): assumes at most one calendar row per date; duplicate dates would duplicate POS rows — confirm.
train = train.merge(academic, on='ÏòÅÏóÖÏùºÏûê', how='left')
test  = test.merge(academic, on='ÏòÅÏóÖÏùºÏûê', how='left')

# =========================================================
# 5) ÎÇ†Ïßú Í∏∞Î∞ò ÌîºÏ≤ò Ï∂îÍ∞Ä
# =========================================================
def add_date_features(df):
    """Add calendar-derived columns to ``df`` in place and return it.

    Adds ``month``, ``weekday`` (0 = Monday), ``is_weekend`` (1 for
    Saturday/Sunday) and ``open_hours`` (11 by default, 6 on Saturday,
    0 on Sunday), all derived from the date column.
    """
    dates = df['ÏòÅÏóÖÏùºÏûê'].dt
    df['month'] = dates.month
    df['weekday'] = dates.weekday
    df['is_weekend'] = (df['weekday'] >= 5).astype(int)

    # Default opening hours, then the weekend overrides.
    df['open_hours'] = 11
    for day_number, hours in ((5, 6), (6, 0)):
        df.loc[df['weekday'] == day_number, 'open_hours'] = hours
    return df

# Add the calendar-derived columns to both frames.
train = add_date_features(train)
test  = add_date_features(test)

# =========================================================
# 6) Select the SARIMAX exogenous features (lag/rolling features removed)
# =========================================================
exog_cols = [
    'acad_weekend','acad_semester','acad_weekday',
    'acad_holiday','acad_seasonal','acad_exam',
    'acad_ceremony','acad_dormitory',
    'month','weekday','is_weekend','open_hours'
]

# Fill regressors left NaN by the left merge (dates with no calendar row) with 0.
train[exog_cols] = train[exog_cols].fillna(0)
test [exog_cols] = test [exog_cols].fillna(0)

# =========================================================
# 7) SARIMAX forecast
# =========================================================
test_pred = autoregressive_sarimax(train, test, exog_cols)

# Score the recursive forecast against the actual test target.
mae, rmse, smape_val = evaluate(test['ÏùºÎß§Ï∂ú'], test_pred)

print("\n====== SARIMAX Í≤∞Í≥º (lag/rolling Ï†úÍ±∞) ======")
print("MAE  :", mae)
print("RMSE :", rmse)
print("SMAPE:", smape_val)

# =========================================================
# 8) Save to CSV
# =========================================================
# One row per test date with its prediction; written to the current working directory.
output = test[['ÏòÅÏóÖÏùºÏûê']].copy()
output['ÏòàÏ∏°Îß§Ï∂ú'] = test_pred
output.to_csv("sarimax_prediction_output.csv", index=False)

print("\nCSV ÌååÏùº Ï†ÄÏû• ÏôÑÎ£å ‚Üí sarimax_prediction_output.csv")
print(output.head())



MAE  : 77898.79778632565
RMSE : 98289.41421725308
SMAPE: 76.65960579041068

CSV ÌååÏùº Ï†ÄÏû• ÏôÑÎ£å ‚Üí sarimax_prediction_output.csv
        ÏòÅÏóÖÏùºÏûê           ÏòàÏ∏°Îß§Ï∂ú
0 2025-08-10       0.000000
1 2025-08-11  185742.622154
2 2025-08-12  122451.625520
3 2025-08-13  162148.301086
4 2025-08-14  153032.472745


### **하이퍼파라미터 자동 탐색**

In [4]:
import itertools
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX

# ================================
# Seed
# ================================
def set_seed(seed=42):
    """Seed Python's and NumPy's global random number generators.

    Args:
        seed: Value handed to both ``random.seed`` and ``np.random.seed``.
    """
    # Imported locally because this cell's import list does not include `random`.
    import random

    random.seed(seed)
    np.random.seed(seed)
set_seed(42)

# ================================
# ÌèâÍ∞Ä ÏßÄÌëú
# ================================
def smape(a, f):
    """Symmetric mean absolute percentage error, in percent.

    Pairs where both the actual and the forecast are zero have a zero
    denominator and are left out of the average.

    Args:
        a: Actual values (array-like).
        f: Forecast values (array-like, same length as ``a``).

    Returns:
        Mean of ``|a - f| / ((|a| + |f|) / 2)`` over the retained pairs,
        multiplied by 100. Returns 0.0 when no pair is retained (empty
        input, or every pair is (0, 0)).
    """
    a = np.asarray(a, dtype=float)
    f = np.asarray(f, dtype=float)
    denom = (np.abs(a) + np.abs(f)) / 2
    mask = denom != 0
    if not mask.any():
        # Nothing to average: np.mean over an empty selection would return
        # NaN (with a RuntimeWarning) and poison the reported metric.
        return 0.0
    return np.mean(np.abs(a[mask] - f[mask]) / denom[mask]) * 100

def evaluate(actual, pred):
    """Score a forecast against the actual values.

    Both inputs are converted to NumPy arrays first, so plain lists work and
    two pandas Series are compared by position rather than aligned on their
    index labels.

    Args:
        actual: Observed values (array-like).
        pred: Predicted values (array-like, same length).

    Returns:
        Tuple ``(mae, rmse, smape)``; SMAPE comes from ``smape`` and is in percent.
    """
    actual = np.array(actual)
    pred = np.array(pred)
    mae  = np.mean(np.abs(actual - pred))
    rmse = np.sqrt(np.mean((actual - pred)**2))
    smp  = smape(actual, pred)
    return mae, rmse, smp


# =========================================================
# 1) SARIMAX AIC Í∏∞Î∞ò ÏûêÎèô ÌÉêÏÉâÍ∏∞
# =========================================================
def sarimax_auto_search(train_df, exog_cols):
    """Pick SARIMAX orders by exhaustive AIC search over a small grid.

    Every (p, d, q) x (P, D, Q) combination with each term in {0, 1} and a
    seasonal period of 7 is fitted on the full training frame, with
    stationarity and invertibility enforcement disabled. The candidate with
    the lowest AIC wins.

    Args:
        train_df: DataFrame holding the target column and every column in
            ``exog_cols``.
        exog_cols: Names of the exogenous regressor columns.

    Returns:
        The fitted results object of the lowest-AIC candidate.

    Raises:
        RuntimeError: If no candidate produced a usable fit.
    """
    y = train_df["ÏùºÎß§Ï∂ú"]
    exog = train_df[exog_cols]

    # Search space: kept small on purpose, since every extra value multiplies
    # the number of models that have to be fitted.
    p = d = q = range(0, 2)
    P = D = Q = range(0, 2)
    seasonal_period = [7]   # day-of-week pattern

    best_aic = np.inf
    best_param = None
    best_model = None
    failed = []   # (params, exception) for every candidate that could not be fitted

    search_space = list(itertools.product(p, d, q, P, D, Q, seasonal_period))

    print(f"Ï¥ù ÌÉêÏÉâ Ï°∞Ìï© Ïàò: {len(search_space)}\n")

    for (p_, d_, q_, P_, D_, Q_, s_) in search_space:

        try:
            model = SARIMAX(
                y,
                exog=exog,
                order=(p_, d_, q_),
                seasonal_order=(P_, D_, Q_, s_),
                enforce_stationarity=False,
                enforce_invertibility=False
            )

            fit = model.fit(disp=False)

            if fit.aic < best_aic:
                best_aic = fit.aic
                best_param = (p_, d_, q_, P_, D_, Q_, s_)
                best_model = fit

                print(f"üî• ÏÉàÎ°úÏö¥ ÏµúÏ†Å AIC: {best_aic:.2f} ‚Üí {best_param}")

        except Exception as e:
            # A single bad candidate must not abort the search, but keep the
            # reason so failures can be reported rather than vanish silently.
            failed.append(((p_, d_, q_, P_, D_, Q_, s_), e))
            continue

    if best_model is None:
        # Without this guard the summary below fails with an opaque
        # "'NoneType' object is not subscriptable".
        reason = f" Last error: {failed[-1][1]!r}" if failed else ""
        raise RuntimeError(
            f"SARIMAX search found no usable model among {len(search_space)} "
            f"candidates ({len(failed)} failed to fit).{reason}"
        )

    if failed:
        print(f"Skipped {len(failed)} of {len(search_space)} candidates that failed to fit.")

    print("\n==========================")
    print("üìå ÏµúÏ¢Ö ÏÑ†ÌÉùÎêú SARIMAX Î™®Îç∏")
    print("order =", best_param[:3])
    print("seasonal_order =", best_param[3:])
    print("AIC =", best_aic)
    print("==========================\n")

    return best_model


# =========================================================
# 2) Auto-Regressive ÏòàÏ∏°
# =========================================================
def sarimax_forecast_autoreg(train_df, test_df, exog_cols, best_model):
    """Recursive one-step-ahead SARIMAX forecast using the orders of ``best_model``.

    For every test row a SARIMAX model with ``best_model``'s ``order`` and
    ``seasonal_order`` is refit on the current history and asked for the next
    value. The prediction (clipped at zero) is appended to the history in
    place of the unknown actual before the next step.

    Args:
        train_df: DataFrame holding the target column and every column in
            ``exog_cols``.
        test_df: DataFrame holding every column in ``exog_cols``; rows are
            consumed in order. Its index may be arbitrary.
        exog_cols: Names of the exogenous regressor columns.
        best_model: Fitted SARIMAX results object; only ``.model.order`` and
            ``.model.seasonal_order`` are read from it.

    Returns:
        List with one non-negative float prediction per row of ``test_df``.
    """
    target_col = "ÏùºÎß§Ï∂ú"

    # Start from a 0..n-1 index so the first iteration sees the same kind of
    # index as later ones (the concat below always uses ignore_index=True).
    history = train_df.reset_index(drop=True)
    preds = []

    # Read the selected orders back from the fitted model's specification.
    order = best_model.model.order
    seasonal_order = best_model.model.seasonal_order

    for i in range(len(test_df)):

        # Select the i-th row by position. A label lookup (.loc[[i]]) only
        # works when `test_df` happens to carry a default RangeIndex.
        exog_next = test_df.iloc[[i]][exog_cols].fillna(0)

        model = SARIMAX(
            history[target_col],
            exog=history[exog_cols],
            order=order,
            seasonal_order=seasonal_order,
            enforce_stationarity=False,
            enforce_invertibility=False
        )

        fit = model.fit(disp=False)

        # One step past the end of the history.
        forecast = fit.predict(
            start=len(history),
            end=len(history),
            exog=exog_next
        )

        # Negative forecasts are clipped to zero.
        pred = max(float(np.asarray(forecast)[0]), 0.0)
        preds.append(pred)

        # Feed the prediction back as if it had been observed.
        new_row = test_df.iloc[i].copy()
        new_row[target_col] = pred
        history = pd.concat([history, pd.DataFrame([new_row])], ignore_index=True)

    return preds


# =========================================================
# 3) Îç∞Ïù¥ÌÑ∞ Î°úÎìú (Í≤ΩÎ°ú Î≥ÄÍ≤Ω ÏóÜÏùå)
# =========================================================
# Read the train/validation and test POS tables from hard-coded absolute paths.
# NOTE(review): the /content/drive prefix looks like a Colab Drive mount — confirm.
train = pd.read_csv('/content/drive/MyDrive/·ÑÄ·Öµ·ÑÄ·Ö®·Ñí·Ö°·Ü®·Ñâ·Ö≥·Ü∏/·Ñê·Öµ·Ü∑·Ñë·Ö≥·ÜØ/Data/POS_train_val.csv')
test  = pd.read_csv('/content/drive/MyDrive/·ÑÄ·Öµ·ÑÄ·Ö®·Ñí·Ö°·Ü®·Ñâ·Ö≥·Ü∏/·Ñê·Öµ·Ü∑·Ñë·Ö≥·ÜØ/Data/POS_test.csv')

# Parse the date column (the merge key used below).
train['ÏòÅÏóÖÏùºÏûê'] = pd.to_datetime(train['ÏòÅÏóÖÏùºÏûê'])
test ['ÏòÅÏóÖÏùºÏûê'] = pd.to_datetime(test ['ÏòÅÏóÖÏùºÏûê'])

# The target column may contain thousands separators: strip commas, then cast to float.
train['ÏùºÎß§Ï∂ú'] = train['ÏùºÎß§Ï∂ú'].astype(str).str.replace(",", "").astype(float)
test ['ÏùºÎß§Ï∂ú'] = test ['ÏùºÎß§Ï∂ú'].astype(str).str.replace(",", "").astype(float)


# =========================================================
# 4) Merge the academic calendar (unchanged)
# =========================================================
academic = pd.read_csv('/content/drive/MyDrive/·ÑÄ·Öµ·ÑÄ·Ö®·Ñí·Ö°·Ü®·Ñâ·Ö≥·Ü∏/·Ñê·Öµ·Ü∑·Ñë·Ö≥·ÜØ/Data/·Ñí·Ö°·Ü®·Ñâ·Ö°·Ñã·Öµ·ÜØ·Ñå·Ö•·Üº_·Ñå·Ö•·Üº·ÑÖ·Öµ(2325).csv')
# Rename the calendar's `date` column to the POS date column so the frames can be joined on it.
academic['ÏòÅÏóÖÏùºÏûê'] = pd.to_datetime(academic['date'])
academic = academic.drop(columns=['date'])

# Encode weekday abbreviations as 0 (Mon) .. 6 (Sun); values missing from the map become NaN.
weekday_map = {'mon':0,'tue':1,'wed':2,'thu':3,'fri':4,'sat':5,'sun':6}
academic['acad_weekday'] = academic['weekday'].map(weekday_map)
academic = academic.drop(columns=['weekday'])

# Prefix the calendar columns with `acad_`.
academic = academic.rename(columns={
    'weekend':'acad_weekend',
    'holiday':'acad_holiday',
    'semester':'acad_semester',
    'seasonal':'acad_seasonal',
    'exam':'acad_exam',
    'ceremony':'acad_ceremony',
    'dormitory':'acad_dormitory'
})

# Left join: POS rows without a calendar entry keep NaN in the acad_* columns.
# NOTE(review): assumes at most one calendar row per date; duplicate dates would duplicate POS rows — confirm.
train = train.merge(academic, on='ÏòÅÏóÖÏùºÏûê', how='left')
test  = test.merge(academic, on='ÏòÅÏóÖÏùºÏûê', how='left')


# =========================================================
# 5) ÎÇ†Ïßú ÌîºÏ≤ò Ï∂îÍ∞Ä
# =========================================================
def add_date_features(df):
    """Add calendar-derived columns to ``df`` in place and return it.

    Adds ``month``, ``weekday`` (0 = Monday), ``is_weekend`` (1 for
    Saturday/Sunday) and ``open_hours`` (11 by default, 6 on Saturday,
    0 on Sunday), all derived from the date column.
    """
    dates = df['ÏòÅÏóÖÏùºÏûê'].dt
    df['month'] = dates.month
    df['weekday'] = dates.weekday
    df['is_weekend'] = (df['weekday'] >= 5).astype(int)

    # Default opening hours, then the weekend overrides.
    df['open_hours'] = 11
    for day_number, hours in ((5, 6), (6, 0)):
        df.loc[df['weekday'] == day_number, 'open_hours'] = hours
    return df

# Add the calendar-derived columns to both frames.
train = add_date_features(train)
test  = add_date_features(test)


# =========================================================
# 6) Exogenous regressor columns for SARIMAX
# =========================================================
exog_cols = [
    'acad_weekend','acad_semester','acad_weekday',
    'acad_holiday','acad_seasonal','acad_exam',
    'acad_ceremony','acad_dormitory',
    'month','weekday','is_weekend','open_hours'
]

# Fill regressors left NaN by the left merge (dates with no calendar row) with 0.
train[exog_cols] = train[exog_cols].fillna(0)
test [exog_cols] = test [exog_cols].fillna(0)


# =========================================================
# 7) AIC-based automatic SARIMAX model selection
# =========================================================
best_model = sarimax_auto_search(train, exog_cols)


# =========================================================
# 8) Forecast
# =========================================================
# Recursive forecast over the test rows using the orders selected above.
test_pred = sarimax_forecast_autoreg(train, test, exog_cols, best_model)

# Score the recursive forecast against the actual test target.
mae, rmse, smape_val = evaluate(test['ÏùºÎß§Ï∂ú'], test_pred)

print("====== SARIMAX ÏûêÎèôÌÉêÏÉâ Í≤∞Í≥º ======")
print("MAE  :", mae)
print("RMSE :", rmse)
print("SMAPE:", smape_val)


# =========================================================
# 9) Save to CSV
# =========================================================
# One row per test date with its prediction; written to the current working directory.
output = test[['ÏòÅÏóÖÏùºÏûê']].copy()
output['ÏòàÏ∏°Îß§Ï∂ú'] = test_pred
output.to_csv("sarimax_auto_prediction.csv", index=False)

print("\nCSV Ï†ÄÏû• ÏôÑÎ£å ‚Üí sarimax_auto_prediction.csv")


Ï¥ù ÌÉêÏÉâ Ï°∞Ìï© Ïàò: 64

üî• ÏÉàÎ°úÏö¥ ÏµúÏ†Å AIC: 23484.44 ‚Üí (0, 0, 0, 0, 0, 0, 7)
üî• ÏÉàÎ°úÏö¥ ÏµúÏ†Å AIC: 19090.44 ‚Üí (0, 0, 0, 0, 0, 1, 7)
üî• ÏÉàÎ°úÏö¥ ÏµúÏ†Å AIC: 19026.92 ‚Üí (0, 0, 0, 0, 1, 1, 7)
üî• ÏÉàÎ°úÏö¥ ÏµúÏ†Å AIC: 19006.22 ‚Üí (0, 0, 0, 1, 1, 1, 7)
üî• ÏÉàÎ°úÏö¥ ÏµúÏ†Å AIC: 18990.11 ‚Üí (0, 0, 1, 0, 1, 1, 7)
üî• ÏÉàÎ°úÏö¥ ÏµúÏ†Å AIC: 18974.29 ‚Üí (0, 0, 1, 1, 1, 1, 7)
üî• ÏÉàÎ°úÏö¥ ÏµúÏ†Å AIC: 18959.89 ‚Üí (1, 0, 1, 0, 1, 1, 7)
üî• ÏÉàÎ°úÏö¥ ÏµúÏ†Å AIC: 18952.35 ‚Üí (1, 0, 1, 1, 1, 1, 7)

üìå ÏµúÏ¢Ö ÏÑ†ÌÉùÎêú SARIMAX Î™®Îç∏
order = (1, 0, 1)
seasonal_order = (1, 1, 1, 7)
AIC = 18952.35459120816

MAE  : 63429.931170126794
RMSE : 79420.76312946768
SMAPE: 76.60371037543867

CSV Ï†ÄÏû• ÏôÑÎ£å ‚Üí sarimax_auto_prediction.csv


### **GRUÏôÄ SARIMAX Î∏îÎ†åÎî©**

In [5]:
import pandas as pd
import numpy as np
import random
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
from statsmodels.tsa.statespace.sarimax import SARIMAX
import warnings
warnings.filterwarnings("ignore")


# =========================================================
# 0) Seed Í≥†Ï†ï
# =========================================================
def set_seed(seed=42):
    """Seed the Python, NumPy and PyTorch random number generators.

    Args:
        seed: Value handed to every generator; CUDA generators are seeded
            only when CUDA is available.
    """
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)

# Seed once, before any model code runs.
set_seed(42)



# =========================================================
# 1) ÌèâÍ∞Ä ÏßÄÌëú
# =========================================================
def smape(a, f):
    """Symmetric mean absolute percentage error, in percent.

    Pairs where both the actual and the forecast are zero have a zero
    denominator and are left out of the average.

    Args:
        a: Actual values (array-like).
        f: Forecast values (array-like, same length as ``a``).

    Returns:
        Mean of ``|a - f| / ((|a| + |f|) / 2)`` over the retained pairs,
        multiplied by 100. Returns 0.0 when no pair is retained (empty
        input, or every pair is (0, 0)).
    """
    a = np.asarray(a, dtype=float)
    f = np.asarray(f, dtype=float)
    denom = (np.abs(a) + np.abs(f)) / 2
    mask = denom != 0
    if not mask.any():
        # Nothing to average: np.mean over an empty selection would return
        # NaN (with a RuntimeWarning) and poison the reported metric.
        return 0.0
    return np.mean(np.abs(a[mask] - f[mask]) / denom[mask]) * 100

def evaluate(actual, pred):
    """Score a forecast against the actual values.

    Args:
        actual: Observed values (array-like).
        pred: Predicted values (array-like, same length).

    Returns:
        Tuple ``(mae, rmse, smape)``; SMAPE comes from ``smape`` and is in percent.
    """
    y_true = np.array(actual)
    y_hat = np.array(pred)
    residual = y_true - y_hat
    mae = np.abs(residual).mean()
    rmse = np.sqrt((residual ** 2).mean())
    return mae, rmse, smape(y_true, y_hat)



# =========================================================
# 2) Îç∞Ïù¥ÌÑ∞ Ï†ÑÏ≤òÎ¶¨
# =========================================================
def clean_sales(df):
    """Convert the target column of ``df`` to numeric in place and return ``df``.

    Values are stringified, stripped of commas, spaces and surrounding
    whitespace, then parsed as numbers; anything unparseable becomes 0.
    """
    sales_col = 'ÏùºÎß§Ï∂ú'
    as_text = df[sales_col].astype(str)
    for junk in (",", " "):
        as_text = as_text.str.replace(junk, "")
    as_text = as_text.str.strip()

    # errors='coerce' turns non-numeric text into NaN, which is then zero-filled.
    df[sales_col] = pd.to_numeric(as_text, errors='coerce').fillna(0)
    return df

def add_date_features(df):
    """Add calendar-derived columns to ``df`` in place and return it.

    Adds ``month``, ``day``, ``weekday`` (0 = Monday), ``is_weekend`` (1 for
    Saturday/Sunday) and ``open_hours`` (11 by default, 6 on Saturday,
    0 on Sunday), all derived from the date column.
    """
    dates = df['ÏòÅÏóÖÏùºÏûê'].dt
    df['month'] = dates.month
    df['day'] = dates.day
    df['weekday'] = dates.weekday
    df['is_weekend'] = (df['weekday'] >= 5).astype(int)

    # Default opening hours, then the weekend overrides.
    df['open_hours'] = 11
    for day_number, hours in ((5, 6), (6, 0)):
        df.loc[df['weekday'] == day_number, 'open_hours'] = hours
    return df

def add_lag_features(df):
    """Add lag and rolling-window columns of the target to ``df`` in place.

    Adds ``lag{1,7,14,28}`` (target shifted by that many rows),
    ``roll_mean{7,14,28}`` and ``roll_std{7,28}``. Each rolling window ends
    at, and includes, the current row. Rows without enough history get NaN.

    Returns:
        The same ``df`` object.
    """
    sales = df['ÏùºÎß§Ï∂ú']

    for lag in (1, 7, 14, 28):
        df[f'lag{lag}'] = sales.shift(lag)

    for window in (7, 14, 28):
        df[f'roll_mean{window}'] = sales.rolling(window).mean()

    for window in (7, 28):
        df[f'roll_std{window}'] = sales.rolling(window).std()

    return df



# =========================================================
# 3) GRU Dataset
# =========================================================
def create_sequences(df, feature_cols, seq_len=28):
    """Build sliding-window samples for sequence training.

    Sample ``k`` pairs the feature rows ``[k, k + seq_len)`` with the target
    of row ``k + seq_len``, i.e. the window ends just before the row whose
    target it predicts.

    Returns:
        ``(X, y)`` as NumPy arrays; both are empty when ``df`` has no more
        than ``seq_len`` rows.
    """
    feature_matrix = df[feature_cols].values
    target_values = df['ÏùºÎß§Ï∂ú'].values
    window_ends = range(seq_len, len(df))

    windows = [feature_matrix[end - seq_len:end] for end in window_ends]
    labels = [target_values[end] for end in window_ends]
    return np.array(windows), np.array(labels)


class SequenceDataset(Dataset):
    """Torch dataset pairing each input window with its target.

    ``X`` and ``y`` are copied into float32 tensors; ``y`` gains a trailing
    axis at position 1, so each item's target has shape ``(1,)`` for 1-D ``y``.
    """

    def __init__(self, X, y):
        inputs = torch.tensor(X, dtype=torch.float32)
        targets = torch.tensor(y, dtype=torch.float32)
        self.X = inputs
        self.y = targets[:, None]

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        sample, label = self.X[idx], self.y[idx]
        return sample, label



# =========================================================
# 4) GRU Î™®Îç∏ Ï†ïÏùò
# =========================================================
class GRUModel(nn.Module):
    """Stacked GRU followed by a linear head producing one value per sequence.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: GRU hidden size.
        num_layers: Number of stacked GRU layers.
    """

    def __init__(self, input_dim, hidden_dim=64, num_layers=2):
        super().__init__()
        # batch_first=True: inputs are laid out as (batch, time, features).
        self.gru = nn.GRU(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_dim, out_features=1)

    def forward(self, x):
        """Return the head's output for the last time step, shape ``(batch, 1)``."""
        sequence_out, _final_hidden = self.gru(x)
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)



# =========================================================
# 5) GRU Autoregressive ÏòàÏ∏°
# =========================================================
def autoregressive_forecast(model, df_train, df_test, seq_len, feature_cols, scaler_X, scaler_y):
    """Recursive one-step-ahead forecast with a trained sequence model.

    For every test row the last ``seq_len`` rows of the history are scaled
    with ``scaler_X``, fed to ``model``, and the output is mapped back with
    ``scaler_y`` and clipped at zero. The test row, with that prediction as
    its target and freshly computed lag/rolling features, is then appended
    to the history.

    The history is passed through ``scaler_X.transform`` here and extended
    with inverse-transformed predictions, so ``df_train`` is expected in
    original (unscaled) units.

    Args:
        model: Callable mapping a ``(1, seq_len, n_features)`` float tensor
            to a single-element tensor.
        df_train: History frame with the target column and ``feature_cols``;
            needs at least 28 rows for the longest lag.
        df_test: Rows to forecast, in order.
        seq_len: Number of history rows fed to the model.
        feature_cols: Feature column names, in the order the model expects.
        scaler_X: Fitted feature scaler exposing ``transform``.
        scaler_y: Fitted target scaler exposing ``inverse_transform``.

    Returns:
        List with one non-negative prediction per row of ``df_test``.
    """
    target_col = 'ÏùºÎß§Ï∂ú'
    history = df_train.copy()
    preds = []

    for i in range(len(df_test)):
        row = df_test.iloc[i].copy()
        past_vals = history[target_col].values

        # Lags look strictly backwards, matching shift(k) in add_lag_features.
        row['lag1'] = past_vals[-1]
        row['lag7'] = past_vals[-7]
        row['lag14'] = past_vals[-14]
        row['lag28'] = past_vals[-28]

        # The model only sees rows before the one being predicted.
        seq_df = history.tail(seq_len)
        seq_scaled = scaler_X.transform(seq_df[feature_cols])
        X = torch.tensor(seq_scaled, dtype=torch.float32).unsqueeze(0)

        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            pred_scaled = model(X).item()
        pred = scaler_y.inverse_transform([[pred_scaled]])[0][0]
        pred = max(pred, 0)

        preds.append(pred)

        row[target_col] = pred

        # add_lag_features computes rolling stats with df.rolling(w), whose
        # window includes the current row. Include this row's own value (the
        # prediction) so appended rows match the training-time features.
        upto_now = pd.Series(np.append(past_vals, pred))
        row['roll_mean7'] = upto_now.iloc[-7:].mean()
        row['roll_mean14'] = upto_now.iloc[-14:].mean()
        row['roll_mean28'] = upto_now.iloc[-28:].mean()
        row['roll_std7'] = upto_now.iloc[-7:].std()
        row['roll_std28'] = upto_now.iloc[-28:].std()

        history = pd.concat([history, pd.DataFrame([row])], ignore_index=True)

    return preds



# =========================================================
# 6) SARIMAX ÏòàÏ∏° (exog Í∏∞Î∞ò)
# =========================================================
def sarimax_forecast(train_df, test_df, exog_cols):
    """Recursive one-step-ahead SARIMAX forecast over the rows of ``test_df``.

    For every test row a SARIMAX(1,1,1)x(1,1,1,7) model (stationarity and
    invertibility enforcement disabled) is refit on the current history and
    asked for the next value. The prediction (clipped at zero) is appended to
    the history in place of the unknown actual before the next step.

    Args:
        train_df: DataFrame holding the target column and every column in
            ``exog_cols``.
        test_df: DataFrame holding every column in ``exog_cols``; rows are
            consumed in order. Its index may be arbitrary.
        exog_cols: Names of the exogenous regressor columns.

    Returns:
        List with one non-negative float prediction per row of ``test_df``.
    """
    target_col = "ÏùºÎß§Ï∂ú"

    # Default SARIMAX orders; the seasonal period of 7 is the weekly cycle.
    order = (1,1,1)
    seasonal = (1,1,1,7)

    # Start from a 0..n-1 index so the first iteration sees the same kind of
    # index as later ones (the concat below always uses ignore_index=True).
    history = train_df.reset_index(drop=True)
    preds = []

    for i in range(len(test_df)):

        model = SARIMAX(
            history[target_col],
            exog=history[exog_cols],
            order=order,
            seasonal_order=seasonal,
            enforce_stationarity=False,
            enforce_invertibility=False
        )

        fit = model.fit(disp=False)

        # Select the i-th row by position. A label lookup (.loc[[i]]) only
        # works when `test_df` happens to carry a default RangeIndex.
        exog_next = test_df.iloc[[i]][exog_cols].fillna(0)

        # One step past the end of the history.
        forecast = fit.predict(
            start=len(history),
            end=len(history),
            exog=exog_next
        )

        # Negative forecasts are clipped to zero.
        pred = max(float(np.asarray(forecast)[0]), 0.0)
        preds.append(pred)

        # Feed the prediction back as if it had been observed.
        new_row = test_df.iloc[i].copy()
        new_row[target_col] = pred
        history = pd.concat([history, pd.DataFrame([new_row])], ignore_index=True)

    return preds



# =========================================================
# 7) Îç∞Ïù¥ÌÑ∞ Î°úÎìú
# =========================================================
# Read the train/validation and test POS tables from hard-coded absolute paths.
# NOTE(review): the /content/drive prefix looks like a Colab Drive mount — confirm.
train = pd.read_csv('/content/drive/MyDrive/·ÑÄ·Öµ·ÑÄ·Ö®·Ñí·Ö°·Ü®·Ñâ·Ö≥·Ü∏/·Ñê·Öµ·Ü∑·Ñë·Ö≥·ÜØ/Data/POS_train_val.csv')
test = pd.read_csv('/content/drive/MyDrive/·ÑÄ·Öµ·ÑÄ·Ö®·Ñí·Ö°·Ü®·Ñâ·Ö≥·Ü∏/·Ñê·Öµ·Ü∑·Ñë·Ö≥·ÜØ/Data/POS_test.csv')

# Parse the date column (the merge key used below).
train['ÏòÅÏóÖÏùºÏûê'] = pd.to_datetime(train['ÏòÅÏóÖÏùºÏûê'])
test['ÏòÅÏóÖÏùºÏûê'] = pd.to_datetime(test['ÏòÅÏóÖÏùºÏûê'])

# Make the target numeric; unparseable values become 0 (see clean_sales).
train = clean_sales(train)
test = clean_sales(test)



# ----------------- Merge the academic calendar -----------------
academic = pd.read_csv('/content/drive/MyDrive/·ÑÄ·Öµ·ÑÄ·Ö®·Ñí·Ö°·Ü®·Ñâ·Ö≥·Ü∏/·Ñê·Öµ·Ü∑·Ñë·Ö≥·ÜØ/Data/·Ñí·Ö°·Ü®·Ñâ·Ö°·Ñã·Öµ·ÜØ·Ñå·Ö•·Üº_·Ñå·Ö•·Üº·ÑÖ·Öµ(2325).csv')
# Rename the calendar's `date` column to the POS date column so the frames can be joined on it.
academic['ÏòÅÏóÖÏùºÏûê'] = pd.to_datetime(academic['date'])
academic = academic.drop(columns=['date'])

# Encode weekday abbreviations as 0 (Mon) .. 6 (Sun); values missing from the map become NaN.
weekday_map = {'mon':0,'tue':1,'wed':2,'thu':3,'fri':4,'sat':5,'sun':6}
academic['acad_weekday'] = academic['weekday'].map(weekday_map)
academic = academic.drop(columns=['weekday'])

# Prefix the calendar columns with `acad_`.
academic = academic.rename(columns={
    'weekend':'acad_weekend',
    'holiday':'acad_holiday',
    'semester':'acad_semester',
    'seasonal':'acad_seasonal',
    'exam':'acad_exam',
    'ceremony':'acad_ceremony',
    'dormitory':'acad_dormitory'
})

# Left join: POS rows without a calendar entry keep NaN in the acad_* columns.
# NOTE(review): assumes at most one calendar row per date; duplicate dates would duplicate POS rows — confirm.
train = train.merge(academic, on='ÏòÅÏóÖÏùºÏûê', how='left')
test = test.merge(academic, on='ÏòÅÏóÖÏùºÏûê', how='left')

# Add the calendar-derived columns to both frames.
train = add_date_features(train)
test = add_date_features(test)

# dropna() has no subset, so it removes every train row with a NaN in ANY column:
# the first 27 rows (incomplete 28-row windows) and rows the calendar merge left unmatched.
train = add_lag_features(train).dropna().reset_index(drop=True)
# NOTE(review): these test lag/rolling columns are derived from the test target itself;
# autoregressive_forecast overwrites them row by row, so they are not used for the GRU forecast.
test = add_lag_features(test)



# =========================================================
# 8) GRU ÌïôÏäµ
# =========================================================
feature_cols = [
    'acad_weekend','acad_semester','acad_weekday',
    'open_hours','acad_ceremony','acad_exam',
    'lag1','lag7','lag14','lag28',
    'roll_std7','roll_std28',
    'roll_mean7','roll_mean14','roll_mean28'
]

scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

# Scale a COPY. `train` must stay in original units because it is later
# handed to autoregressive_forecast (which applies scaler_X.transform itself,
# so pre-scaled input would be scaled twice) and to sarimax_forecast (whose
# output is blended with GRU predictions expressed in original units).
# Not overwriting `train` also makes this cell safe to re-run.
train_scaled = train.copy()
train_scaled[feature_cols] = scaler_X.fit_transform(train[feature_cols])
train_scaled[['ÏùºÎß§Ï∂ú']] = scaler_y.fit_transform(train[['ÏùºÎß§Ï∂ú']])

seq_len = 28
X_train, y_train = create_sequences(train_scaled, feature_cols, seq_len)

dataset = SequenceDataset(X_train, y_train)
loader = DataLoader(dataset, batch_size=32, shuffle=True)

model = GRUModel(input_dim=len(feature_cols), num_layers=2)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for epoch in range(50):
    # Sum of per-batch MSE losses over the epoch (not an average).
    total_loss = 0
    for xb, yb in loader:
        optimizer.zero_grad()
        loss = criterion(model(xb), yb)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"[Epoch {epoch+1}/50] Loss: {total_loss:.4f}")



# =========================================================
# 9) GRU + SARIMAX ÏòàÏ∏° + Blending
# =========================================================
# Recursive GRU forecast over the test rows.
# NOTE(review): both forecasters below work in original sales units (the GRU path applies
# scaler_X/scaler_y itself) — confirm `train` has not been scaled in place before this point.
gru_pred = autoregressive_forecast(model, train, test, seq_len, feature_cols, scaler_X, scaler_y)

# Exogenous regressor subset used by SARIMAX
exog_cols = [
    'acad_weekend','acad_semester','acad_weekday',
    'acad_holiday','acad_seasonal','acad_exam',
    'acad_ceremony','acad_dormitory',
    'month','weekday','is_weekend','open_hours'
]

sarimax_pred = sarimax_forecast(train, test, exog_cols)

# ---------- Blending ----------
# Fixed convex combination of the two forecasts.
alpha = 0.7     # weight of the GRU forecast
final_pred = alpha * np.array(gru_pred) + (1-alpha) * np.array(sarimax_pred)



# =========================================================
# 10) Evaluation
# =========================================================
mae, rmse, smape_val = evaluate(test['ÏùºÎß§Ï∂ú'], final_pred)
print("\n====== GRU + SARIMAX Blending Í≤∞Í≥º ======")
print("MAE   :", mae)
print("RMSE  :", rmse)
print("SMAPE :", smape_val)



# =========================================================
# 11) Save to CSV
# =========================================================
# One row per test date with each model's forecast and the blend;
# written to the current working directory.
output = test[['ÏòÅÏóÖÏùºÏûê']].copy()
output['GRU'] = gru_pred
output['SARIMAX'] = sarimax_pred
output['Blended'] = final_pred

output.to_csv("gru_sarimax_blended.csv", index=False)
print("\nCSV Ï†ÄÏû• ÏôÑÎ£å ‚Üí gru_sarimax_blended.csv")


[Epoch 1/50] Loss: 0.8714
[Epoch 2/50] Loss: 0.5253
[Epoch 3/50] Loss: 0.4432
[Epoch 4/50] Loss: 0.3666
[Epoch 5/50] Loss: 0.3487
[Epoch 6/50] Loss: 0.3307
[Epoch 7/50] Loss: 0.3320
[Epoch 8/50] Loss: 0.3193
[Epoch 9/50] Loss: 0.3023
[Epoch 10/50] Loss: 0.2984
[Epoch 11/50] Loss: 0.2922
[Epoch 12/50] Loss: 0.2591
[Epoch 13/50] Loss: 0.2478
[Epoch 14/50] Loss: 0.2435
[Epoch 15/50] Loss: 0.2300
[Epoch 16/50] Loss: 0.2335
[Epoch 17/50] Loss: 0.2289
[Epoch 18/50] Loss: 0.2137
[Epoch 19/50] Loss: 0.2119
[Epoch 20/50] Loss: 0.2062
[Epoch 21/50] Loss: 0.2119
[Epoch 22/50] Loss: 0.2106
[Epoch 23/50] Loss: 0.2130
[Epoch 24/50] Loss: 0.2027
[Epoch 25/50] Loss: 0.2083
[Epoch 26/50] Loss: 0.2082
[Epoch 27/50] Loss: 0.2012
[Epoch 28/50] Loss: 0.1947
[Epoch 29/50] Loss: 0.2009
[Epoch 30/50] Loss: 0.1999
[Epoch 31/50] Loss: 0.2094
[Epoch 32/50] Loss: 0.2004
[Epoch 33/50] Loss: 0.1923
[Epoch 34/50] Loss: 0.1984
[Epoch 35/50] Loss: 0.1918
[Epoch 36/50] Loss: 0.2006
[Epoch 37/50] Loss: 0.1884
[Epoch 38/