# In [None]:
import os
import itertools
import numpy as np
import pandas as pd
import torch
from torch import nn
import torch.optim as optim
from tqdm import tqdm

# =============================================================================
# 0. Config
# =============================================================================
# Default input file read by import_TSD_data.
CSV_IN            = "dataset.csv"
# Output file: created with a header row if missing, then one row is appended
# per grid-search combination.
RESULTS_CSV       = "window_result.csv"
# Candidate look-back lengths iterated by the grid search.
LOOK_BACK_list    = [10, 15, 20, 25, 30]       # example values
# Candidate sub-window sizes iterated by the grid search.
WIN_SIZE_list     = [4, 6, 8]
# Candidate overlap fractions; the grid search converts each one into a
# window step via max(1, int(WIN_SIZE * (1 - OVERLAP))).
OVERLAP_list      = [0.25, 0.5, 0.75]
# Hidden size passed to GRU_model.
HIDDEN_SIZE       = 8
# Output width passed to GRU_model; also the last dimension of the target tensors.
OUTPUT_FEATURES_NUM = 1
# Number of GRU layers passed to GRU_model.
NUM_LAYERS        = 1
# Number of full-batch optimisation steps per training run in TSD_GRU.
MAX_EPOCHS        = 1000
# Learning rate handed to the Adam optimizer in TSD_GRU.
LEARNING_RATE     = 0.05
# Number of train/evaluate repetitions whose metrics TSD_GRU averages.
CIRCLE            = 5

# Results-file bootstrap: on the first run write a header-only CSV, so that the
# rows appended later (which are written without a header) land under named
# columns. An existing file is left untouched.
columns = [
    "LOOK_BACK", "WIN_SIZE", "OVERLAP", "WIN_STEP", "N_WINDOWS",
    "MSE", "RMSE", "RAE", "R2",
]
if not os.path.exists(RESULTS_CSV):
    pd.DataFrame(columns=columns).to_csv(path_or_buf=RESULTS_CSV, index=False)

# =============================================================================
# 1. Data Import + Feature Builder
# =============================================================================
def import_TSD_data(filename=CSV_IN):
    """Load the three input series from a CSV file and size a 60/40 split.

    Parameters
    ----------
    filename : str
        Path of the CSV to read. It must contain the columns ``rate``,
        ``cycle`` and ``indicator_norm``.

    Returns
    -------
    tuple
        ``(trend, cycle, effect, train_size, test_size)`` where the first
        three items are the ``rate``, ``cycle`` and ``indicator_norm``
        columns as float32 arrays, ``train_size`` is 60 % of the row count
        rounded down, and ``test_size`` is the number of remaining rows.
    """
    frame = pd.read_csv(filename)
    trend, cycle, effect = (
        frame[name].values.astype("float32")
        for name in ("rate", "cycle", "indicator_norm")
    )
    n_rows = len(trend)
    train_size = int(0.6 * n_rows)
    return trend, cycle, effect, train_size, n_rows - train_size

def build_stats_features(trend, cycle, effect, look_back, win_size, win_step):
    """Build windowed summary-statistic features and their regression targets.

    For every sample start ``i`` the segment ``[i, i + look_back)`` of
    ``trend`` and ``cycle`` is cut into sub-windows of ``win_size`` values
    whose starts are ``win_step`` apart. Each sub-window contributes eight
    values: min, max, mean and population standard deviation of the trend
    window, followed by the same four statistics of the cycle window. The
    value ``effect[i + 2 * look_back]`` is appended as the last feature and
    ``trend[i + 2 * look_back]`` is the target.

    Parameters
    ----------
    trend, cycle, effect : numpy.ndarray
        1-D series, indexed in parallel.
    look_back : int
        Length of the segment the statistics are computed on.
    win_size : int
        Length of each sub-window inside the segment.
    win_step : int
        Distance between consecutive sub-window starts.

    Returns
    -------
    tuple of numpy.ndarray
        ``X`` with shape ``(n_samples, 8 * n_windows + 1)`` and ``y`` with
        shape ``(n_samples,)``, both float32. ``n_samples`` is
        ``len(trend) - 3 * look_back`` clamped at zero; when it is zero ``X``
        is still two-dimensional so callers can read ``X.shape[1]``.

    Raises
    ------
    ValueError
        If ``win_step`` is 0 (raised by ``range``).
    """
    target_offset = 2 * look_back
    # Clamp so that a too-short series yields empty, correctly shaped outputs.
    n_samples = max(0, len(trend) - (look_back + target_offset))

    # The window layout is identical for every sample, so compute it once.
    window_starts = range(0, look_back - win_size + 1, win_step)
    n_features = 8 * len(window_starts) + 1

    X = np.empty((n_samples, n_features), dtype="float32")
    y = np.empty(n_samples, dtype="float32")
    for i in range(n_samples):
        r_seg = trend[i:i + look_back]
        c_seg = cycle[i:i + look_back]
        feats = []
        for w in window_starts:
            r_win = r_seg[w:w + win_size]
            c_win = c_seg[w:w + win_size]
            feats.extend([
                r_win.min(), r_win.max(), r_win.mean(), r_win.std(ddof=0),
                c_win.min(), c_win.max(), c_win.mean(), c_win.std(ddof=0),
            ])
        # The effect value is taken at the target position, not inside the window.
        feats.append(effect[i + target_offset])
        X[i] = feats
        y[i] = trend[i + target_offset]
    return X, y

# =============================================================================
# 2. GRU Model
# =============================================================================
class GRU_model(nn.Module):
    """GRU regressor whose last input column bypasses the recurrent layer.

    Every input vector is split into its leading ``input_size - 1`` values,
    which are fed to the GRU, and its final value, which is concatenated to
    the GRU output immediately before the linear head.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers):
        super().__init__()
        # The bypassed column is excluded from the GRU input and added back
        # to the head input, hence the -1 / +1 adjustments.
        self.gru = nn.GRU(
            input_size=input_size - 1,
            hidden_size=hidden_size,
            num_layers=num_layers,
        )
        self.head = nn.Linear(in_features=hidden_size + 1,
                              out_features=output_size)

    def forward(self, x):
        """Map ``x`` of shape (seq, batch, input_size) to (seq, batch, output_size)."""
        seq_len, batch_size, _ = x.shape
        recurrent_in = x[..., :-1]
        bypass = x[..., -1:]          # keeps the trailing axis for concatenation
        hidden, _ = self.gru(recurrent_in)
        merged = torch.cat((hidden, bypass), dim=2)
        # Flatten (seq, batch) into one axis for the head, then restore it.
        flat_out = self.head(merged.view(seq_len * batch_size, -1))
        return flat_out.view(seq_len, batch_size, -1)

# =============================================================================
# 3. Training/Eval
# =============================================================================
def _regression_metrics(pred, actual):
    """Return ``(MSE, RMSE, RAE, R2)`` for 1-D prediction and target arrays."""
    err = pred - actual
    rss = np.sum(err ** 2)
    mse = rss / len(err)

    centered = actual - actual.mean()
    # A constant target makes RAE and R2 undefined; fixed fallback values are
    # reported instead of dividing by zero.
    abs_dev = np.sum(np.abs(centered))
    rae = np.sum(np.abs(err)) / abs_dev if abs_dev > 0 else 0.0
    tss = np.sum(centered ** 2)
    r2 = 1 - rss / tss if tss > 0 else 1.0
    return mse, np.sqrt(mse), rae, r2


def TSD_GRU(trend, cycle, effect, train_size, look_back, win_size, win_step):
    """Train and evaluate the GRU model ``CIRCLE`` times and average the metrics.

    The three series are split at ``train_size``; features are built
    separately for the leading (train) and trailing (test) parts with
    ``build_stats_features``. Each repetition trains a freshly initialised
    ``GRU_model`` with its own Adam optimizer for ``MAX_EPOCHS`` full-batch
    steps on MSE loss and is then scored on the test part.

    Parameters
    ----------
    trend, cycle, effect : numpy.ndarray
        Full-length input series.
    train_size : int
        Number of leading observations used for training.
    look_back, win_size, win_step : int
        Feature-window settings forwarded to ``build_stats_features``.

    Returns
    -------
    tuple
        Mean MSE, RMSE, RAE and R2 over the ``CIRCLE`` repetitions. RAE is
        reported as 0.0 and R2 as 1.0 when the test targets are constant.

    Raises
    ------
    ValueError
        If the train or test part is too short to yield any sample.
    """
    X_tr, y_tr = build_stats_features(
        trend[:train_size], cycle[:train_size], effect[:train_size],
        look_back, win_size, win_step)
    X_te, y_te = build_stats_features(
        trend[train_size:], cycle[train_size:], effect[train_size:],
        look_back, win_size, win_step)

    bs_tr, bs_te = X_tr.shape[0], X_te.shape[0]
    if bs_tr == 0 or bs_te == 0:
        # Fail early with context instead of an IndexError / division by zero.
        raise ValueError(
            f"look_back={look_back} yields no samples "
            f"(train samples: {bs_tr}, test samples: {bs_te})")
    feat_dim = X_tr.shape[1]

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # The whole set is a single batch with sequence length 1. Move the tensors
    # to the device once instead of on every epoch.
    X_tr_t = torch.from_numpy(X_tr).reshape(1, bs_tr, feat_dim).to(device)
    y_tr_t = torch.from_numpy(y_tr).reshape(1, bs_tr, OUTPUT_FEATURES_NUM).to(device)
    X_te_t = torch.from_numpy(X_te).reshape(1, bs_te, feat_dim).to(device)

    loss_fn = nn.MSELoss()

    runs = []
    for _ in range(CIRCLE):
        # Fresh model AND fresh optimizer per repetition: reusing one Adam
        # instance would carry its moment estimates and step counts from the
        # previous repetition into the next one.
        model = GRU_model(feat_dim, HIDDEN_SIZE, OUTPUT_FEATURES_NUM, NUM_LAYERS).to(device)
        opt = optim.Adam(model.parameters(), lr=LEARNING_RATE)

        for _ in tqdm(range(MAX_EPOCHS), desc="Training"):
            opt.zero_grad()
            loss = loss_fn(model(X_tr_t), y_tr_t)
            loss.backward()
            opt.step()

        with torch.no_grad():
            pred = model(X_te_t).cpu().view(-1).numpy()
        runs.append(_regression_metrics(pred, y_te))

    mses, rmses, raes, r2s = zip(*runs)
    return np.mean(mses), np.mean(rmses), np.mean(raes), np.mean(r2s)

# =============================================================================
# 4. Grid Search + Append Results
# =============================================================================
trend, cycle, effect, train_size, test_size = import_TSD_data()

# Walk the full Cartesian grid. product() yields the combinations in the same
# order as nesting the loops LOOK_BACK -> WIN_SIZE -> OVERLAP.
for LOOK_BACK, WIN_SIZE, OVERLAP in itertools.product(
        LOOK_BACK_list, WIN_SIZE_list, OVERLAP_list):
    # Step between window starts: the non-overlapping share of the window,
    # truncated to an integer and never smaller than 1.
    WIN_STEP = max(1, int(WIN_SIZE * (1 - OVERLAP)))
    N_WINDOWS = (LOOK_BACK - WIN_SIZE) // WIN_STEP + 1
    # Eight statistics per window plus the effect value. TSD_GRU reads the
    # width from the feature matrix itself, so this value is informational.
    INPUT_FEATURES_NUM = N_WINDOWS * 8 + 1

    mse, rmse, rae, r2 = TSD_GRU(
        trend, cycle, effect, train_size, LOOK_BACK, WIN_SIZE, WIN_STEP
    )

    # Key order matches the header written when the results file was created.
    row = {
        "LOOK_BACK": LOOK_BACK,
        "WIN_SIZE": WIN_SIZE,
        "OVERLAP": OVERLAP,
        "WIN_STEP": WIN_STEP,
        "N_WINDOWS": N_WINDOWS,
        "MSE": mse,
        "RMSE": rmse,
        "RAE": rae,
        "R2": r2,
    }
    # Append immediately so finished combinations survive an interrupted run.
    pd.DataFrame([row]).to_csv(RESULTS_CSV, mode="a", header=False, index=False)
