In [32]:
import numpy as np
import pandas as pd
import torch.nn as nn
import numpy as np
import pandas as pd
from torch.utils.data import Dataset
from torch.utils.data import Dataset, DataLoader

class TimeSeriesDataset(Dataset):
    def __init__(self, df, input_window=14, output_window=14, target_col_name='transaction_count'):
        """
        df               : A pandas DataFrame that includes both features and the target column.
        input_window     : Number of time steps used as input (lookback).
        output_window    : Number of time steps to forecast (e.g. 14 for 14-day).
        target_col_name  : Name of the target column in df.
        """
        # Separate the features vs. target
        self.X_data = df.values  # all columns except target
        self.y_data = df[target_col_name].values               

        self.input_window = input_window
        self.output_window = output_window

    def __len__(self):
        # The maximum valid index is total_rows - (input_window + output_window)
        return len(self.X_data) - (self.input_window + self.output_window - 1)

    def __getitem__(self, idx):
        # 1) Slice out the input window
        x_start = idx
        x_end   = idx + self.input_window
        X = self.X_data[x_start : x_end]   # shape => (input_window, num_features)

        # 2) Slice out the next 'output_window' points of the target
        y_start = x_end
        y_end   = x_end + self.output_window
        Y = self.y_data[y_start : y_end]   # shape => (output_window,)

        # Convert to float32 for PyTorch
        X = X.astype(np.float32)
        Y = Y.astype(np.float32)
        return X, Y


In [33]:
class TCNBlock(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size=3, dilation=1):
        super().__init__()
        padding = (kernel_size - 1) * dilation
        self.conv = nn.Conv1d(in_channels, out_channels, kernel_size, 
                              stride=1, padding=padding, dilation=dilation)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.2) 
        
    def forward(self, x):
        """
        x shape: [batch_size, channels, seq_len]
        """
        out = self.conv(x)
        # Remove extra time-steps from padding to maintain causality
        out = out[:, :, :-self.conv.padding[0]]  # remove the last "padding" points
        out = self.relu(out)
        out = self.dropout(out)
        return out
    
class TCN(nn.Module):
    def __init__(self, in_channels, channel_list, kernel_size=3, dropout=0.2):
        super().__init__()
        blocks = []
        current_in = in_channels
        for i, out_ch in enumerate(channel_list):
            dilation = 2 ** i
            block = TCNBlock(current_in, out_ch, kernel_size, dilation=dilation)
            blocks.append(block)
            current_in = out_ch
        self.network = nn.Sequential(*blocks)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        x = self.network(x)
        x = self.dropout(x)
        return x

    
class TCNForecastingModel(nn.Module):
    def __init__(self, num_inputs, num_channels, output_size, kernel_size=2, dropout=0.2, input_window=30):
        super(TCNForecastingModel, self).__init__()
        self.tcn = TCN(num_inputs, num_channels, kernel_size, dropout)
        self.fc = nn.Linear(num_channels[-1], output_size)
        self.input_window = input_window

    def forward(self, x):
        x = x.permute(0, 2, 1)
        y = self.tcn(x)
        y = y[:, :, -1]
        return self.fc(y)

In [34]:
# Load data
import os

os.chdir("/Users/megan/Thesis/")
print("Current working directory:", os.getcwd())
data = pd.read_csv('data/top_800_product_groups.csv')
data['date'] = pd.to_datetime(data['date'], errors='coerce')

data["day_of_week"] = data["date"].dt.dayofweek 
data["day_of_month"] = data["date"].dt.day
data["month"] = data["date"].dt.month
data["day_of_year"] = data["date"].dt.dayofyear
data["year"] = data["date"].dt.year

# Define numerical columns (exclude 'product_group' and 'date')
numerical_columns = [
    'transaction_count',
    'avg_price',
    'unique_customers',
    'unique_articles_sold',
    'median_age',
    'fashion_news_subscribers',
    'first_purchase_days_ago',
    'recent_purchase_days_ago',
    'day_of_week',
    'day_of_month',
    'month',
    'day_of_year',
    'year'
]

categorical_columns = [
    'sales_channel_2',
    'most_common_age_bin_20-29',
    'most_common_age_bin_30-39',
    'most_common_age_bin_40-49',
    'most_common_age_bin_50-59',
    'most_common_age_bin_60+'
]

Current working directory: /Users/megan/Thesis


In [35]:
categorical_columns = [
    'sales_channel_2', 'most_common_age_bin_20-29', 'most_common_age_bin_30-39',
    'most_common_age_bin_40-49', 'most_common_age_bin_50-59', 'most_common_age_bin_60+'
]

# One-hot encode the categorical columns
data = pd.get_dummies(data, columns=categorical_columns, dtype=int)

data.drop(columns=['std_price', 'club_member_ratio'], inplace=True)



In [36]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class CausalConv1d(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, dilation=1):
        """
        A 1D convolution that pads only on the left to ensure causality.
        """
        super(CausalConv1d, self).__init__()
        self.kernel_size = kernel_size
        self.dilation = dilation
        # No padding here; we will pad manually in forward.
        self.conv = nn.Conv1d(in_channels, out_channels, kernel_size,
                              padding=0, dilation=dilation)
        
    def forward(self, x):
        # Calculate the required left padding: (kernel_size-1) * dilation
        pad = (self.kernel_size - 1) * self.dilation
        # Pad only on the left: (left, right)
        x = F.pad(x, (pad, 0))
        return self.conv(x)

class TemporalBlock(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, dilation, dropout=0.2):
        super(TemporalBlock, self).__init__()
        # Use CausalConv1d instead of regular Conv1d with symmetric padding.
        self.conv1 = CausalConv1d(in_channels, out_channels, kernel_size, dilation=dilation)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(dropout)
        
        self.conv2 = CausalConv1d(out_channels, out_channels, kernel_size, dilation=dilation)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(dropout)
        
        self.net = nn.Sequential(
            self.conv1, self.relu1, self.dropout1,
            self.conv2, self.relu2, self.dropout2
        )
        self.downsample = nn.Conv1d(in_channels, out_channels, 1) \
                          if in_channels != out_channels else None
        self.relu = nn.ReLU()
        
    def forward(self, x):
        out = self.net(x)
        res = x if self.downsample is None else self.downsample(x)
        return self.relu(out + res)

In [37]:
top_10_groups = [
    "Bra Black Solid",
    "Dress Black Solid",
    "Leggings/Tights Black Solid",
    "Sweater Black Solid",
    "T-shirt Black Solid",
    "T-shirt White Solid",
    "Top Black Solid",
    "Trousers Black Solid",
    "Trousers Blue Denim",
    "Vest top Black Solid"
]

# remove top 10 products from pretrain data
pretrain_data = data[~data['product_group'].isin(top_10_groups)].copy()
top10_data = data[data['product_group'].isin(top_10_groups)].copy()

import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader

pretrain_data = pretrain_data.sort_values(["date", "product_group"]).drop(columns=['date', 'product_group'])

# Take out the last 20% of data (never used in training or validation)
total_samples = len(pretrain_data)
leave_out_size = int(total_samples * 0.2)  # 20% to leave out
usable_data = pretrain_data.iloc[:-leave_out_size]  # Keep 80% for pretraining

# Split remaining 80% into 80% train, 20% validation
split_idx = int(len(usable_data) * 0.8)
train_data = usable_data.iloc[:split_idx]
val_data = usable_data.iloc[split_idx:]

feature_scaler = MinMaxScaler()
target_scaler = MinMaxScaler()

X_train_scaled = feature_scaler.fit_transform(train_data.drop(columns=['transaction_count']))
y_train_scaled = target_scaler.fit_transform(train_data[['transaction_count']])

X_val_scaled = feature_scaler.transform(val_data.drop(columns=['transaction_count']))
y_val_scaled = target_scaler.transform(val_data[['transaction_count']])

# Create scaled DataFrames
train_df_scaled = pd.DataFrame(X_train_scaled, columns=train_data.columns.drop('transaction_count'))
train_df_scaled['transaction_count'] = y_train_scaled

val_df_scaled = pd.DataFrame(X_val_scaled, columns=val_data.columns.drop('transaction_count'))
val_df_scaled['transaction_count'] = y_val_scaled

train_dataset = TimeSeriesDataset(train_df_scaled, input_window=14, output_window=1, target_col_name='transaction_count')
val_dataset = TimeSeriesDataset(val_df_scaled, input_window=14, output_window=1, target_col_name='transaction_count')

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

print(f"Train size: {len(train_df_scaled)}, Validation size: {len(val_df_scaled)}, Left-out test size: {leave_out_size}")


Train size: 337648, Validation size: 84412, Left-out test size: 105515


In [39]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, Subset
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from modules.utils import process_name

all_groups_results = {}
output_dir = "final_version/output/1_day/transfer_finetuned"
os.makedirs(output_dir, exist_ok=True)

for product_group in top_10_groups:
    print(f"\n=== Running for {product_group} ===")
    
    # Create a folder for the product group
    sanitized_group = process_name(product_group)
    group_output_dir = os.path.join(output_dir, sanitized_group)
    os.makedirs(group_output_dir, exist_ok=True)
    
    # 1. Filter and preprocess data for the product group
    # (Using the top10_data split defined earlier; here we filter by product_group)
    group_df = data[data['product_group'] == product_group].copy()
    # Optionally, if you want to use dates for plotting, you could keep the 'date' column.
    # For now, we drop it (along with 'product_group') as before.
    group_df = group_df.drop(columns=['date', 'product_group'])
    
    # Scale data (using the scalers fitted on pretrain_data)
    X_group = group_df.drop(columns=['transaction_count']).values
    y_group = group_df['transaction_count'].values.reshape(-1, 1)
    X_group_scaled = feature_scaler.transform(X_group)
    y_group_scaled = target_scaler.transform(y_group)
    
    group_df_scaled = pd.DataFrame(X_group_scaled, columns=[col for col in group_df.columns if col != 'transaction_count'])
    group_df_scaled['transaction_count'] = y_group_scaled
    group_df_scaled = group_df_scaled.sort_index()
    
    # Create the TimeSeriesDataset
    input_window, output_window = 14, 1
    target_col_name = 'transaction_count'
    group_dataset = TimeSeriesDataset(
        group_df_scaled,
        input_window=input_window,
        output_window=output_window,
        target_col_name=target_col_name
    )
    
    # Use the last 20% of the samples as the test set
    split_idx = int(0.8 * len(group_dataset))
    test_subset = Subset(group_dataset, list(range(split_idx, len(group_dataset))))
    test_loader = DataLoader(test_subset, batch_size=32, shuffle=False)
    
    # Initialize the model and load the pretrained weights
    num_features = group_df_scaled.shape[1]
    model = TCNForecastingModel(
        num_inputs=num_features,
        num_channels=[60, 60],
        output_size=1,
        kernel_size=5,
        dropout=0.3362
    )
    checkpoint = torch.load("fine_tuned_tcn.pt")
    model.load_state_dict(checkpoint, strict=False)
    
    # Evaluate the pretrained model on the test set
    model.eval()
    all_preds, all_targets = [], []
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            preds = model(X_batch)
            all_preds.append(preds.cpu().numpy())
            all_targets.append(y_batch.cpu().numpy())
    all_preds = np.concatenate(all_preds, axis=0)
    all_targets = np.concatenate(all_targets, axis=0)
    
    # Compute evaluation metrics
    mae = mean_absolute_error(all_targets, all_preds)
    rmse = np.sqrt(mean_squared_error(all_targets, all_preds))
    mape = np.mean(np.abs((all_targets - all_preds) / np.maximum(np.abs(all_targets), 1e-8))) * 100
    r2 = r2_score(all_targets, all_preds)
    
    print(f"Test Set for {product_group} => MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.2f}%, R²: {r2:.4f}")
    
    # Plot Predictions vs Actual for the entire test set
    time_steps = np.arange(len(all_targets))
    plt.figure(figsize=(12, 6))
    plt.plot(time_steps, all_targets[:, 0], label="Actual")
    plt.plot(time_steps, all_preds[:, 0], label="Predicted", linestyle='--')
    plt.xlabel("Time Step")
    plt.ylabel("Scaled Transaction Count")
    plt.title(f"Transfer Learning with TCN Model for {product_group} (1-Day Predictions)")
    plt.legend()
    pred_fig_path = os.path.join(group_output_dir, "predictions_vs_actual.png")
    plt.savefig(pred_fig_path)
    plt.close()
    
    # Plot Residuals for the entire test set
    residuals = all_targets[:, 0] - all_preds[:, 0]
    plt.figure(figsize=(12, 6))
    plt.plot(time_steps, residuals, label="Residuals", color='green')
    plt.axhline(0, color='red', linestyle='--', alpha=0.7)
    plt.xlabel("Time Step")
    plt.ylabel("Residual")
    plt.title(f"Transfer Learning for {product_group} (1-Day)")
    plt.legend()
    resid_fig_path = os.path.join(group_output_dir, "prediction_residuals.png")
    plt.savefig(resid_fig_path)
    plt.close()
    
    # Save test set metrics
    metrics_dict = {'MAE': mae, 'RMSE': rmse, 'MAPE': mape, 'R2': r2}
    metrics_df = pd.DataFrame([metrics_dict])
    metrics_df.to_csv(os.path.join(group_output_dir, "test_metrics.csv"), index=False)
    
    # Store results
    all_groups_results[product_group] = {'metrics': metrics_dict}

print("\nAll groups processed. Figures and metrics saved in:", output_dir)



=== Running for Bra Black Solid ===
Test Set for Bra Black Solid => MAE: 0.1770, RMSE: 0.1868, MAPE: 93.28%, R²: -8.8590


  checkpoint = torch.load("fine_tuned_tcn.pt")



=== Running for Dress Black Solid ===
Test Set for Dress Black Solid => MAE: 0.3409, RMSE: 0.3786, MAPE: 96.31%, R²: -4.2867


  checkpoint = torch.load("fine_tuned_tcn.pt")



=== Running for Leggings/Tights Black Solid ===
Test Set for Leggings/Tights Black Solid => MAE: 0.1167, RMSE: 0.1266, MAPE: 83.95%, R²: -5.4521


  checkpoint = torch.load("fine_tuned_tcn.pt")



=== Running for Sweater Black Solid ===
Test Set for Sweater Black Solid => MAE: 0.0809, RMSE: 0.1277, MAPE: 72.63%, R²: -0.5721


  checkpoint = torch.load("fine_tuned_tcn.pt")



=== Running for T-shirt Black Solid ===
Test Set for T-shirt Black Solid => MAE: 0.1668, RMSE: 0.1735, MAPE: 89.89%, R²: -12.5152


  checkpoint = torch.load("fine_tuned_tcn.pt")



=== Running for T-shirt White Solid ===
Test Set for T-shirt White Solid => MAE: 0.1648, RMSE: 0.1756, MAPE: 91.79%, R²: -7.3954


  checkpoint = torch.load("fine_tuned_tcn.pt")



=== Running for Top Black Solid ===
Test Set for Top Black Solid => MAE: 0.1952, RMSE: 0.2061, MAPE: 93.59%, R²: -8.7306


  checkpoint = torch.load("fine_tuned_tcn.pt")



=== Running for Trousers Black Solid ===
Test Set for Trousers Black Solid => MAE: 0.3039, RMSE: 0.3158, MAPE: 96.18%, R²: -12.5365


  checkpoint = torch.load("fine_tuned_tcn.pt")



=== Running for Trousers Blue Denim ===
Test Set for Trousers Blue Denim => MAE: 0.1632, RMSE: 0.1732, MAPE: 92.80%, R²: -7.8939


  checkpoint = torch.load("fine_tuned_tcn.pt")



=== Running for Vest top Black Solid ===
Test Set for Vest top Black Solid => MAE: 0.2149, RMSE: 0.2359, MAPE: 92.87%, R²: -5.0461


  checkpoint = torch.load("fine_tuned_tcn.pt")



All groups processed. Figures and metrics saved in: final_version/output/1_day/transfer_finetuned


In [40]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, Subset
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import optuna

from modules.utils import process_name

############################
# Optuna Objective Function
############################
def objective_fn(trial, group_df_scaled, pretrained_checkpoint):
    """
    Creates a new TimeSeriesDataset with a trial-suggested input_window,
    splits it into train/val (80/20), and trains a TCN model with trial-suggested
    hyperparameters. Returns the validation RMSE.
    """
    # === Sample hyperparameters ===
    input_window = trial.suggest_int("input_window", 7, 28, step=7)
    lr = trial.suggest_float("lr", 1e-5, 1e-3, log=True)
    dropout = trial.suggest_float("dropout", 0.0, 0.5)
    epochs = trial.suggest_int("epochs", 30, 80)

    # === Build TimeSeriesDataset with the chosen input_window ===
    dataset = TimeSeriesDataset(
        df=group_df_scaled,
        input_window=input_window,
        output_window=1,
        target_col_name="transaction_count"
    )

    # === Split into train/val (80/20) ===
    total_samples = len(dataset)
    train_size = int(0.8 * total_samples)
    train_indices = range(train_size)
    val_indices = range(train_size, total_samples)

    train_loader = DataLoader(Subset(dataset, train_indices), batch_size=32, shuffle=False)
    val_loader = DataLoader(Subset(dataset, val_indices), batch_size=32, shuffle=False)

    # === Build model (fixed architecture except dropout) ===
    #    If you must keep the same number of channels as the pretrained model, do so here:
    model = TCNForecastingModel(
        num_inputs=group_df_scaled.shape[1],
        num_channels=[64, 64],  # Must match pretrained if you want to load all weights
        output_size=1,
        kernel_size=3,
        dropout=dropout
    )

    # === Load pretrained weights ===
    checkpoint = torch.load(pretrained_checkpoint)
    model.load_state_dict(checkpoint, strict=False)

    # === Fine-tune model on train split ===
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    model.train()
    for epoch in range(epochs):
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            preds = model(X_batch)
            loss = criterion(preds, y_batch)
            loss.backward()
            optimizer.step()

    # === Evaluate on validation split ===
    model.eval()
    all_val_preds, all_val_targets = [], []
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            preds = model(X_batch)
            all_val_preds.append(preds.cpu().numpy())
            all_val_targets.append(y_batch.cpu().numpy())

    all_val_preds = np.concatenate(all_val_preds, axis=0)
    all_val_targets = np.concatenate(all_val_targets, axis=0)

    val_rmse = np.sqrt(mean_squared_error(all_val_targets, all_val_preds))
    return val_rmse


############################
# Main Loop
############################
all_groups_results = {}
output_dir = "final_version/output/1_day/transfer"
os.makedirs(output_dir, exist_ok=True)

for product_group in top_10_groups:
    print(f"\n=== Running for {product_group} ===")
    
    # Create output folder
    sanitized_group = process_name(product_group)
    group_output_dir = os.path.join(output_dir, sanitized_group)
    os.makedirs(group_output_dir, exist_ok=True)
    
    # === Load and scale data for this product group ===
    group_df = data[data['product_group'] == product_group].copy()
    group_df = group_df.drop(columns=['date', 'product_group'])
    
    X_group = group_df.drop(columns=['transaction_count']).values
    y_group = group_df['transaction_count'].values.reshape(-1, 1)
    
    X_group_scaled = feature_scaler.transform(X_group)
    y_group_scaled = target_scaler.transform(y_group)
    
    group_df_scaled = pd.DataFrame(
        X_group_scaled,
        columns=[col for col in group_df.columns if col != 'transaction_count']
    )
    group_df_scaled['transaction_count'] = y_group_scaled
    group_df_scaled = group_df_scaled.sort_index()
    
    # === Optuna hyperparameter search (including input_window) ===
    study = optuna.create_study(direction="minimize")
    study.optimize(
        lambda trial: objective_fn(
            trial=trial,
            group_df_scaled=group_df_scaled,
            pretrained_checkpoint="pretrained_tcn_1_day.pt"
        ),
        n_trials=30
    )
    
    best_params = study.best_trial.params
    print(f"Best parameters for {product_group}: {best_params}")
    
    # === Rebuild final dataset with best input_window ===
    final_input_window = best_params["input_window"]
    final_dataset = TimeSeriesDataset(
        df=group_df_scaled,
        input_window=final_input_window,
        output_window=1,
        target_col_name="transaction_count"
    )
    
    # === Final train/test split (80/20) ===
    total_samples = len(final_dataset)
    test_split = int(0.8 * total_samples)
    final_train_indices = range(test_split)
    final_test_indices = range(test_split, total_samples)

    final_train_loader = DataLoader(Subset(final_dataset, final_train_indices), batch_size=32, shuffle=False)
    final_test_loader = DataLoader(Subset(final_dataset, final_test_indices), batch_size=32, shuffle=False)

    # === Build final model using best hyperparams ===
    model_final = TCNForecastingModel(
        num_inputs=group_df_scaled.shape[1],
        num_channels=[64, 64],  # Must match pretrained
        output_size=1,
        kernel_size=3,
        dropout=best_params["dropout"]
    )
    checkpoint = torch.load("pretrained_tcn_1_day.pt")
    model_final.load_state_dict(checkpoint, strict=False)

    criterion = nn.MSELoss()
    optimizer = optim.Adam(model_final.parameters(), lr=best_params["lr"])
    final_epochs = best_params["epochs"]
    
    # === Final training on 80% data ===
    model_final.train()
    for epoch in range(final_epochs):
        epoch_loss = 0.0
        for X_batch, y_batch in final_train_loader:
            optimizer.zero_grad()
            preds = model_final(X_batch)
            loss = criterion(preds, y_batch)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        avg_loss = epoch_loss / len(final_train_loader)
        print(f"Epoch {epoch+1}/{final_epochs}, Loss: {avg_loss:.6f}")
    
    # === Evaluate final model on the test set ===
    model_final.eval()
    all_preds, all_targets = [], []
    with torch.no_grad():
        for X_batch, y_batch in final_test_loader:
            preds = model_final(X_batch)
            all_preds.append(preds.cpu().numpy())
            all_targets.append(y_batch.cpu().numpy())
  
    all_preds = np.concatenate(all_preds, axis=0)
    all_targets = np.concatenate(all_targets, axis=0)

    all_preds_unscaled = target_scaler.inverse_transform(all_preds)
    all_targets_unscaled = target_scaler.inverse_transform(all_targets)

    mae = mean_absolute_error(all_targets_unscaled, all_preds_unscaled)
    rmse = np.sqrt(mean_squared_error(all_targets_unscaled, all_preds_unscaled))
    mape = np.mean(np.abs((all_targets_unscaled - all_preds_unscaled) /
                        np.maximum(np.abs(all_targets_unscaled), 1e-8))) * 100
    r2 = r2_score(all_targets_unscaled, all_preds_unscaled)

    print(f"Final Test for {product_group} => MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.2f}%, R²: {r2:.4f}")
    
    # === Plot Predictions vs. Actual ===
    time_steps = np.arange(len(all_targets))
    plt.figure(figsize=(12, 6))
    plt.plot(time_steps, all_targets[:, 0], label="Actual", color='blue')
    plt.plot(time_steps, all_preds[:, 0], label="Predicted", color='red', linestyle='--')
    plt.xlabel("Time Step")
    plt.ylabel("Scaled Transaction Count")
    plt.title(f"Final TCN Model for {product_group} (Input Window={final_input_window})")
    plt.legend()
    plt.savefig(os.path.join(group_output_dir, "predictions_vs_actual.png"))
    plt.close()
    
    # === Plot Residuals ===
    residuals = all_targets[:, 0] - all_preds[:, 0]
    plt.figure(figsize=(12, 6))
    plt.plot(time_steps, residuals, label="Residuals", color='green')
    plt.axhline(0, color='red', linestyle='--', alpha=0.7)
    plt.xlabel("Time Step")
    plt.ylabel("Residual")
    plt.title(f"{product_group}: Residuals (Test Set)")
    plt.legend()
    plt.savefig(os.path.join(group_output_dir, "prediction_residuals.png"))
    plt.close()
    
    # === Save final metrics ===
    metrics_dict = {
        'MAE': mae,
        'RMSE': rmse,
        'MAPE': mape,
        'R2': r2
    }
    metrics_df = pd.DataFrame([metrics_dict])
    metrics_df.to_csv(os.path.join(group_output_dir, "test_metrics.csv"), index=False)
    
    # === Store results in dictionary ===
    all_groups_results[product_group] = {'best_params': best_params, 'metrics': metrics_dict}

print("\nAll groups processed. Results saved in:", output_dir)


[I 2025-03-05 13:35:16,490] A new study created in memory with name: no-name-de3b8f31-8c34-4dfd-87cd-f46c6bb99bec



=== Running for Bra Black Solid ===


  checkpoint = torch.load(pretrained_checkpoint)
[I 2025-03-05 13:35:27,478] Trial 0 finished with value: 0.08855985590218866 and parameters: {'input_window': 7, 'lr': 0.0004741437302531928, 'dropout': 0.30508531851266407, 'epochs': 77}. Best is trial 0 with value: 0.08855985590218866.
  checkpoint = torch.load(pretrained_checkpoint)
[I 2025-03-05 13:35:33,445] Trial 1 finished with value: 0.07765513027849912 and parameters: {'input_window': 7, 'lr': 0.0008794483070161946, 'dropout': 0.4688825461988075, 'epochs': 41}. Best is trial 1 with value: 0.07765513027849912.
  checkpoint = torch.load(pretrained_checkpoint)
[I 2025-03-05 13:35:45,428] Trial 2 finished with value: 0.0589371157155297 and parameters: {'input_window': 28, 'lr': 8.240332200501712e-05, 'dropout': 0.0797191626094581, 'epochs': 53}. Best is trial 2 with value: 0.0589371157155297.
  checkpoint = torch.load(pretrained_checkpoint)
[I 2025-03-05 13:36:00,008] Trial 3 finished with value: 0.05484298333286965 and parameters: 

Best parameters for Bra Black Solid: {'input_window': 28, 'lr': 1.0693890573918646e-05, 'dropout': 0.35990957946073043, 'epochs': 37}
Epoch 1/37, Loss: 0.006980
Epoch 2/37, Loss: 0.007139
Epoch 3/37, Loss: 0.006669
Epoch 4/37, Loss: 0.006584
Epoch 5/37, Loss: 0.006034
Epoch 6/37, Loss: 0.006031
Epoch 7/37, Loss: 0.005941
Epoch 8/37, Loss: 0.006055
Epoch 9/37, Loss: 0.005689
Epoch 10/37, Loss: 0.005559
Epoch 11/37, Loss: 0.005611
Epoch 12/37, Loss: 0.006407
Epoch 13/37, Loss: 0.006533
Epoch 14/37, Loss: 0.005916
Epoch 15/37, Loss: 0.005904
Epoch 16/37, Loss: 0.005786
Epoch 17/37, Loss: 0.005985
Epoch 18/37, Loss: 0.005927
Epoch 19/37, Loss: 0.005989
Epoch 20/37, Loss: 0.005729
Epoch 21/37, Loss: 0.005860
Epoch 22/37, Loss: 0.005702
Epoch 23/37, Loss: 0.005871
Epoch 24/37, Loss: 0.005771
Epoch 25/37, Loss: 0.005811
Epoch 26/37, Loss: 0.006022
Epoch 27/37, Loss: 0.005784
Epoch 28/37, Loss: 0.006083
Epoch 29/37, Loss: 0.005960
Epoch 30/37, Loss: 0.005868
Epoch 31/37, Loss: 0.006032
Epoch 3

[I 2025-03-05 13:40:06,403] A new study created in memory with name: no-name-a0ca083e-d2c1-4ecd-b53f-b646f6b9a53e


Final Test for Bra Black Solid => MAE: 129.8472, RMSE: 171.6338, MAPE: 24.16%, R²: 0.2218

=== Running for Dress Black Solid ===


  checkpoint = torch.load(pretrained_checkpoint)
[I 2025-03-05 13:40:12,515] Trial 0 finished with value: 0.12573800676404687 and parameters: {'input_window': 7, 'lr': 1.675052396183423e-05, 'dropout': 0.1915863480873098, 'epochs': 43}. Best is trial 0 with value: 0.12573800676404687.
  checkpoint = torch.load(pretrained_checkpoint)
[I 2025-03-05 13:40:21,871] Trial 1 finished with value: 0.1095795846779584 and parameters: {'input_window': 14, 'lr': 0.0008809434745369677, 'dropout': 0.13510025461439223, 'epochs': 63}. Best is trial 1 with value: 0.1095795846779584.
  checkpoint = torch.load(pretrained_checkpoint)
[I 2025-03-05 13:40:35,478] Trial 2 finished with value: 0.12235400613032926 and parameters: {'input_window': 21, 'lr': 2.2745910752193486e-05, 'dropout': 0.15474891847310257, 'epochs': 79}. Best is trial 1 with value: 0.1095795846779584.
  checkpoint = torch.load(pretrained_checkpoint)
[I 2025-03-05 13:40:42,947] Trial 3 finished with value: 0.11864933747929657 and parameters

Best parameters for Dress Black Solid: {'input_window': 28, 'lr': 0.0005732174979456921, 'dropout': 0.37732799159831903, 'epochs': 61}
Epoch 1/61, Loss: 0.018062
Epoch 2/61, Loss: 0.017834
Epoch 3/61, Loss: 0.016406
Epoch 4/61, Loss: 0.017383
Epoch 5/61, Loss: 0.018161
Epoch 6/61, Loss: 0.015657
Epoch 7/61, Loss: 0.015644
Epoch 8/61, Loss: 0.016850
Epoch 9/61, Loss: 0.016808
Epoch 10/61, Loss: 0.016114
Epoch 11/61, Loss: 0.014834
Epoch 12/61, Loss: 0.014553
Epoch 13/61, Loss: 0.015755
Epoch 14/61, Loss: 0.015736
Epoch 15/61, Loss: 0.015422
Epoch 16/61, Loss: 0.014767
Epoch 17/61, Loss: 0.016215
Epoch 18/61, Loss: 0.017146
Epoch 19/61, Loss: 0.014458
Epoch 20/61, Loss: 0.014718
Epoch 21/61, Loss: 0.014834
Epoch 22/61, Loss: 0.014714
Epoch 23/61, Loss: 0.013876
Epoch 24/61, Loss: 0.013982
Epoch 25/61, Loss: 0.013296
Epoch 26/61, Loss: 0.013451
Epoch 27/61, Loss: 0.013059
Epoch 28/61, Loss: 0.012115
Epoch 29/61, Loss: 0.013517
Epoch 30/61, Loss: 0.014280
Epoch 31/61, Loss: 0.012321
Epoch 

[I 2025-03-05 13:45:33,962] A new study created in memory with name: no-name-579fc373-7380-42c6-b742-7d2cde459069


Final Test for Dress Black Solid => MAE: 226.0490, RMSE: 332.1896, MAPE: 21.38%, R²: 0.6182

=== Running for Leggings/Tights Black Solid ===


  checkpoint = torch.load(pretrained_checkpoint)
[I 2025-03-05 13:45:46,208] Trial 0 finished with value: 0.048293479204739254 and parameters: {'input_window': 28, 'lr': 4.902309853133715e-05, 'dropout': 0.20199690204837534, 'epochs': 64}. Best is trial 0 with value: 0.048293479204739254.
  checkpoint = torch.load(pretrained_checkpoint)
[I 2025-03-05 13:45:52,835] Trial 1 finished with value: 0.04802966303846938 and parameters: {'input_window': 7, 'lr': 2.516562612309863e-05, 'dropout': 0.09220150505121488, 'epochs': 47}. Best is trial 1 with value: 0.04802966303846938.
  checkpoint = torch.load(pretrained_checkpoint)
[I 2025-03-05 13:46:01,220] Trial 2 finished with value: 0.049931992254095295 and parameters: {'input_window': 21, 'lr': 9.286588245741191e-05, 'dropout': 0.13817679459685195, 'epochs': 47}. Best is trial 1 with value: 0.04802966303846938.
  checkpoint = torch.load(pretrained_checkpoint)
[I 2025-03-05 13:46:07,101] Trial 3 finished with value: 0.04934922627566695 and para

Best parameters for Leggings/Tights Black Solid: {'input_window': 28, 'lr': 0.0005881009945644946, 'dropout': 0.3193703740178816, 'epochs': 69}
Epoch 1/69, Loss: 0.007250
Epoch 2/69, Loss: 0.006859
Epoch 3/69, Loss: 0.006176
Epoch 4/69, Loss: 0.005298
Epoch 5/69, Loss: 0.005225
Epoch 6/69, Loss: 0.004887
Epoch 7/69, Loss: 0.004403
Epoch 8/69, Loss: 0.004453
Epoch 9/69, Loss: 0.004461
Epoch 10/69, Loss: 0.004487
Epoch 11/69, Loss: 0.004156
Epoch 12/69, Loss: 0.004077
Epoch 13/69, Loss: 0.003875
Epoch 14/69, Loss: 0.004009
Epoch 15/69, Loss: 0.003700
Epoch 16/69, Loss: 0.003704
Epoch 17/69, Loss: 0.003935
Epoch 18/69, Loss: 0.003710
Epoch 19/69, Loss: 0.003616
Epoch 20/69, Loss: 0.003189
Epoch 21/69, Loss: 0.003581
Epoch 22/69, Loss: 0.003599
Epoch 23/69, Loss: 0.003705
Epoch 24/69, Loss: 0.003710
Epoch 25/69, Loss: 0.003822
Epoch 26/69, Loss: 0.003696
Epoch 27/69, Loss: 0.004156
Epoch 28/69, Loss: 0.003735
Epoch 29/69, Loss: 0.003409
Epoch 30/69, Loss: 0.003534
Epoch 31/69, Loss: 0.0030

[I 2025-03-05 13:51:31,539] A new study created in memory with name: no-name-254f3762-57b3-4cdd-883d-f98553b2cf69



=== Running for Sweater Black Solid ===


  checkpoint = torch.load(pretrained_checkpoint)
[I 2025-03-05 13:51:37,728] Trial 0 finished with value: 0.04730168988300758 and parameters: {'input_window': 7, 'lr': 0.0001360538468741992, 'dropout': 0.10828030973658181, 'epochs': 43}. Best is trial 0 with value: 0.04730168988300758.
  checkpoint = torch.load(pretrained_checkpoint)
[I 2025-03-05 13:51:52,429] Trial 1 finished with value: 0.0511764612581454 and parameters: {'input_window': 28, 'lr': 0.00048098578552821373, 'dropout': 0.49268538482198954, 'epochs': 77}. Best is trial 0 with value: 0.04730168988300758.
  checkpoint = torch.load(pretrained_checkpoint)
[I 2025-03-05 13:52:00,911] Trial 2 finished with value: 0.06438460103098843 and parameters: {'input_window': 21, 'lr': 1.3287157385184453e-05, 'dropout': 0.30593258676727764, 'epochs': 48}. Best is trial 0 with value: 0.04730168988300758.
  checkpoint = torch.load(pretrained_checkpoint)
[I 2025-03-05 13:52:07,249] Trial 3 finished with value: 0.06222185253259103 and parame

Best parameters for Sweater Black Solid: {'input_window': 28, 'lr': 0.000645683941289616, 'dropout': 0.3870790393434035, 'epochs': 46}
Epoch 1/46, Loss: 0.024913
Epoch 2/46, Loss: 0.019399
Epoch 3/46, Loss: 0.016151
Epoch 4/46, Loss: 0.014601
Epoch 5/46, Loss: 0.014025
Epoch 6/46, Loss: 0.013636
Epoch 7/46, Loss: 0.013458
Epoch 8/46, Loss: 0.014421
Epoch 9/46, Loss: 0.014733
Epoch 10/46, Loss: 0.012979
Epoch 11/46, Loss: 0.014287
Epoch 12/46, Loss: 0.013523
Epoch 13/46, Loss: 0.013538
Epoch 14/46, Loss: 0.012626
Epoch 15/46, Loss: 0.013724
Epoch 16/46, Loss: 0.014801
Epoch 17/46, Loss: 0.012736
Epoch 18/46, Loss: 0.012913
Epoch 19/46, Loss: 0.012894
Epoch 20/46, Loss: 0.012440
Epoch 21/46, Loss: 0.013367
Epoch 22/46, Loss: 0.011906
Epoch 23/46, Loss: 0.012604
Epoch 24/46, Loss: 0.011664
Epoch 25/46, Loss: 0.011587
Epoch 26/46, Loss: 0.013065
Epoch 27/46, Loss: 0.012575
Epoch 28/46, Loss: 0.013000
Epoch 29/46, Loss: 0.011613
Epoch 30/46, Loss: 0.015254
Epoch 31/46, Loss: 0.011846
Epoch 

[I 2025-03-05 13:55:58,877] A new study created in memory with name: no-name-6002d388-ee5f-4abf-a725-f20a4cd162c1



=== Running for T-shirt Black Solid ===


  checkpoint = torch.load(pretrained_checkpoint)
[I 2025-03-05 13:56:07,487] Trial 0 finished with value: 0.042887837728931046 and parameters: {'input_window': 7, 'lr': 6.978693143955718e-05, 'dropout': 0.24369903907432622, 'epochs': 61}. Best is trial 0 with value: 0.042887837728931046.
  checkpoint = torch.load(pretrained_checkpoint)
[I 2025-03-05 13:56:21,882] Trial 1 finished with value: 0.04329155776598489 and parameters: {'input_window': 28, 'lr': 3.947604842597373e-05, 'dropout': 0.3930095689990698, 'epochs': 77}. Best is trial 0 with value: 0.042887837728931046.
  checkpoint = torch.load(pretrained_checkpoint)
[I 2025-03-05 13:56:26,916] Trial 2 finished with value: 0.04145062617018358 and parameters: {'input_window': 14, 'lr': 0.0004410690742008709, 'dropout': 0.02984864369546192, 'epochs': 32}. Best is trial 2 with value: 0.04145062617018358.
  checkpoint = torch.load(pretrained_checkpoint)
[I 2025-03-05 13:56:41,567] Trial 3 finished with value: 0.041859646313679055 and para

Best parameters for T-shirt Black Solid: {'input_window': 21, 'lr': 0.0003628632205511113, 'dropout': 0.3069128156802553, 'epochs': 57}
Epoch 1/57, Loss: 0.005525
Epoch 2/57, Loss: 0.004783
Epoch 3/57, Loss: 0.005568
Epoch 4/57, Loss: 0.004158
Epoch 5/57, Loss: 0.004396
Epoch 6/57, Loss: 0.004351
Epoch 7/57, Loss: 0.004252
Epoch 8/57, Loss: 0.004235
Epoch 9/57, Loss: 0.004262
Epoch 10/57, Loss: 0.004188
Epoch 11/57, Loss: 0.004292
Epoch 12/57, Loss: 0.003848
Epoch 13/57, Loss: 0.003933
Epoch 14/57, Loss: 0.004279
Epoch 15/57, Loss: 0.003880
Epoch 16/57, Loss: 0.004137
Epoch 17/57, Loss: 0.003566
Epoch 18/57, Loss: 0.004100
Epoch 19/57, Loss: 0.003991
Epoch 20/57, Loss: 0.003895
Epoch 21/57, Loss: 0.003684
Epoch 22/57, Loss: 0.003638
Epoch 23/57, Loss: 0.003626
Epoch 24/57, Loss: 0.003808
Epoch 25/57, Loss: 0.003446
Epoch 26/57, Loss: 0.003546
Epoch 27/57, Loss: 0.003372
Epoch 28/57, Loss: 0.003207
Epoch 29/57, Loss: 0.003403
Epoch 30/57, Loss: 0.003606
Epoch 31/57, Loss: 0.003378
Epoch

[I 2025-03-05 14:00:59,202] A new study created in memory with name: no-name-00b17be4-e784-4cc8-91e3-48c4a1ddf775



=== Running for T-shirt White Solid ===


  checkpoint = torch.load(pretrained_checkpoint)
[I 2025-03-05 14:01:09,625] Trial 0 finished with value: 0.04288313610719618 and parameters: {'input_window': 7, 'lr': 0.0005042828681234424, 'dropout': 0.18405560302892376, 'epochs': 60}. Best is trial 0 with value: 0.04288313610719618.
  checkpoint = torch.load(pretrained_checkpoint)
[I 2025-03-05 14:01:18,202] Trial 1 finished with value: 0.03577289591010845 and parameters: {'input_window': 14, 'lr': 0.0007407467729305487, 'dropout': 0.45075225620337867, 'epochs': 44}. Best is trial 1 with value: 0.03577289591010845.
  checkpoint = torch.load(pretrained_checkpoint)
[I 2025-03-05 14:01:28,546] Trial 2 finished with value: 0.040754957332894415 and parameters: {'input_window': 7, 'lr': 2.502819694029073e-05, 'dropout': 0.21103923986449524, 'epochs': 61}. Best is trial 1 with value: 0.03577289591010845.
  checkpoint = torch.load(pretrained_checkpoint)
[I 2025-03-05 14:01:38,645] Trial 3 finished with value: 0.03819026931351328 and paramet

Best parameters for T-shirt White Solid: {'input_window': 14, 'lr': 0.0005865532864168691, 'dropout': 0.25935280953601125, 'epochs': 43}
Epoch 1/43, Loss: 0.008320
Epoch 2/43, Loss: 0.006126
Epoch 3/43, Loss: 0.004669
Epoch 4/43, Loss: 0.003840
Epoch 5/43, Loss: 0.003518
Epoch 6/43, Loss: 0.003520
Epoch 7/43, Loss: 0.003214
Epoch 8/43, Loss: 0.003398
Epoch 9/43, Loss: 0.003177
Epoch 10/43, Loss: 0.003686
Epoch 11/43, Loss: 0.003848
Epoch 12/43, Loss: 0.004015
Epoch 13/43, Loss: 0.004519
Epoch 14/43, Loss: 0.003645
Epoch 15/43, Loss: 0.003671
Epoch 16/43, Loss: 0.003046
Epoch 17/43, Loss: 0.003311
Epoch 18/43, Loss: 0.003341
Epoch 19/43, Loss: 0.003163
Epoch 20/43, Loss: 0.003039
Epoch 21/43, Loss: 0.003338
Epoch 22/43, Loss: 0.003202
Epoch 23/43, Loss: 0.003059
Epoch 24/43, Loss: 0.002722
Epoch 25/43, Loss: 0.002763
Epoch 26/43, Loss: 0.002869
Epoch 27/43, Loss: 0.002709
Epoch 28/43, Loss: 0.002764
Epoch 29/43, Loss: 0.002819
Epoch 30/43, Loss: 0.002929
Epoch 31/43, Loss: 0.002837
Epoc

[I 2025-03-05 14:05:55,344] A new study created in memory with name: no-name-82ccf6a4-ebf7-4d43-a1d6-b4165069a2b1



=== Running for Top Black Solid ===


  checkpoint = torch.load(pretrained_checkpoint)
[I 2025-03-05 14:06:04,476] Trial 0 finished with value: 0.05423741240693242 and parameters: {'input_window': 21, 'lr': 0.0004045604097416296, 'dropout': 0.03238250079686278, 'epochs': 50}. Best is trial 0 with value: 0.05423741240693242.
  checkpoint = torch.load(pretrained_checkpoint)
[I 2025-03-05 14:06:17,264] Trial 1 finished with value: 0.050490422268006166 and parameters: {'input_window': 28, 'lr': 3.994289817025715e-05, 'dropout': 0.37739749824438534, 'epochs': 62}. Best is trial 1 with value: 0.050490422268006166.
  checkpoint = torch.load(pretrained_checkpoint)
[I 2025-03-05 14:06:27,500] Trial 2 finished with value: 0.05133203148310265 and parameters: {'input_window': 14, 'lr': 2.8371845444301244e-05, 'dropout': 0.37743610314532183, 'epochs': 58}. Best is trial 1 with value: 0.050490422268006166.
  checkpoint = torch.load(pretrained_checkpoint)
[I 2025-03-05 14:06:36,394] Trial 3 finished with value: 0.04978146198494575 and pa

Best parameters for Top Black Solid: {'input_window': 7, 'lr': 0.00022884876743722472, 'dropout': 0.304791145636313, 'epochs': 79}
Epoch 1/79, Loss: 0.008019
Epoch 2/79, Loss: 0.007054
Epoch 3/79, Loss: 0.007177
Epoch 4/79, Loss: 0.006746
Epoch 5/79, Loss: 0.006431
Epoch 6/79, Loss: 0.006396
Epoch 7/79, Loss: 0.006624
Epoch 8/79, Loss: 0.006776
Epoch 9/79, Loss: 0.006380
Epoch 10/79, Loss: 0.005980
Epoch 11/79, Loss: 0.006065
Epoch 12/79, Loss: 0.005960
Epoch 13/79, Loss: 0.006179
Epoch 14/79, Loss: 0.005973
Epoch 15/79, Loss: 0.005924
Epoch 16/79, Loss: 0.006204
Epoch 17/79, Loss: 0.005944
Epoch 18/79, Loss: 0.005501
Epoch 19/79, Loss: 0.005683
Epoch 20/79, Loss: 0.005679
Epoch 21/79, Loss: 0.005826
Epoch 22/79, Loss: 0.005868
Epoch 23/79, Loss: 0.005712
Epoch 24/79, Loss: 0.005703
Epoch 25/79, Loss: 0.005471
Epoch 26/79, Loss: 0.005745
Epoch 27/79, Loss: 0.005560
Epoch 28/79, Loss: 0.005378
Epoch 29/79, Loss: 0.005619
Epoch 30/79, Loss: 0.005570
Epoch 31/79, Loss: 0.005466
Epoch 32/7

[I 2025-03-05 14:10:58,991] A new study created in memory with name: no-name-b307480f-251d-4b1b-a54f-53d4a35700ed



=== Running for Trousers Black Solid ===


  checkpoint = torch.load(pretrained_checkpoint)
[I 2025-03-05 14:11:12,909] Trial 0 finished with value: 0.08403849521355776 and parameters: {'input_window': 21, 'lr': 0.0003488958993974592, 'dropout': 0.1819874807984525, 'epochs': 71}. Best is trial 0 with value: 0.08403849521355776.
  checkpoint = torch.load(pretrained_checkpoint)
[I 2025-03-05 14:11:24,226] Trial 1 finished with value: 0.0752046443053393 and parameters: {'input_window': 28, 'lr': 8.435202444479333e-05, 'dropout': 0.14635717175360213, 'epochs': 57}. Best is trial 1 with value: 0.0752046443053393.
  checkpoint = torch.load(pretrained_checkpoint)
[I 2025-03-05 14:11:32,874] Trial 2 finished with value: 0.07406413763585715 and parameters: {'input_window': 7, 'lr': 1.046472401703496e-05, 'dropout': 0.34341874682880946, 'epochs': 61}. Best is trial 2 with value: 0.07406413763585715.
  checkpoint = torch.load(pretrained_checkpoint)
[I 2025-03-05 14:11:38,212] Trial 3 finished with value: 0.0934083644468726 and parameters:

Best parameters for Trousers Black Solid: {'input_window': 21, 'lr': 4.7529010713268683e-05, 'dropout': 0.3753520948486232, 'epochs': 31}
Epoch 1/31, Loss: 0.040770
Epoch 2/31, Loss: 0.039791
Epoch 3/31, Loss: 0.038743
Epoch 4/31, Loss: 0.036021
Epoch 5/31, Loss: 0.036231
Epoch 6/31, Loss: 0.035773
Epoch 7/31, Loss: 0.037885
Epoch 8/31, Loss: 0.039039
Epoch 9/31, Loss: 0.038245
Epoch 10/31, Loss: 0.035238
Epoch 11/31, Loss: 0.035300
Epoch 12/31, Loss: 0.031486
Epoch 13/31, Loss: 0.033802
Epoch 14/31, Loss: 0.036023
Epoch 15/31, Loss: 0.036415
Epoch 16/31, Loss: 0.034331
Epoch 17/31, Loss: 0.035098
Epoch 18/31, Loss: 0.033817
Epoch 19/31, Loss: 0.035479
Epoch 20/31, Loss: 0.035086
Epoch 21/31, Loss: 0.035052
Epoch 22/31, Loss: 0.033313
Epoch 23/31, Loss: 0.033598
Epoch 24/31, Loss: 0.034007
Epoch 25/31, Loss: 0.034415
Epoch 26/31, Loss: 0.032325
Epoch 27/31, Loss: 0.033169
Epoch 28/31, Loss: 0.032451
Epoch 29/31, Loss: 0.033659
Epoch 30/31, Loss: 0.031412
Epoch 31/31, Loss: 0.034274
Fin

[I 2025-03-05 14:15:15,451] A new study created in memory with name: no-name-9a5d57dd-1bed-410d-b13a-483fed907b0e



=== Running for Trousers Blue Denim ===


  checkpoint = torch.load(pretrained_checkpoint)
[I 2025-03-05 14:15:26,845] Trial 0 finished with value: 0.0780457094275394 and parameters: {'input_window': 14, 'lr': 0.00039805181217276346, 'dropout': 0.3103378549271861, 'epochs': 68}. Best is trial 0 with value: 0.0780457094275394.
  checkpoint = torch.load(pretrained_checkpoint)
[I 2025-03-05 14:15:35,715] Trial 1 finished with value: 0.06032909558440961 and parameters: {'input_window': 21, 'lr': 0.0003018081884581513, 'dropout': 0.441105560410107, 'epochs': 50}. Best is trial 1 with value: 0.06032909558440961.
  checkpoint = torch.load(pretrained_checkpoint)
[I 2025-03-05 14:15:46,001] Trial 2 finished with value: 0.05760084542777933 and parameters: {'input_window': 21, 'lr': 6.466067115797602e-05, 'dropout': 0.07069029219616196, 'epochs': 58}. Best is trial 2 with value: 0.05760084542777933.
  checkpoint = torch.load(pretrained_checkpoint)
[I 2025-03-05 14:15:54,610] Trial 3 finished with value: 0.0872625906578099 and parameters:

Best parameters for Trousers Blue Denim: {'input_window': 14, 'lr': 3.6613061313532705e-05, 'dropout': 0.20073745708336618, 'epochs': 55}
Epoch 1/55, Loss: 0.008323
Epoch 2/55, Loss: 0.007346
Epoch 3/55, Loss: 0.007134
Epoch 4/55, Loss: 0.007703
Epoch 5/55, Loss: 0.007024
Epoch 6/55, Loss: 0.007075
Epoch 7/55, Loss: 0.007647
Epoch 8/55, Loss: 0.007380
Epoch 9/55, Loss: 0.007604
Epoch 10/55, Loss: 0.007440
Epoch 11/55, Loss: 0.006795
Epoch 12/55, Loss: 0.007005
Epoch 13/55, Loss: 0.007034
Epoch 14/55, Loss: 0.007062
Epoch 15/55, Loss: 0.007059
Epoch 16/55, Loss: 0.007195
Epoch 17/55, Loss: 0.006679
Epoch 18/55, Loss: 0.006759
Epoch 19/55, Loss: 0.007358
Epoch 20/55, Loss: 0.006790
Epoch 21/55, Loss: 0.006811
Epoch 22/55, Loss: 0.006753
Epoch 23/55, Loss: 0.006788
Epoch 24/55, Loss: 0.006638
Epoch 25/55, Loss: 0.006829
Epoch 26/55, Loss: 0.007086
Epoch 27/55, Loss: 0.007020
Epoch 28/55, Loss: 0.007055
Epoch 29/55, Loss: 0.006683
Epoch 30/55, Loss: 0.007038
Epoch 31/55, Loss: 0.006857
Epo

[I 2025-03-05 14:19:45,075] A new study created in memory with name: no-name-24646c62-5be8-4344-9c38-541eb26e5805



=== Running for Vest top Black Solid ===


  checkpoint = torch.load(pretrained_checkpoint)
[I 2025-03-05 14:19:51,975] Trial 0 finished with value: 0.06650122694536424 and parameters: {'input_window': 14, 'lr': 1.3122667844575175e-05, 'dropout': 0.35707205699860156, 'epochs': 40}. Best is trial 0 with value: 0.06650122694536424.
  checkpoint = torch.load(pretrained_checkpoint)
[I 2025-03-05 14:20:06,020] Trial 1 finished with value: 0.06066171269124524 and parameters: {'input_window': 28, 'lr': 5.4937163328211625e-05, 'dropout': 0.0666795228311921, 'epochs': 77}. Best is trial 1 with value: 0.06066171269124524.
  checkpoint = torch.load(pretrained_checkpoint)
[I 2025-03-05 14:20:15,353] Trial 2 finished with value: 0.10388188972875187 and parameters: {'input_window': 7, 'lr': 0.0006871355509797168, 'dropout': 0.08086846054451607, 'epochs': 71}. Best is trial 1 with value: 0.06066171269124524.
  checkpoint = torch.load(pretrained_checkpoint)
[I 2025-03-05 14:20:25,376] Trial 3 finished with value: 0.06599104680511239 and parame

Best parameters for Vest top Black Solid: {'input_window': 28, 'lr': 0.000939251547608988, 'dropout': 0.34552490639340255, 'epochs': 40}
Epoch 1/40, Loss: 0.009715
Epoch 2/40, Loss: 0.008887
Epoch 3/40, Loss: 0.009651
Epoch 4/40, Loss: 0.011985
Epoch 5/40, Loss: 0.010953
Epoch 6/40, Loss: 0.010921
Epoch 7/40, Loss: 0.008547
Epoch 8/40, Loss: 0.008256
Epoch 9/40, Loss: 0.007553
Epoch 10/40, Loss: 0.007244
Epoch 11/40, Loss: 0.007316
Epoch 12/40, Loss: 0.007665
Epoch 13/40, Loss: 0.006481
Epoch 14/40, Loss: 0.008027
Epoch 15/40, Loss: 0.006734
Epoch 16/40, Loss: 0.008280
Epoch 17/40, Loss: 0.007192
Epoch 18/40, Loss: 0.007503
Epoch 19/40, Loss: 0.006971
Epoch 20/40, Loss: 0.006878
Epoch 21/40, Loss: 0.006849
Epoch 22/40, Loss: 0.007219
Epoch 23/40, Loss: 0.008011
Epoch 24/40, Loss: 0.006397
Epoch 25/40, Loss: 0.006294
Epoch 26/40, Loss: 0.006454
Epoch 27/40, Loss: 0.006102
Epoch 28/40, Loss: 0.005736
Epoch 29/40, Loss: 0.006296
Epoch 30/40, Loss: 0.006171
Epoch 31/40, Loss: 0.005913
Epoc

In [42]:
import os
import pandas as pd

output_dir = "final_version/output/1_day/transfer"

metrics_summary = []

# Gather metrics from each product group's folder
for product_group in top_10_groups:
    sanitized_group = process_name(product_group)
    group_output_dir = os.path.join(output_dir, sanitized_group)
    metrics_file = os.path.join(group_output_dir, "test_metrics.csv")
    
    if os.path.exists(metrics_file):
        df = pd.read_csv(metrics_file)
        # Assume test_metrics.csv has columns: MAE, RMSE, MAPE, R2
        # and just one row of metrics
        row = df.iloc[0].to_dict()
        row["Product Group"] = product_group
        metrics_summary.append(row)
    else:
        print(f"Warning: No metrics file found for {product_group}")

# Convert to a DataFrame
summary_df = pd.DataFrame(metrics_summary)

# Reorder columns for clarity
cols_order = ["Product Group", "MAE", "RMSE", "MAPE", "R2"]
summary_df = summary_df[cols_order]

# Save the summary of all product groups
summary_file = os.path.join(output_dir, "final_metrics_summary.csv")
summary_df.to_csv(summary_file, index=False)

# Compute and save average metrics across all products
avg_metrics = summary_df[["MAE", "RMSE", "MAPE", "R2"]].mean()
avg_metrics_df = pd.DataFrame([avg_metrics])
avg_metrics_file = os.path.join(output_dir, "final_test_avg_metrics.csv")
avg_metrics_df.to_csv(avg_metrics_file, index=False)

print(f"Summary metrics saved to: {summary_file}")
print(f"Average metrics saved to: {avg_metrics_file}")


Summary metrics saved to: final_version/output/1_day/transfer/final_metrics_summary.csv
Average metrics saved to: final_version/output/1_day/transfer/final_test_avg_metrics.csv


## Macroeconomic Indicators

In [44]:
consumer_sentiment = pd.read_csv('data/external/consumer_sentiment.csv')
consumer_sentiment['DATE'] = pd.to_datetime(consumer_sentiment['DATE'])
consumer_sentiment.set_index('DATE', inplace=True)
consumer_sentiment = consumer_sentiment.resample('D').ffill()

cpi = pd.read_csv('data/external/cpi_data.csv')
cpi['DATE'] = pd.to_datetime(cpi['DATE'])
cpi.set_index('DATE', inplace=True)
cpi = cpi.resample('D').ffill()

gdp = pd.read_csv('data/external/gdp_data.csv')
gdp['DATE'] = pd.to_datetime(gdp['DATE'])
gdp.set_index('DATE', inplace=True)
gdp = gdp.resample('D').ffill()

unemployment = pd.read_csv('data/external/unemployment_data.csv')
unemployment['DATE'] = pd.to_datetime(unemployment['DATE'])
unemployment.set_index('DATE', inplace=True)
unemployment = unemployment.resample('D').ffill()

In [45]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from modules.utils import process_name

# Output directory
output_dir = "final_version/output/macroeconomic/unemployment/tcn_v2"
os.makedirs(output_dir, exist_ok=True)

all_groups_results = {}

for product_group in top_10_groups:
    print(f"\n=== Processing {product_group} ===")
    sanitized_group = process_name(product_group)
    group_output_dir = os.path.join(output_dir, sanitized_group)
    os.makedirs(group_output_dir, exist_ok=True)

    # 1. Filter data for this product group
    product_data = data[data['product_group'] == product_group].copy()
    if product_data.empty:
        print(f"No data for {product_group}, skipping.")
        continue

    # Ensure date is the index
    product_data['date'] = pd.to_datetime(product_data['date'], errors='coerce')
    product_data.set_index('date', inplace=True)
    product_data = product_data.asfreq('D').fillna(0)

    # 2. Merge Macroeconomic Data 
    print(f"🔄 Merging macroeconomic data for {product_group}...")
    product_data = product_data.join(unemployment[['UNRATE']], how='left')
    product_data.fillna(method='ffill', inplace=True)  # Forward fill missing values

    # 3. Drop unwanted columns
    drop_cols = ['product_group', 'product_type_name', 'colour_group_name', 'graphical_appearance_name']
    product_data.drop(columns=[c for c in drop_cols if c in product_data.columns], inplace=True, errors='ignore')

    if 'transaction_count' not in product_data.columns:
        print(f"No transaction_count in {product_group}, skipping.")
        continue

    # 4. Train/Validation Split (80/20)
    total_len = len(product_data)
    if total_len < 50:
        print(f"Not enough data for {product_group}, skipping.")
        continue
    split_idx = int(0.8 * total_len)
    train_df = product_data.iloc[:split_idx].copy()
    val_df = product_data.iloc[split_idx:].copy()

    # 5. Scale features & target
    feature_scaler = MinMaxScaler()
    target_scaler = MinMaxScaler()

    X_train = train_df.drop(columns=['transaction_count'])
    y_train = train_df[['transaction_count']]

    X_val = val_df.drop(columns=['transaction_count'])
    y_val = val_df[['transaction_count']]

    X_train_scaled = feature_scaler.fit_transform(X_train)
    y_train_scaled = target_scaler.fit_transform(y_train)

    X_val_scaled = feature_scaler.transform(X_val)
    y_val_scaled = target_scaler.transform(y_val)

    train_df_scaled = pd.DataFrame(X_train_scaled, columns=X_train.columns, index=X_train.index)
    train_df_scaled['transaction_count'] = y_train_scaled

    val_df_scaled = pd.DataFrame(X_val_scaled, columns=X_val.columns, index=X_val.index)
    val_df_scaled['transaction_count'] = y_val_scaled

    # 6. Create TimeSeriesDataset
    input_window, output_window = 14, 1
    train_dataset = TimeSeriesDataset(
        train_df_scaled,
        input_window=input_window,
        output_window=output_window,
        target_col_name='transaction_count'
    )
    val_dataset = TimeSeriesDataset(
        val_df_scaled,
        input_window=input_window,
        output_window=output_window,
        target_col_name='transaction_count'
    )

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=False)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    # 7. Build TCN
    num_features = X_train.shape[1] + 1 
    model = TCNForecastingModel(
        num_inputs=num_features,
        num_channels=[64, 64],
        output_size=1,
        kernel_size=3,
        dropout=0.2
    )

    # 8. Train TCN
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    epochs = 50

    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            preds = model(X_batch)
            loss = criterion(preds, y_batch)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        train_loss /= len(train_loader)

        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                preds = model(X_batch)
                loss = criterion(preds, y_batch)
                val_loss += loss.item()
        val_loss /= len(val_loader)

        if (epoch + 1) % 10 == 0:
            print(f"{product_group} | Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.6f}, Val Loss: {val_loss:.6f}")

    # 9. Evaluate on Validation
    model.eval()
    all_preds, all_targets = [], []
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            preds = model(X_batch)
            all_preds.append(preds.cpu().numpy())
            all_targets.append(y_batch.cpu().numpy())

    all_preds = np.concatenate(all_preds, axis=0)
    all_targets = np.concatenate(all_targets, axis=0)

    # Invert scaling
    preds_unscaled = target_scaler.inverse_transform(all_preds)
    targets_unscaled = target_scaler.inverse_transform(all_targets)

    mae = mean_absolute_error(targets_unscaled, preds_unscaled)
    rmse = np.sqrt(mean_squared_error(targets_unscaled, preds_unscaled))
    mape = np.mean(np.abs((targets_unscaled - preds_unscaled) / np.maximum(targets_unscaled, 1))) * 100
    r2 = r2_score(targets_unscaled, preds_unscaled)

    all_groups_results[product_group] = {'MAE': mae, 'RMSE': rmse, 'MAPE': mape, 'R2': r2}

    print(f"{product_group} => MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.2f}%, R2: {r2:.4f}")

# Save summary
summary_df = pd.DataFrame.from_dict(all_groups_results, orient='index')
summary_df.to_csv(os.path.join(output_dir, "metrics_summary.csv"))

# Compute and save average metrics
avg_metrics = {
    'MAE': np.mean([all_groups_results[pg]['MAE'] for pg in all_groups_results]),
    'RMSE': np.mean([all_groups_results[pg]['RMSE'] for pg in all_groups_results]),
    'MAPE': np.mean([all_groups_results[pg]['MAPE'] for pg in all_groups_results]),
    'R2': np.mean([all_groups_results[pg]['R2'] for pg in all_groups_results])
}

avg_metrics_df = pd.DataFrame([avg_metrics])
avg_metrics_df.to_csv(os.path.join(output_dir, "avg_metrics.csv"), index=False)



=== Processing Bra Black Solid ===
🔄 Merging macroeconomic data for Bra Black Solid...


  product_data.fillna(method='ffill', inplace=True)  # Forward fill missing values


Bra Black Solid | Epoch 10/50, Train Loss: 0.007986, Val Loss: 0.004822
Bra Black Solid | Epoch 20/50, Train Loss: 0.007422, Val Loss: 0.004596
Bra Black Solid | Epoch 30/50, Train Loss: 0.006994, Val Loss: 0.004574
Bra Black Solid | Epoch 40/50, Train Loss: 0.006545, Val Loss: 0.004309
Bra Black Solid | Epoch 50/50, Train Loss: 0.006784, Val Loss: 0.004716
Bra Black Solid => MAE: 160.0898, RMSE: 197.7088, MAPE: 30.91%, R2: 0.0106

=== Processing Dress Black Solid ===
🔄 Merging macroeconomic data for Dress Black Solid...


  product_data.fillna(method='ffill', inplace=True)  # Forward fill missing values


Dress Black Solid | Epoch 10/50, Train Loss: 0.014203, Val Loss: 0.017184
Dress Black Solid | Epoch 20/50, Train Loss: 0.011841, Val Loss: 0.012281
Dress Black Solid | Epoch 30/50, Train Loss: 0.010475, Val Loss: 0.011497
Dress Black Solid | Epoch 40/50, Train Loss: 0.010078, Val Loss: 0.010855
Dress Black Solid | Epoch 50/50, Train Loss: 0.008548, Val Loss: 0.010640
Dress Black Solid => MAE: 310.4699, RMSE: 414.7816, MAPE: 30.38%, R2: 0.4351

=== Processing Leggings/Tights Black Solid ===
🔄 Merging macroeconomic data for Leggings/Tights Black Solid...


  product_data.fillna(method='ffill', inplace=True)  # Forward fill missing values


Leggings/Tights Black Solid | Epoch 10/50, Train Loss: 0.010247, Val Loss: 0.004693
Leggings/Tights Black Solid | Epoch 20/50, Train Loss: 0.008281, Val Loss: 0.003860
Leggings/Tights Black Solid | Epoch 30/50, Train Loss: 0.007341, Val Loss: 0.002937
Leggings/Tights Black Solid | Epoch 40/50, Train Loss: 0.006622, Val Loss: 0.002884
Leggings/Tights Black Solid | Epoch 50/50, Train Loss: 0.006626, Val Loss: 0.002669
Leggings/Tights Black Solid => MAE: 119.0474, RMSE: 147.5866, MAPE: 35.71%, R2: 0.1734

=== Processing Sweater Black Solid ===
🔄 Merging macroeconomic data for Sweater Black Solid...


  product_data.fillna(method='ffill', inplace=True)  # Forward fill missing values


Sweater Black Solid | Epoch 10/50, Train Loss: 0.004605, Val Loss: 0.001466
Sweater Black Solid | Epoch 20/50, Train Loss: 0.003422, Val Loss: 0.001214
Sweater Black Solid | Epoch 30/50, Train Loss: 0.003114, Val Loss: 0.001108
Sweater Black Solid | Epoch 40/50, Train Loss: 0.003111, Val Loss: 0.001241
Sweater Black Solid | Epoch 50/50, Train Loss: 0.002486, Val Loss: 0.001217
Sweater Black Solid => MAE: 183.7265, RMSE: 226.7800, MAPE: 98.59%, R2: 0.5552

=== Processing T-shirt Black Solid ===
🔄 Merging macroeconomic data for T-shirt Black Solid...


  product_data.fillna(method='ffill', inplace=True)  # Forward fill missing values


T-shirt Black Solid | Epoch 10/50, Train Loss: 0.013024, Val Loss: 0.003906
T-shirt Black Solid | Epoch 20/50, Train Loss: 0.012242, Val Loss: 0.004929
T-shirt Black Solid | Epoch 30/50, Train Loss: 0.010745, Val Loss: 0.005446
T-shirt Black Solid | Epoch 40/50, Train Loss: 0.009698, Val Loss: 0.005268
T-shirt Black Solid | Epoch 50/50, Train Loss: 0.009464, Val Loss: 0.005093
T-shirt Black Solid => MAE: 130.5226, RMSE: 170.4104, MAPE: 19.97%, R2: -0.3987

=== Processing T-shirt White Solid ===
🔄 Merging macroeconomic data for T-shirt White Solid...


  product_data.fillna(method='ffill', inplace=True)  # Forward fill missing values


T-shirt White Solid | Epoch 10/50, Train Loss: 0.010860, Val Loss: 0.006251
T-shirt White Solid | Epoch 20/50, Train Loss: 0.007925, Val Loss: 0.003996
T-shirt White Solid | Epoch 30/50, Train Loss: 0.006499, Val Loss: 0.002650
T-shirt White Solid | Epoch 40/50, Train Loss: 0.006513, Val Loss: 0.002169
T-shirt White Solid | Epoch 50/50, Train Loss: 0.004962, Val Loss: 0.001975
T-shirt White Solid => MAE: 88.2236, RMSE: 124.5234, MAPE: 15.56%, R2: 0.6181

=== Processing Top Black Solid ===
🔄 Merging macroeconomic data for Top Black Solid...


  product_data.fillna(method='ffill', inplace=True)  # Forward fill missing values


Top Black Solid | Epoch 10/50, Train Loss: 0.004801, Val Loss: 0.002567
Top Black Solid | Epoch 20/50, Train Loss: 0.004305, Val Loss: 0.002669
Top Black Solid | Epoch 30/50, Train Loss: 0.003972, Val Loss: 0.002590
Top Black Solid | Epoch 40/50, Train Loss: 0.003940, Val Loss: 0.002523
Top Black Solid | Epoch 50/50, Train Loss: 0.003888, Val Loss: 0.002612
Top Black Solid => MAE: 159.0238, RMSE: 201.4399, MAPE: 23.35%, R2: 0.0657

=== Processing Trousers Black Solid ===
🔄 Merging macroeconomic data for Trousers Black Solid...


  product_data.fillna(method='ffill', inplace=True)  # Forward fill missing values


Trousers Black Solid | Epoch 10/50, Train Loss: 0.006704, Val Loss: 0.001506
Trousers Black Solid | Epoch 20/50, Train Loss: 0.006042, Val Loss: 0.001279
Trousers Black Solid | Epoch 30/50, Train Loss: 0.005831, Val Loss: 0.001232
Trousers Black Solid | Epoch 40/50, Train Loss: 0.005392, Val Loss: 0.001233
Trousers Black Solid | Epoch 50/50, Train Loss: 0.005319, Val Loss: 0.001216
Trousers Black Solid => MAE: 196.6200, RMSE: 256.5932, MAPE: 20.23%, R2: 0.1809

=== Processing Trousers Blue Denim ===
🔄 Merging macroeconomic data for Trousers Blue Denim...


  product_data.fillna(method='ffill', inplace=True)  # Forward fill missing values


Trousers Blue Denim | Epoch 10/50, Train Loss: 0.011492, Val Loss: 0.004454
Trousers Blue Denim | Epoch 20/50, Train Loss: 0.010883, Val Loss: 0.004202
Trousers Blue Denim | Epoch 30/50, Train Loss: 0.009409, Val Loss: 0.003788
Trousers Blue Denim | Epoch 40/50, Train Loss: 0.009416, Val Loss: 0.003429
Trousers Blue Denim | Epoch 50/50, Train Loss: 0.008136, Val Loss: 0.003276
Trousers Blue Denim => MAE: 134.2721, RMSE: 176.1062, MAPE: 30.39%, R2: 0.1860

=== Processing Vest top Black Solid ===
🔄 Merging macroeconomic data for Vest top Black Solid...


  product_data.fillna(method='ffill', inplace=True)  # Forward fill missing values


Vest top Black Solid | Epoch 10/50, Train Loss: 0.014740, Val Loss: 0.007566
Vest top Black Solid | Epoch 20/50, Train Loss: 0.012365, Val Loss: 0.005177
Vest top Black Solid | Epoch 30/50, Train Loss: 0.009953, Val Loss: 0.004577
Vest top Black Solid | Epoch 40/50, Train Loss: 0.009509, Val Loss: 0.003572
Vest top Black Solid | Epoch 50/50, Train Loss: 0.008509, Val Loss: 0.004121
Vest top Black Solid => MAE: 170.6302, RMSE: 214.2187, MAPE: 25.48%, R2: 0.5521
