In [2]:
import pandas as pd
import os

# Change working directory to the parent directory
os.chdir("/Users/megan/Thesis")

# Load preprocessed data
data = pd.read_csv('data/data_with_keywords.csv')
data['date'] = pd.to_datetime(data['date'], errors='coerce')

# Load the keywords CSV file into a DataFrame
keywords_df = pd.read_csv('data/external/keywords_trends.csv')

# Display the first few rows of the DataFrame to check the data
keywords_df.head()

keywords_list = pd.read_csv('data/external/keywords.csv')['Keyword'].tolist()

def get_trends_file(keyword, trends_folder='data/external/google_trends_v2'):
    """
    keyword: A single string (e.g. "Bag" or "Bag Beige Solid")
    Returns the path if the file exists, else prints a warning and returns None.
    """
    file_name = f"{keyword.replace(' ', '_')}_trend_data.csv"
    file_path = os.path.join(trends_folder, file_name)
    if os.path.exists(file_path):
        return file_path
    else:
        print(f"Warning: Trend file for {keyword} not found.")
        return None


def load_and_process_trends(trend_file, product_data):
    if 'date' in product_data.columns:
        product_data['date'] = pd.to_datetime(product_data['date'], errors='coerce')
        product_data.set_index('date', inplace=True)
    else:
        pass

    # Load trends
    trend_data = pd.read_csv(trend_file)
    trend_data.drop(columns=['isPartial'], inplace=True)
    trend_data['date'] = pd.to_datetime(trend_data['date'], errors='coerce')
    trend_data['date'] = trend_data['date'] + pd.Timedelta(weeks=36)
    trend_data.set_index('date', inplace=True)

    trend_data = trend_data.reindex(product_data.index, method='ffill')
    return trend_data


def add_trends_to_product_data(product_data, trend_data, trend_column_name):
    """
    Adds the trend data to the product_data based on the trend column name.

    Parameters:
    product_data (pd.DataFrame): The product data.
    trend_data (pd.DataFrame): The trend data containing the trend values.
    trend_column_name (str): The name of the column to store the trend data in product_data.
    
    Returns:
    pd.DataFrame: The updated product data with the trend column added.
    """
    # Add the trend data to the product_data DataFrame
    product_data[trend_column_name] = trend_data[trend_column_name]  # Add dynamic trend column

    return product_data


In [3]:
import numpy as np
import pandas as pd
import torch.nn as nn
import numpy as np
import pandas as pd
from torch.utils.data import Dataset
from torch.utils.data import Dataset, DataLoader

class TimeSeriesDataset(Dataset):
    def __init__(self, df, input_window=14, output_window=14, target_col_name='transaction_count'):
        """
        df               : A pandas DataFrame that includes both features and the target column.
        input_window     : Number of time steps used as input (lookback).
        output_window    : Number of time steps to forecast (e.g. 14 for 14-day).
        target_col_name  : Name of the target column in df.
        """
        # Separate the features vs. target
        self.X_data = df.values  # all columns except target
        self.y_data = df[target_col_name].values               

        self.input_window = input_window
        self.output_window = output_window

    def __len__(self):
        # The maximum valid index is total_rows - (input_window + output_window)
        return len(self.X_data) - (self.input_window + self.output_window - 1)

    def __getitem__(self, idx):
        # 1) Slice out the input window
        x_start = idx
        x_end   = idx + self.input_window
        X = self.X_data[x_start : x_end]   # shape => (input_window, num_features)

        # 2) Slice out the next 'output_window' points of the target
        y_start = x_end
        y_end   = x_end + self.output_window
        Y = self.y_data[y_start : y_end]   # shape => (output_window,)

        # Convert to float32 for PyTorch
        X = X.astype(np.float32)
        Y = Y.astype(np.float32)
        return X, Y


In [7]:
class TCNBlock(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size=3, dilation=1):
        super().__init__()
        padding = (kernel_size - 1) * dilation
        self.conv = nn.Conv1d(in_channels, out_channels, kernel_size, 
                              stride=1, padding=padding, dilation=dilation)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.2) 
        
    def forward(self, x):
        """
        x shape: [batch_size, channels, seq_len]
        """
        out = self.conv(x)
        # Remove extra time-steps from padding to maintain causality
        out = out[:, :, :-self.conv.padding[0]]  # remove the last "padding" points
        out = self.relu(out)
        out = self.dropout(out)
        return out
    
class TCN(nn.Module):
    def __init__(self, in_channels, channel_list, kernel_size=3, dropout=0.2):
        super().__init__()
        blocks = []
        current_in = in_channels
        for i, out_ch in enumerate(channel_list):
            dilation = 2 ** i
            block = TCNBlock(current_in, out_ch, kernel_size, dilation=dilation)
            blocks.append(block)
            current_in = out_ch
        self.network = nn.Sequential(*blocks)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        x = self.network(x)
        x = self.dropout(x)
        return x

    
class TCNForecastingModel(nn.Module):
    def __init__(self, num_inputs, num_channels, output_size, kernel_size=2, dropout=0.2, input_window=30):
        super(TCNForecastingModel, self).__init__()
        self.tcn = TCN(num_inputs, num_channels, kernel_size, dropout)
        self.fc = nn.Linear(num_channels[-1], output_size)
        self.input_window = input_window

    def forward(self, x):
        x = x.permute(0, 2, 1)
        y = self.tcn(x)
        y = y[:, :, -1]
        return self.fc(y)

In [4]:
data["day_of_week"] = data["date"].dt.dayofweek 
data["day_of_month"] = data["date"].dt.day
data["month"] = data["date"].dt.month
data["day_of_year"] = data["date"].dt.dayofyear
data["year"] = data["date"].dt.year

data = pd.get_dummies(data, columns=['most_common_age_bin'], drop_first=True, dtype=int)

# Define numerical columns (exclude 'product_group' and 'date')
numerical_columns = [
    'transaction_count',
    'avg_price',
    'unique_customers',
    'unique_articles_sold',
    'median_age',
    'fashion_news_subscribers',
    'first_purchase_days_ago',
    'recent_purchase_days_ago',
    'day_of_week',
    'day_of_month',
    'month',
    'day_of_year',
    'year'
]

# categorical_columns = [
#     'most_common_age_bin_20-29',
#     'most_common_age_bin_30-39',
#     'most_common_age_bin_40-49',
#     'most_common_age_bin_50-59',
#     'most_common_age_bin_60+'
# ]

# categorical_columns = [
#     'most_common_age_bin_20-29', 'most_common_age_bin_30-39',
#     'most_common_age_bin_40-49', 'most_common_age_bin_50-59', 'most_common_age_bin_60+'
# ]

# # One-hot encode the categorical columns
# data = pd.get_dummies(data, columns=categorical_columns, dtype=int)

data.drop(columns=['std_price', 'club_member_ratio'], inplace=True)

In [5]:
top_10_groups = [
    "Bra Black Solid",
    "Dress Black Solid",
    "Leggings/Tights Black Solid",
    "Sweater Black Solid",
    "T-shirt Black Solid",
    "T-shirt White Solid",
    "Top Black Solid",
    "Trousers Black Solid",
    "Trousers Blue Denim",
    "Vest top Black Solid"
]

# remove top 10 products from pretrain data
pretrain_data = data[~data['product_group'].isin(top_10_groups)].copy()
top10_data = data[data['product_group'].isin(top_10_groups)].copy()

In [131]:
pretrain_data.dtypes

date                         datetime64[ns]
product_group                        object
transaction_count                     int64
avg_price                           float64
sales_channel                         int64
unique_customers                      int64
unique_articles_sold                  int64
median_age                          float64
fashion_news_subscribers              int64
first_purchase_days_ago               int64
recent_purchase_days_ago              int64
product_type_name                    object
colour_group_name                    object
graphical_appearance_name            object
day_of_week                           int32
day_of_month                          int32
month                                 int32
day_of_year                           int32
year                                  int32
most_common_age_bin_20-29             int64
most_common_age_bin_30-39             int64
most_common_age_bin_40-49             int64
most_common_age_bin_50-59       

In [124]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from modules.utils import process_name

# Example product groups
top_10_groups = [
    "Bra Black Solid",
    "Dress Black Solid",
    "Leggings/Tights Black Solid",
    "Sweater Black Solid",
    "T-shirt Black Solid",
    "T-shirt White Solid",
    "Top Black Solid",
    "Trousers Black Solid",
    "Trousers Blue Denim",
    "Vest top Black Solid"
]

output_dir = "final_version/output/google_trends/1_day/tcn"
os.makedirs(output_dir, exist_ok=True)

all_groups_results = {}

for product_group in top_10_groups:
    print(f"\n=== Processing {product_group} ===")
    sanitized_group = process_name(product_group)
    group_output_dir = os.path.join(output_dir, sanitized_group)
    os.makedirs(group_output_dir, exist_ok=True)
    
    # 1. Filter data for this product group
    product_data = data[data['product_group'] == product_group].copy()
    if product_data.empty:
        print(f"No data for {product_group}, skipping.")
        continue

    product_data['date'] = pd.to_datetime(product_data['date'], errors='coerce')
    product_data.set_index('date', inplace=True)
    product_data = product_data.asfreq('D').fillna(0)
    
    # 2. Identify matched keywords (up to 3)
    matched_keywords = []
    for col in ['product_type_name', 'colour_group_name', 'graphical_appearance_name']:
        if col in product_data.columns:
            val = product_data[col].iloc[0]
            if val in keywords_list:
                matched_keywords.append(val)
    
    matched_keywords = matched_keywords[:3]  # keep at most 3

    # 3. Merge each matched trend
    if matched_keywords:
        for keyword in matched_keywords:
            trend_file = get_trends_file(keyword)
            if trend_file:
                trend_data = load_and_process_trends(trend_file, product_data)
                product_data = add_trends_to_product_data(product_data, trend_data, keyword)
            else:
                print(f"Warning: Trend file for {keyword} not found.")
    else:
        print(f"No matched keywords for {product_group}.")

    # 4. Drop unwanted columns
    drop_cols = [
        'product_group', 'product_type_name', 
        'colour_group_name', 'graphical_appearance_name',
    ]
    product_data.drop(columns=[c for c in drop_cols if c in product_data.columns],
                      inplace=True, errors='ignore')

    if 'transaction_count' not in product_data.columns:
        print(f"No transaction_count in {product_group}, skipping.")
        continue

    # 5. Train/Validation Split (80/20)
    total_len = len(product_data)
    if total_len < 50:
        print(f"Not enough data for {product_group}, skipping.")
        continue
    split_idx = int(0.8 * total_len)
    train_df = product_data.iloc[:split_idx].copy()
    val_df = product_data.iloc[split_idx:].copy()
    
    # 6. Scale features & target
    feature_scaler = MinMaxScaler()
    target_scaler = MinMaxScaler()

    X_train = train_df.drop(columns=['transaction_count'])
    y_train = train_df[['transaction_count']]

    X_val = val_df.drop(columns=['transaction_count'])
    y_val = val_df[['transaction_count']]

    X_train_scaled = feature_scaler.fit_transform(X_train)
    y_train_scaled = target_scaler.fit_transform(y_train)

    X_val_scaled = feature_scaler.transform(X_val)
    y_val_scaled = target_scaler.transform(y_val)

    train_df_scaled = pd.DataFrame(X_train_scaled, columns=X_train.columns, index=X_train.index)
    train_df_scaled['transaction_count'] = y_train_scaled

    val_df_scaled = pd.DataFrame(X_val_scaled, columns=X_val.columns, index=X_val.index)
    val_df_scaled['transaction_count'] = y_val_scaled

    # 7. Create TimeSeriesDataset
    input_window, output_window = 14, 1
    train_dataset = TimeSeriesDataset(
        train_df_scaled,
        input_window=input_window,
        output_window=output_window,
        target_col_name='transaction_count'
    )
    val_dataset = TimeSeriesDataset(
        val_df_scaled,
        input_window=input_window,
        output_window=output_window,
        target_col_name='transaction_count'
    )

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=False)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    # 8. Build TCN
    num_features = X_train.shape[1] + 1  
    model = TCNForecastingModel(
        num_inputs=num_features,
        num_channels=[64, 64],
        output_size=1,
        kernel_size=3,
        dropout=0.2
    )

    # 9. Train TCN
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    epochs = 50

    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            preds = model(X_batch)
            loss = criterion(preds, y_batch)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        train_loss /= len(train_loader)

        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                preds = model(X_batch)
                loss = criterion(preds, y_batch)
                val_loss += loss.item()
        val_loss /= len(val_loader)

        if (epoch + 1) % 10 == 0:
            print(f"{product_group} | Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.6f}, Val Loss: {val_loss:.6f}")

    # 10. Evaluate on Validation
    model.eval()
    all_preds, all_targets = [], []
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            preds = model(X_batch)
            all_preds.append(preds.cpu().numpy())
            all_targets.append(y_batch.cpu().numpy())

    all_preds = np.concatenate(all_preds, axis=0)
    all_targets = np.concatenate(all_targets, axis=0)

    # Invert scaling
    preds_unscaled = target_scaler.inverse_transform(all_preds)
    targets_unscaled = target_scaler.inverse_transform(all_targets)

    mae = mean_absolute_error(targets_unscaled, preds_unscaled)
    rmse = np.sqrt(mean_squared_error(targets_unscaled, preds_unscaled))
    mape = np.mean(np.abs((targets_unscaled - preds_unscaled) / np.maximum(targets_unscaled, 1))) * 100
    r2 = r2_score(targets_unscaled, preds_unscaled)

    all_groups_results[product_group] = {'MAE': mae, 'RMSE': rmse, 'MAPE': mape, 'R2': r2}

    print(f"{product_group} => MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.2f}%, R2: {r2:.4f}")

    # Save Predictions vs Actual Graph
    plt.figure(figsize=(12, 6))
    plt.plot(targets_unscaled, label="Actual")
    plt.plot(preds_unscaled, label="Predicted", linestyle='--')
    plt.xlabel("Time Step")
    plt.ylabel("Transaction Count")
    plt.title(f"TCN Predictions vs Actual for {product_group}")
    plt.legend()
    plt.savefig(os.path.join(group_output_dir, "predictions_vs_actual.png"))
    plt.close()

# Save summary
summary_df = pd.DataFrame.from_dict(all_groups_results, orient='index')
summary_df.to_csv(os.path.join(output_dir, "tcn_trends_summary.csv"))

print("\nAll TCN processing completed. Results saved.")



=== Processing Bra Black Solid ===
Bra Black Solid | Epoch 10/50, Train Loss: 0.007758, Val Loss: 0.004996
Bra Black Solid | Epoch 20/50, Train Loss: 0.007446, Val Loss: 0.005058
Bra Black Solid | Epoch 30/50, Train Loss: 0.006672, Val Loss: 0.005185
Bra Black Solid | Epoch 40/50, Train Loss: 0.006254, Val Loss: 0.005404
Bra Black Solid | Epoch 50/50, Train Loss: 0.005921, Val Loss: 0.005068
Bra Black Solid => MAE: 163.9274, RMSE: 200.0572, MAPE: 31.13%, R2: -0.0130

=== Processing Dress Black Solid ===
Dress Black Solid | Epoch 10/50, Train Loss: 0.013949, Val Loss: 0.018836
Dress Black Solid | Epoch 20/50, Train Loss: 0.012199, Val Loss: 0.015234
Dress Black Solid | Epoch 30/50, Train Loss: 0.010285, Val Loss: 0.012193
Dress Black Solid | Epoch 40/50, Train Loss: 0.009881, Val Loss: 0.010613
Dress Black Solid | Epoch 50/50, Train Loss: 0.009586, Val Loss: 0.009749
Dress Black Solid => MAE: 257.9106, RMSE: 403.9576, MAPE: 22.39%, R2: 0.4642

=== Processing Leggings/Tights Black Solid

In [8]:
import os
import numpy as np
import pandas as pd
import torch
import optuna
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from modules.utils import process_name

# Output directory
output_dir = "final_version/output/google_trends/1_day/tcn"
os.makedirs(output_dir, exist_ok=True)

def objective_fn(trial, train_loader, val_loader, num_features):
    """
    Optuna objective function for hyperparameter tuning.
    """
    dropout = trial.suggest_float("dropout", 0.1, 0.5)
    lr = trial.suggest_float("lr", 1e-5, 1e-3, log=True)
    kernel_size = trial.suggest_int("kernel_size", 2, 5)
    num_channels = [trial.suggest_int("num_channels_1", 32, 128), trial.suggest_int("num_channels_2", 32, 128)]
    
    model = TCNForecastingModel(
        num_inputs=num_features,
        num_channels=num_channels,
        output_size=1,
        kernel_size=kernel_size,
        dropout=dropout
    )
    
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    
    model.train()
    for epoch in range(20):  # Reduced epochs for tuning
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            preds = model(X_batch)
            loss = criterion(preds, y_batch)
            loss.backward()
            optimizer.step()
    
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            preds = model(X_batch)
            loss = criterion(preds, y_batch)
            val_loss += loss.item()
    val_loss /= len(val_loader)
    
    return val_loss

all_groups_results = {}

for product_group in top_10_groups:
    print(f"\n=== Processing {product_group} ===")
    sanitized_group = process_name(product_group)
    group_output_dir = os.path.join(output_dir, sanitized_group)
    os.makedirs(group_output_dir, exist_ok=True)

    # 1. Filter data for this product group
    product_data = data[data['product_group'] == product_group].copy()
    if product_data.empty:
        print(f"No data for {product_group}, skipping.")
        continue

    product_data['date'] = pd.to_datetime(product_data['date'], errors='coerce')
    product_data.set_index('date', inplace=True)
    product_data = product_data.asfreq('D').fillna(0)

    # 2. Merge Google Trends Data
    print(f"🔄 Merging Google Trends data for {product_group}...")
    for col in ['product_type_name', 'colour_group_name', 'graphical_appearance_name']:
        if col in product_data.columns:
            val = product_data[col].iloc[0]
            if val in keywords_list:
                trend_file = get_trends_file(val)
                if trend_file:
                    trend_data = load_and_process_trends(trend_file, product_data)
                    product_data = add_trends_to_product_data(product_data, trend_data, val)
    product_data.fillna(method='ffill', inplace=True)

    # 3. Drop unwanted columns
    drop_cols = ['product_group', 'product_type_name', 'colour_group_name', 'graphical_appearance_name']
    product_data.drop(columns=[c for c in drop_cols if c in product_data.columns], inplace=True, errors='ignore')

    if 'transaction_count' not in product_data.columns:
        print(f"No transaction_count in {product_group}, skipping.")
        continue

    # 4. Train/Validation Split (80/20)
    total_len = len(product_data)
    if total_len < 50:
        print(f"Not enough data for {product_group}, skipping.")
        continue
    split_idx = int(0.8 * total_len)
    train_df = product_data.iloc[:split_idx].copy()
    val_df = product_data.iloc[split_idx:].copy()

    # 5. Scale features & target
    feature_scaler = MinMaxScaler()
    target_scaler = MinMaxScaler()

    X_train = train_df.drop(columns=['transaction_count'])
    y_train = train_df[['transaction_count']]

    X_val = val_df.drop(columns=['transaction_count'])
    y_val = val_df[['transaction_count']]

    X_train_scaled = feature_scaler.fit_transform(X_train)
    y_train_scaled = target_scaler.fit_transform(y_train)

    X_val_scaled = feature_scaler.transform(X_val)
    y_val_scaled = target_scaler.transform(y_val)

    train_df_scaled = pd.DataFrame(X_train_scaled, columns=X_train.columns, index=X_train.index)
    train_df_scaled['transaction_count'] = y_train_scaled

    val_df_scaled = pd.DataFrame(X_val_scaled, columns=X_val.columns, index=X_val.index)
    val_df_scaled['transaction_count'] = y_val_scaled

    # 6. Create TimeSeriesDataset
    input_window, output_window = 14, 1
    train_dataset = TimeSeriesDataset(
        train_df_scaled,
        input_window=input_window,
        output_window=output_window,
        target_col_name='transaction_count'
    )
    val_dataset = TimeSeriesDataset(
        val_df_scaled,
        input_window=input_window,
        output_window=output_window,
        target_col_name='transaction_count'
    )

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=False)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    # Hyperparameter tuning with Optuna
    study = optuna.create_study(direction='minimize')
    study.optimize(lambda trial: objective_fn(trial, train_loader, val_loader, X_train.shape[1] + 1), n_trials=30)
    best_params = study.best_trial.params
    print(f"Best parameters for {product_group}: {best_params}")

    # Save best hyperparameters
    pd.DataFrame([best_params]).to_csv(os.path.join(group_output_dir, "best_hyperparameters.csv"), index=False)

print("\n✅ All Google Trends TCN processing with Optuna tuning completed. Results saved.")


  product_data.fillna(method='ffill', inplace=True)
[I 2025-03-05 15:56:38,587] A new study created in memory with name: no-name-35d186f0-f1ce-4907-9592-cffc8856245f



=== Processing Bra Black Solid ===
🔄 Merging Google Trends data for Bra Black Solid...


[I 2025-03-05 15:56:44,078] Trial 0 finished with value: 0.004116572113707662 and parameters: {'dropout': 0.1381775997051039, 'lr': 0.00012889277554061477, 'kernel_size': 3, 'num_channels_1': 98, 'num_channels_2': 92}. Best is trial 0 with value: 0.004116572113707662.
[I 2025-03-05 15:56:48,277] Trial 1 finished with value: 0.004512680904008448 and parameters: {'dropout': 0.2635857020509152, 'lr': 1.8068649806516738e-05, 'kernel_size': 4, 'num_channels_1': 102, 'num_channels_2': 63}. Best is trial 0 with value: 0.004116572113707662.
[I 2025-03-05 15:56:52,501] Trial 2 finished with value: 0.005563615495339036 and parameters: {'dropout': 0.31082163759597703, 'lr': 1.662212616092473e-05, 'kernel_size': 3, 'num_channels_1': 75, 'num_channels_2': 66}. Best is trial 0 with value: 0.004116572113707662.
[I 2025-03-05 15:56:56,855] Trial 3 finished with value: 0.005289941001683474 and parameters: {'dropout': 0.10301841534111299, 'lr': 0.0002875747369949596, 'kernel_size': 5, 'num_channels_1': 

Best parameters for Bra Black Solid: {'dropout': 0.13721910416127028, 'lr': 0.0002926406669070098, 'kernel_size': 2, 'num_channels_1': 127, 'num_channels_2': 90}

=== Processing Dress Black Solid ===
🔄 Merging Google Trends data for Dress Black Solid...


[I 2025-03-05 15:58:46,772] Trial 0 finished with value: 0.023632198106497526 and parameters: {'dropout': 0.1738988734635309, 'lr': 1.1888405206028547e-05, 'kernel_size': 2, 'num_channels_1': 103, 'num_channels_2': 93}. Best is trial 0 with value: 0.023632198106497526.
[I 2025-03-05 15:58:51,073] Trial 1 finished with value: 0.01601804499514401 and parameters: {'dropout': 0.21001817999945752, 'lr': 7.79317838468787e-05, 'kernel_size': 2, 'num_channels_1': 65, 'num_channels_2': 70}. Best is trial 1 with value: 0.01601804499514401.
[I 2025-03-05 15:58:54,767] Trial 2 finished with value: 0.014124898007139564 and parameters: {'dropout': 0.4312713501331612, 'lr': 0.0001593311902929538, 'kernel_size': 3, 'num_channels_1': 46, 'num_channels_2': 94}. Best is trial 2 with value: 0.014124898007139564.
[I 2025-03-05 15:58:58,531] Trial 3 finished with value: 0.019917412381619214 and parameters: {'dropout': 0.14217884084879248, 'lr': 2.9071894325527927e-05, 'kernel_size': 4, 'num_channels_1': 82,

Best parameters for Dress Black Solid: {'dropout': 0.2990607957008566, 'lr': 0.0009623514951648911, 'kernel_size': 3, 'num_channels_1': 36, 'num_channels_2': 34}

=== Processing Leggings/Tights Black Solid ===
🔄 Merging Google Trends data for Leggings/Tights Black Solid...


[I 2025-03-05 16:00:32,208] Trial 0 finished with value: 0.0033781217178329827 and parameters: {'dropout': 0.3380621402688181, 'lr': 0.00013149070436603363, 'kernel_size': 5, 'num_channels_1': 95, 'num_channels_2': 32}. Best is trial 0 with value: 0.0033781217178329827.
[I 2025-03-05 16:00:35,277] Trial 1 finished with value: 0.0034444604301825167 and parameters: {'dropout': 0.49881020120828024, 'lr': 0.0005131244737393343, 'kernel_size': 4, 'num_channels_1': 38, 'num_channels_2': 57}. Best is trial 0 with value: 0.0033781217178329827.
[I 2025-03-05 16:00:38,730] Trial 2 finished with value: 0.003396551945479587 and parameters: {'dropout': 0.22004879339224145, 'lr': 0.00015618763935102992, 'kernel_size': 3, 'num_channels_1': 127, 'num_channels_2': 43}. Best is trial 0 with value: 0.0033781217178329827.
[I 2025-03-05 16:00:43,648] Trial 3 finished with value: 0.0035388207295909526 and parameters: {'dropout': 0.18679584043860933, 'lr': 4.962895278880783e-05, 'kernel_size': 5, 'num_channe

Best parameters for Leggings/Tights Black Solid: {'dropout': 0.17054121666588165, 'lr': 0.0002787730427122464, 'kernel_size': 2, 'num_channels_1': 116, 'num_channels_2': 105}

=== Processing Sweater Black Solid ===
🔄 Merging Google Trends data for Sweater Black Solid...


[I 2025-03-05 16:03:05,565] Trial 0 finished with value: 0.0032926193438470364 and parameters: {'dropout': 0.481790176387518, 'lr': 1.4665349603016886e-05, 'kernel_size': 3, 'num_channels_1': 118, 'num_channels_2': 63}. Best is trial 0 with value: 0.0032926193438470364.
[I 2025-03-05 16:03:10,647] Trial 1 finished with value: 0.0017350461333990098 and parameters: {'dropout': 0.34644154237448344, 'lr': 2.031230021156042e-05, 'kernel_size': 3, 'num_channels_1': 117, 'num_channels_2': 87}. Best is trial 1 with value: 0.0017350461333990098.
[I 2025-03-05 16:03:16,333] Trial 2 finished with value: 0.0012186000793008133 and parameters: {'dropout': 0.23860330054475454, 'lr': 0.0004303226895870442, 'kernel_size': 3, 'num_channels_1': 113, 'num_channels_2': 112}. Best is trial 2 with value: 0.0012186000793008133.
[I 2025-03-05 16:03:20,785] Trial 3 finished with value: 0.0021124499384313823 and parameters: {'dropout': 0.1857651432747381, 'lr': 2.3998686600763904e-05, 'kernel_size': 5, 'num_chan

Best parameters for Sweater Black Solid: {'dropout': 0.15808145024214382, 'lr': 0.0005935354185147409, 'kernel_size': 2, 'num_channels_1': 48, 'num_channels_2': 82}

=== Processing T-shirt Black Solid ===
🔄 Merging Google Trends data for T-shirt Black Solid...


[I 2025-03-05 16:05:22,663] Trial 0 finished with value: 0.008976623840862886 and parameters: {'dropout': 0.36377871098806414, 'lr': 1.7032665634613552e-05, 'kernel_size': 5, 'num_channels_1': 59, 'num_channels_2': 62}. Best is trial 0 with value: 0.008976623840862886.
[I 2025-03-05 16:05:28,908] Trial 1 finished with value: 0.004140580305829644 and parameters: {'dropout': 0.14833480910076965, 'lr': 3.94916188267716e-05, 'kernel_size': 5, 'num_channels_1': 98, 'num_channels_2': 111}. Best is trial 1 with value: 0.004140580305829644.
[I 2025-03-05 16:05:33,973] Trial 2 finished with value: 0.0059129650646355 and parameters: {'dropout': 0.292276383321343, 'lr': 3.1494612619812234e-05, 'kernel_size': 5, 'num_channels_1': 48, 'num_channels_2': 105}. Best is trial 1 with value: 0.004140580305829644.
[I 2025-03-05 16:05:38,015] Trial 3 finished with value: 0.004446345940232277 and parameters: {'dropout': 0.32229836753661945, 'lr': 0.0005544355326553622, 'kernel_size': 4, 'num_channels_1': 94

Best parameters for T-shirt Black Solid: {'dropout': 0.10496665587160137, 'lr': 0.0005887969769196279, 'kernel_size': 5, 'num_channels_1': 60, 'num_channels_2': 51}

=== Processing T-shirt White Solid ===
🔄 Merging Google Trends data for T-shirt White Solid...


[I 2025-03-05 16:07:22,908] Trial 0 finished with value: 0.002451904921326786 and parameters: {'dropout': 0.39410760096327013, 'lr': 0.0006830865595600765, 'kernel_size': 2, 'num_channels_1': 105, 'num_channels_2': 33}. Best is trial 0 with value: 0.002451904921326786.
[I 2025-03-05 16:07:27,328] Trial 1 finished with value: 0.00609288620762527 and parameters: {'dropout': 0.15596516065180877, 'lr': 1.6725611098627406e-05, 'kernel_size': 2, 'num_channels_1': 64, 'num_channels_2': 119}. Best is trial 0 with value: 0.002451904921326786.
[I 2025-03-05 16:07:30,502] Trial 2 finished with value: 0.006744395918212831 and parameters: {'dropout': 0.10523483123187467, 'lr': 2.176859795964763e-05, 'kernel_size': 3, 'num_channels_1': 48, 'num_channels_2': 54}. Best is trial 0 with value: 0.002451904921326786.
[I 2025-03-05 16:07:33,838] Trial 3 finished with value: 0.005294420081190765 and parameters: {'dropout': 0.3387685385903553, 'lr': 5.647906382365787e-05, 'kernel_size': 3, 'num_channels_1': 

Best parameters for T-shirt White Solid: {'dropout': 0.280211873743468, 'lr': 0.00028122753337125265, 'kernel_size': 3, 'num_channels_1': 128, 'num_channels_2': 85}

=== Processing Top Black Solid ===
🔄 Merging Google Trends data for Top Black Solid...


[I 2025-03-05 16:09:25,807] Trial 0 finished with value: 0.0019993568916106597 and parameters: {'dropout': 0.15523857813528924, 'lr': 6.0610348388241105e-05, 'kernel_size': 2, 'num_channels_1': 102, 'num_channels_2': 61}. Best is trial 0 with value: 0.0019993568916106597.
[I 2025-03-05 16:09:30,667] Trial 1 finished with value: 0.002866897126659751 and parameters: {'dropout': 0.4260348895906422, 'lr': 2.14776253590898e-05, 'kernel_size': 4, 'num_channels_1': 68, 'num_channels_2': 126}. Best is trial 0 with value: 0.0019993568916106597.
[I 2025-03-05 16:09:33,713] Trial 2 finished with value: 0.0029274300439283253 and parameters: {'dropout': 0.38631034473070214, 'lr': 3.5293810401399296e-05, 'kernel_size': 3, 'num_channels_1': 62, 'num_channels_2': 61}. Best is trial 0 with value: 0.0019993568916106597.
[I 2025-03-05 16:09:37,643] Trial 3 finished with value: 0.0034405728336423637 and parameters: {'dropout': 0.3401973862833941, 'lr': 1.820127288795487e-05, 'kernel_size': 3, 'num_channel

Best parameters for Top Black Solid: {'dropout': 0.2468132197441826, 'lr': 0.0005973814223593947, 'kernel_size': 2, 'num_channels_1': 110, 'num_channels_2': 87}

=== Processing Trousers Black Solid ===
🔄 Merging Google Trends data for Trousers Black Solid...


[I 2025-03-05 16:11:38,859] Trial 0 finished with value: 0.0016238623007666319 and parameters: {'dropout': 0.4356264244441783, 'lr': 1.7852627892922654e-05, 'kernel_size': 2, 'num_channels_1': 126, 'num_channels_2': 42}. Best is trial 0 with value: 0.0016238623007666319.
[I 2025-03-05 16:11:42,785] Trial 1 finished with value: 0.0012555268243886531 and parameters: {'dropout': 0.3152339291665508, 'lr': 0.00027956767316223693, 'kernel_size': 3, 'num_channels_1': 80, 'num_channels_2': 78}. Best is trial 1 with value: 0.0012555268243886531.
[I 2025-03-05 16:11:47,486] Trial 2 finished with value: 0.0022070227714721113 and parameters: {'dropout': 0.3495200231221467, 'lr': 1.1122176346744802e-05, 'kernel_size': 3, 'num_channels_1': 81, 'num_channels_2': 109}. Best is trial 1 with value: 0.0012555268243886531.
[I 2025-03-05 16:11:51,662] Trial 3 finished with value: 0.0015955834067426621 and parameters: {'dropout': 0.1851716595181009, 'lr': 5.951797593605201e-05, 'kernel_size': 4, 'num_channe

Best parameters for Trousers Black Solid: {'dropout': 0.1719881084700191, 'lr': 0.00030653747467052246, 'kernel_size': 3, 'num_channels_1': 64, 'num_channels_2': 76}

=== Processing Trousers Blue Denim ===
🔄 Merging Google Trends data for Trousers Blue Denim...


[I 2025-03-05 16:13:31,850] Trial 0 finished with value: 0.003875650046393275 and parameters: {'dropout': 0.19264658648690602, 'lr': 0.0002014243438665461, 'kernel_size': 5, 'num_channels_1': 66, 'num_channels_2': 121}. Best is trial 0 with value: 0.003875650046393275.
[I 2025-03-05 16:13:37,740] Trial 1 finished with value: 0.004417493799701333 and parameters: {'dropout': 0.4433308238044088, 'lr': 1.631267503134161e-05, 'kernel_size': 5, 'num_channels_1': 118, 'num_channels_2': 119}. Best is trial 0 with value: 0.003875650046393275.
[I 2025-03-05 16:13:41,444] Trial 2 finished with value: 0.0036088132299482824 and parameters: {'dropout': 0.2894757402557753, 'lr': 0.0002042442261173503, 'kernel_size': 2, 'num_channels_1': 73, 'num_channels_2': 74}. Best is trial 2 with value: 0.0036088132299482824.
[I 2025-03-05 16:13:45,750] Trial 3 finished with value: 0.004081763979047537 and parameters: {'dropout': 0.26325757335067757, 'lr': 0.0003675039650574493, 'kernel_size': 5, 'num_channels_1'

Best parameters for Trousers Blue Denim: {'dropout': 0.285029565834012, 'lr': 0.00022808708617784595, 'kernel_size': 2, 'num_channels_1': 74, 'num_channels_2': 79}

=== Processing Vest top Black Solid ===
🔄 Merging Google Trends data for Vest top Black Solid...


[I 2025-03-05 16:15:50,424] Trial 0 finished with value: 0.005543003417551517 and parameters: {'dropout': 0.23490553642845918, 'lr': 6.0216905788401526e-05, 'kernel_size': 2, 'num_channels_1': 100, 'num_channels_2': 112}. Best is trial 0 with value: 0.005543003417551517.
[I 2025-03-05 16:15:54,118] Trial 1 finished with value: 0.012048532068729401 and parameters: {'dropout': 0.43457733044232805, 'lr': 2.9706778853279197e-05, 'kernel_size': 2, 'num_channels_1': 49, 'num_channels_2': 97}. Best is trial 0 with value: 0.005543003417551517.
[I 2025-03-05 16:16:00,185] Trial 2 finished with value: 0.003604222519788891 and parameters: {'dropout': 0.48039440145201895, 'lr': 0.00031298508968597176, 'kernel_size': 5, 'num_channels_1': 107, 'num_channels_2': 128}. Best is trial 2 with value: 0.003604222519788891.
[I 2025-03-05 16:16:03,493] Trial 3 finished with value: 0.010892627015709876 and parameters: {'dropout': 0.3638158084324513, 'lr': 1.9186972290596043e-05, 'kernel_size': 4, 'num_channel

Best parameters for Vest top Black Solid: {'dropout': 0.2071217321162468, 'lr': 0.0006427005870557553, 'kernel_size': 2, 'num_channels_1': 122, 'num_channels_2': 90}

✅ All Google Trends TCN processing with Optuna tuning completed. Results saved.


In [None]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import optuna
from modules.utils import process_name

# Output directory
output_dir = "final_version/output/google_trends/1_day/tcn"
os.makedirs(output_dir, exist_ok=True)

def objective_fn(trial, train_loader, val_loader, num_features):
    """
    Optuna objective function for hyperparameter tuning.
    """
    dropout = trial.suggest_float("dropout", 0.1, 0.5)
    lr = trial.suggest_float("lr", 1e-5, 1e-3, log=True)
    kernel_size = trial.suggest_int("kernel_size", 2, 5)
    num_channels = [trial.suggest_int("num_channels_1", 32, 128), trial.suggest_int("num_channels_2", 32, 128)]
    
    model = TCNForecastingModel(
        num_inputs=num_features,
        num_channels=num_channels,
        output_size=1,
        kernel_size=kernel_size,
        dropout=dropout
    )
    
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    
    model.train()
    for epoch in range(20):  # Reduced epochs for tuning
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            preds = model(X_batch)
            loss = criterion(preds, y_batch)
            loss.backward()
            optimizer.step()
    
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            preds = model(X_batch)
            loss = criterion(preds, y_batch)
            val_loss += loss.item()
    val_loss /= len(val_loader)
    
    return val_loss

# Run Optuna for all product groups
for product_group in top_10_groups:
    print(f"\n=== Processing {product_group} ===")
    sanitized_group = process_name(product_group)
    group_output_dir = os.path.join(output_dir, sanitized_group)
    os.makedirs(group_output_dir, exist_ok=True)

    # Load best hyperparameters
    best_params_path = os.path.join(group_output_dir, "best_hyperparameters.csv")
    if not os.path.exists(best_params_path):
        print(f"Skipping {product_group}, no best hyperparameters found.")
        continue

    best_params = pd.read_csv(best_params_path).iloc[0].to_dict()

    # Train model with best hyperparameters
    model = TCNForecastingModel(
        num_inputs=X_train.shape[1] + 1,
        num_channels=[int(best_params['num_channels_1']), int(best_params['num_channels_2'])],
        output_size=1,
        kernel_size=int(best_params['kernel_size']),
        dropout=best_params['dropout']
    )

    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=best_params['lr'])
    
    model.train()
    for epoch in range(50):
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            preds = model(X_batch)
            loss = criterion(preds, y_batch)
            loss.backward()
            optimizer.step()
    
    # Save trained model
    torch.save(model.state_dict(), os.path.join(group_output_dir, "best_model.pth"))
    
    # Evaluate the model
    model.eval()
    all_preds, all_targets = [], []
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            preds = model(X_batch)
            all_preds.append(preds.cpu().numpy())
            all_targets.append(y_batch.cpu().numpy())

    all_preds = np.concatenate(all_preds, axis=0)
    all_targets = np.concatenate(all_targets, axis=0)

    # Compute metrics
    preds_unscaled = target_scaler.inverse_transform(all_preds)
    targets_unscaled = target_scaler.inverse_transform(all_targets)

    mae = mean_absolute_error(targets_unscaled, preds_unscaled)
    rmse = np.sqrt(mean_squared_error(targets_unscaled, preds_unscaled))
    mape = np.mean(np.abs((targets_unscaled - preds_unscaled) / np.maximum(targets_unscaled, 1))) * 100
    r2 = r2_score(targets_unscaled, preds_unscaled)

    # Save metrics
    metrics = {'MAE': mae, 'RMSE': rmse, 'MAPE': mape, 'R2': r2}
    pd.DataFrame([metrics]).to_csv(os.path.join(group_output_dir, "test_metrics.csv"), index=False)

    # Save Predictions vs Actual Graph
    plt.figure(figsize=(12, 6))
    plt.plot(targets_unscaled, label="Actual")
    plt.plot(preds_unscaled, label="Predicted", linestyle='--')
    plt.xlabel("Time Step")
    plt.ylabel("Transaction Count")
    plt.title(f"TCN Predictions vs Actual for {product_group}")
    plt.legend()
    plt.savefig(os.path.join(group_output_dir, "predictions_vs_actual.png"))
    plt.close()


=== Processing Bra Black Solid ===

=== Processing Dress Black Solid ===

=== Processing Leggings/Tights Black Solid ===

=== Processing Sweater Black Solid ===

=== Processing T-shirt Black Solid ===

=== Processing T-shirt White Solid ===

=== Processing Top Black Solid ===

=== Processing Trousers Black Solid ===

=== Processing Trousers Blue Denim ===

=== Processing Vest top Black Solid ===

✅ All Google Trends TCN processing with Optuna tuning, best model training, evaluation, and summary completed. Results saved.


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [12]:
import os
import pandas as pd

output_dir = "final_version/output/google_trends/1_day/tcn"

metrics_summary = []

# Gather metrics from each product group's folder
for product_group in top_10_groups:
    sanitized_group = process_name(product_group)
    group_output_dir = os.path.join(output_dir, sanitized_group)
    metrics_file = os.path.join(group_output_dir, "test_metrics.csv")
    
    if os.path.exists(metrics_file):
        df = pd.read_csv(metrics_file)
        # Assume test_metrics.csv has columns: MAE, RMSE, MAPE, R2
        # and just one row of metrics
        row = df.iloc[0].to_dict()
        row["Product Group"] = product_group
        metrics_summary.append(row)
    else:
        print(f"Warning: No metrics file found for {product_group}")

# Convert to a DataFrame
summary_df = pd.DataFrame(metrics_summary)

# Reorder columns for clarity
cols_order = ["Product Group", "MAE", "RMSE", "MAPE", "R2"]
summary_df = summary_df[cols_order]

# Save the summary of all product groups
summary_file = os.path.join(output_dir, "final_metrics_summary.csv")
summary_df.to_csv(summary_file, index=False)

# Compute and save average metrics across all products
avg_metrics = summary_df[["MAE", "RMSE", "MAPE", "R2"]].mean()
avg_metrics_df = pd.DataFrame([avg_metrics])
avg_metrics_file = os.path.join(output_dir, "final_test_avg_metrics.csv")
avg_metrics_df.to_csv(avg_metrics_file, index=False)

print(f"Summary metrics saved to: {summary_file}")
print(f"Average metrics saved to: {avg_metrics_file}")


Summary metrics saved to: final_version/output/google_trends/1_day/tcn/final_metrics_summary.csv
Average metrics saved to: final_version/output/google_trends/1_day/tcn/final_test_avg_metrics.csv


## Macroeconomic Data

In [17]:
consumer_sentiment = pd.read_csv('data/external/consumer_sentiment.csv')
consumer_sentiment['DATE'] = pd.to_datetime(consumer_sentiment['DATE'])
consumer_sentiment.set_index('DATE', inplace=True)
consumer_sentiment = consumer_sentiment.resample('D').ffill()

cpi = pd.read_csv('data/external/cpi_data.csv')
cpi['DATE'] = pd.to_datetime(cpi['DATE'])
cpi.set_index('DATE', inplace=True)
cpi = cpi.resample('D').ffill()

gdp = pd.read_csv('data/external/gdp_data.csv')
gdp['DATE'] = pd.to_datetime(gdp['DATE'])
gdp.set_index('DATE', inplace=True)
gdp = gdp.resample('D').ffill()

unemployment = pd.read_csv('data/external/unemployment_data.csv')
unemployment['DATE'] = pd.to_datetime(unemployment['DATE'])
unemployment.set_index('DATE', inplace=True)
unemployment = unemployment.resample('D').ffill()

In [18]:
import pandas as pd

data = pd.read_csv('data/top_10_product_groups.csv')
data['date'] = pd.to_datetime(data['date'], errors='coerce')
data.columns

Index(['date', 'product_group', 'transaction_count', 'avg_price',
       'sales_channel', 'unique_customers', 'unique_articles_sold',
       'median_age', 'fashion_news_subscribers', 'first_purchase_days_ago',
       'recent_purchase_days_ago', 'age_bin_10-19', 'age_bin_20-29',
       'age_bin_30-39', 'age_bin_40-49', 'age_bin_50-59', 'age_bin_60+'],
      dtype='object')

In [146]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from modules.utils import process_name

# Output directory
output_dir = "final_version/output/macroeconomic/cpi/tcn"
os.makedirs(output_dir, exist_ok=True)

all_groups_results = {}

for product_group in top_10_groups:
    print(f"\n=== Processing {product_group} ===")
    sanitized_group = process_name(product_group)
    group_output_dir = os.path.join(output_dir, sanitized_group)
    os.makedirs(group_output_dir, exist_ok=True)

    # 1. Filter data for this product group
    product_data = data[data['product_group'] == product_group].copy()
    if product_data.empty:
        print(f"No data for {product_group}, skipping.")
        continue

    # Ensure date is the index
    product_data['date'] = pd.to_datetime(product_data['date'], errors='coerce')
    product_data.set_index('date', inplace=True)
    product_data = product_data.asfreq('D').fillna(0)

    # 2. Merge Macroeconomic Data (Consumer Sentiment)
    print(f"🔄 Merging macroeconomic data for {product_group}...")
    product_data = product_data.join(cpi[['USACP030000CTGYM']], how='left')
    product_data.fillna(method='ffill', inplace=True)  # Forward fill missing values

    # 3. Drop unwanted columns
    drop_cols = ['product_group', 'product_type_name', 'colour_group_name', 'graphical_appearance_name']
    product_data.drop(columns=[c for c in drop_cols if c in product_data.columns], inplace=True, errors='ignore')

    if 'transaction_count' not in product_data.columns:
        print(f"No transaction_count in {product_group}, skipping.")
        continue

    # 4. Train/Validation Split (80/20)
    total_len = len(product_data)
    if total_len < 50:
        print(f"Not enough data for {product_group}, skipping.")
        continue
    split_idx = int(0.8 * total_len)
    train_df = product_data.iloc[:split_idx].copy()
    val_df = product_data.iloc[split_idx:].copy()

    # 5. Scale features & target
    feature_scaler = MinMaxScaler()
    target_scaler = MinMaxScaler()

    X_train = train_df.drop(columns=['transaction_count'])
    y_train = train_df[['transaction_count']]

    X_val = val_df.drop(columns=['transaction_count'])
    y_val = val_df[['transaction_count']]

    X_train_scaled = feature_scaler.fit_transform(X_train)
    y_train_scaled = target_scaler.fit_transform(y_train)

    X_val_scaled = feature_scaler.transform(X_val)
    y_val_scaled = target_scaler.transform(y_val)

    train_df_scaled = pd.DataFrame(X_train_scaled, columns=X_train.columns, index=X_train.index)
    train_df_scaled['transaction_count'] = y_train_scaled

    val_df_scaled = pd.DataFrame(X_val_scaled, columns=X_val.columns, index=X_val.index)
    val_df_scaled['transaction_count'] = y_val_scaled

    # 6. Create TimeSeriesDataset
    input_window, output_window = 14, 1
    train_dataset = TimeSeriesDataset(
        train_df_scaled,
        input_window=input_window,
        output_window=output_window,
        target_col_name='transaction_count'
    )
    val_dataset = TimeSeriesDataset(
        val_df_scaled,
        input_window=input_window,
        output_window=output_window,
        target_col_name='transaction_count'
    )

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=False)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    # 7. Build TCN
    num_features = X_train.shape[1] + 1 
    model = TCNForecastingModel(
        num_inputs=num_features,
        num_channels=[64, 64],
        output_size=1,
        kernel_size=3,
        dropout=0.2
    )

    # 8. Train TCN
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    epochs = 50

    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            preds = model(X_batch)
            loss = criterion(preds, y_batch)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        train_loss /= len(train_loader)

        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                preds = model(X_batch)
                loss = criterion(preds, y_batch)
                val_loss += loss.item()
        val_loss /= len(val_loader)

        if (epoch + 1) % 10 == 0:
            print(f"{product_group} | Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.6f}, Val Loss: {val_loss:.6f}")

    # 9. Evaluate on Validation
    model.eval()
    all_preds, all_targets = [], []
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            preds = model(X_batch)
            all_preds.append(preds.cpu().numpy())
            all_targets.append(y_batch.cpu().numpy())

    all_preds = np.concatenate(all_preds, axis=0)
    all_targets = np.concatenate(all_targets, axis=0)

    # Invert scaling
    preds_unscaled = target_scaler.inverse_transform(all_preds)
    targets_unscaled = target_scaler.inverse_transform(all_targets)

    mae = mean_absolute_error(targets_unscaled, preds_unscaled)
    rmse = np.sqrt(mean_squared_error(targets_unscaled, preds_unscaled))
    mape = np.mean(np.abs((targets_unscaled - preds_unscaled) / np.maximum(targets_unscaled, 1))) * 100
    r2 = r2_score(targets_unscaled, preds_unscaled)

    all_groups_results[product_group] = {'MAE': mae, 'RMSE': rmse, 'MAPE': mape, 'R2': r2}

    print(f"{product_group} => MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.2f}%, R2: {r2:.4f}")

    # Save Predictions vs Actual Graph
    plt.figure(figsize=(12, 6))
    plt.plot(targets_unscaled, label="Actual")
    plt.plot(preds_unscaled, label="Predicted", linestyle='--')
    plt.xlabel("Time Step")
    plt.ylabel("Transaction Count")
    plt.title(f"TCN Predictions vs Actual with CPI for {product_group}")
    plt.legend()
    plt.savefig(os.path.join(group_output_dir, "predictions_vs_actual.png"))
    plt.close()

# Save summary
summary_df = pd.DataFrame.from_dict(all_groups_results, orient='index')
summary_df.to_csv(os.path.join(output_dir, "metrics_summary.csv"))

# Compute and save average metrics
avg_metrics = {
    'MAE': np.mean([all_groups_results[pg]['MAE'] for pg in all_groups_results]),
    'RMSE': np.mean([all_groups_results[pg]['RMSE'] for pg in all_groups_results]),
    'MAPE': np.mean([all_groups_results[pg]['MAPE'] for pg in all_groups_results]),
    'R2': np.mean([all_groups_results[pg]['R2'] for pg in all_groups_results])
}

avg_metrics_df = pd.DataFrame([avg_metrics])
avg_metrics_df.to_csv(os.path.join(output_dir, "avg_metrics.csv"), index=False)


print("\n✅ All TCN processing completed. Results saved.")



=== Processing Bra Black Solid ===
🔄 Merging macroeconomic data for Bra Black Solid...
Bra Black Solid | Epoch 10/50, Train Loss: 0.007396, Val Loss: 0.004328
Bra Black Solid | Epoch 20/50, Train Loss: 0.006508, Val Loss: 0.005507
Bra Black Solid | Epoch 30/50, Train Loss: 0.006304, Val Loss: 0.006876
Bra Black Solid | Epoch 40/50, Train Loss: 0.006162, Val Loss: 0.007880
Bra Black Solid | Epoch 50/50, Train Loss: 0.005928, Val Loss: 0.008154
Bra Black Solid => MAE: 218.5691, RMSE: 249.5633, MAPE: 43.33%, R2: -0.5764

=== Processing Dress Black Solid ===
🔄 Merging macroeconomic data for Dress Black Solid...
Dress Black Solid | Epoch 10/50, Train Loss: 0.016218, Val Loss: 0.017863
Dress Black Solid | Epoch 20/50, Train Loss: 0.013352, Val Loss: 0.013471
Dress Black Solid | Epoch 30/50, Train Loss: 0.012026, Val Loss: 0.013320
Dress Black Solid | Epoch 40/50, Train Loss: 0.011832, Val Loss: 0.012571
Dress Black Solid | Epoch 50/50, Train Loss: 0.010317, Val Loss: 0.011828
Dress Black So

In [35]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from modules.utils import process_name, load_best_hyperparameters

# Output directory
output_dir = "final_version/output/macroeconomic/cpi/tcn"

# Initialize results dictionary
all_groups_results = {}

# Train and evaluate using best hyperparameters
for product_group in top_10_groups:
    print(f"\n=== Training with best hyperparameters for {product_group} ===")
    sanitized_group = process_name(product_group)
    group_output_dir = os.path.join(output_dir, sanitized_group)
    os.makedirs(group_output_dir, exist_ok=True)

    # Load best hyperparameters
    best_params = load_best_hyperparameters(group_output_dir)
    if best_params is None:
        print(f"Skipping {product_group}, no best hyperparameters found.")
        continue

    # Load product data
    product_data = data[data['product_group'] == product_group].copy()
    if product_data.empty:
        print(f"Skipping {product_group}, no data available.")
        continue

    # Ensure date is the index
    product_data['date'] = pd.to_datetime(product_data['date'], errors='coerce')
    product_data.set_index('date', inplace=True)
    product_data = product_data.asfreq('D').fillna(0)

    # Train-validation split
    total_len = len(product_data)
    if total_len < 50:  # Ensure minimum required data points
        print(f"Skipping {product_group}, not enough data points.")
        continue
    
    split_idx = int(0.8 * total_len)
    train_df = product_data.iloc[:split_idx].copy()
    val_df = product_data.iloc[split_idx:].copy()

    # Ensure train_df is not empty
    if train_df.empty:
        print(f"Skipping {product_group}, train dataset is empty.")
        continue

    # Scale features and target
    feature_scaler = MinMaxScaler()
    target_scaler = MinMaxScaler()

    X_train = train_df.drop(columns=['transaction_count'], errors='ignore')
    y_train = train_df[['transaction_count']]
    X_val = val_df.drop(columns=['transaction_count'], errors='ignore')
    y_val = val_df[['transaction_count']]

    # Ensure X_train is not empty before scaling
    if X_train.shape[0] == 0:
        print(f"Skipping {product_group}, no training samples found after filtering.")
        continue

    X_train_scaled = feature_scaler.fit_transform(X_train)
    y_train_scaled = target_scaler.fit_transform(y_train)
    X_val_scaled = feature_scaler.transform(X_val)
    y_val_scaled = target_scaler.transform(y_val)

    train_df_scaled = pd.DataFrame(X_train_scaled, columns=X_train.columns, index=X_train.index)
    train_df_scaled['transaction_count'] = y_train_scaled
    val_df_scaled = pd.DataFrame(X_val_scaled, columns=X_val.columns, index=X_val.index)
    val_df_scaled['transaction_count'] = y_val_scaled

    # Create dataset
    train_dataset = TimeSeriesDataset(
        train_df_scaled, input_window=14, output_window=1, target_col_name='transaction_count'
    )
    val_dataset = TimeSeriesDataset(
        val_df_scaled, input_window=14, output_window=1, target_col_name='transaction_count'
    )

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=False)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    # Build model using best hyperparameters
    model = TCNForecastingModel(
        num_inputs=X_train.shape[1] + 1,
        num_channels=[int(best_params['num_channels_1']), int(best_params['num_channels_2'])],
        output_size=1,
        kernel_size=int(best_params['kernel_size']),
        dropout=best_params['dropout']
    )

    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=best_params['lr'])

    # Train the model
    model.train()
    for epoch in range(50):
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            preds = model(X_batch.permute(0, 2, 1))  # Ensure correct shape
            loss = criterion(preds, y_batch)
            loss.backward()
            optimizer.step()

    # Evaluate model
    model.eval()
    all_preds, all_targets = [], []
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            preds = model(X_batch.permute(0, 2, 1))  # Ensure correct shape
            all_preds.append(preds.cpu().numpy())
            all_targets.append(y_batch.cpu().numpy())

    all_preds = np.concatenate(all_preds, axis=0)
    all_targets = np.concatenate(all_targets, axis=0)

    # Invert scaling
    preds_unscaled = target_scaler.inverse_transform(all_preds)
    targets_unscaled = target_scaler.inverse_transform(all_targets)

    # Compute metrics
    mae = mean_absolute_error(targets_unscaled, preds_unscaled)
    rmse = np.sqrt(mean_squared_error(targets_unscaled, preds_unscaled))
    mape = np.mean(np.abs((targets_unscaled - preds_unscaled) / np.maximum(targets_unscaled, 1))) * 100
    r2 = r2_score(targets_unscaled, preds_unscaled)

    all_groups_results[product_group] = {'MAE': mae, 'RMSE': rmse, 'MAPE': mape, 'R2': r2}

    print(f"{product_group} => MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.2f}%, R2: {r2:.4f}")

    # Save Predictions vs Actual Graph
    plt.figure(figsize=(12, 6))
    plt.plot(targets_unscaled, label="Actual")
    plt.plot(preds_unscaled, label="Predicted", linestyle='--')
    plt.xlabel("Time Step")
    plt.ylabel("Transaction Count")
    plt.title(f"TCN Predictions vs Actual with CPI for {product_group}")
    plt.legend()
    plt.savefig(os.path.join(group_output_dir, "predictions_vs_actual.png"))
    plt.close()

# Save summary and average metrics
summary_df = pd.DataFrame.from_dict(all_groups_results, orient='index')
summary_df.to_csv(os.path.join(output_dir, "tcn_finetuned_summary.csv"))

# Compute and save average metrics
avg_metrics = {
    'MAE': np.mean([all_groups_results[pg]['MAE'] for pg in all_groups_results]),
    'RMSE': np.mean([all_groups_results[pg]['RMSE'] for pg in all_groups_results]),
    'MAPE': np.mean([all_groups_results[pg]['MAPE'] for pg in all_groups_results]),
    'R2': np.mean([all_groups_results[pg]['R2'] for pg in all_groups_results])
}

avg_metrics_df = pd.DataFrame([avg_metrics])
avg_metrics_df.to_csv(os.path.join(output_dir, "avg_finetuned_metrics.csv"), index=False)

print("\n✅ Final TCN training with best hyperparameters completed. Metrics and predictions saved.")


ImportError: cannot import name 'load_best_hyperparameters' from 'modules.utils' (/Users/megan/Thesis/modules/utils.py)

In [32]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from modules.utils import process_name

# Output directory
output_dir = "final_version/output/macroeconomic/cpi/tcn"

def load_best_hyperparameters(group_output_dir):
    best_params_path = os.path.join(group_output_dir, "best_hyperparameters.csv")
    if not os.path.exists(best_params_path):
        return None
    return pd.read_csv(best_params_path).iloc[0].to_dict()

all_groups_results = {}

# Train and evaluate using best hyperparameters
for product_group in os.listdir(output_dir):
    group_output_dir = os.path.join(output_dir, product_group)
    if not os.path.isdir(group_output_dir):
        continue
    
    print(f"\n=== Training with best hyperparameters for {product_group} ===")
    best_params = load_best_hyperparameters(group_output_dir)
    if best_params is None:
        print(f"Skipping {product_group}, no best hyperparameters found.")
        continue

    # Load product data
    product_data = data[data['product_group'] == product_group].copy()
    
    # Train-validation split
    split_idx = int(0.8 * len(product_data))
    train_df = product_data.iloc[:split_idx].copy()
    val_df = product_data.iloc[split_idx:].copy()
    
    # Scale features and target
    feature_scaler = MinMaxScaler()
    target_scaler = MinMaxScaler()
    
    X_train = train_df.drop(columns=['transaction_count'])
    y_train = train_df[['transaction_count']]
    X_val = val_df.drop(columns=['transaction_count'])
    y_val = val_df[['transaction_count']]
    
    X_train_scaled = feature_scaler.fit_transform(X_train)
    y_train_scaled = target_scaler.fit_transform(y_train)
    X_val_scaled = feature_scaler.transform(X_val)
    y_val_scaled = target_scaler.transform(y_val)
    
    train_df_scaled = pd.DataFrame(X_train_scaled, columns=X_train.columns, index=X_train.index)
    train_df_scaled['transaction_count'] = y_train_scaled
    val_df_scaled = pd.DataFrame(X_val_scaled, columns=X_val.columns, index=X_val.index)
    val_df_scaled['transaction_count'] = y_val_scaled
    
    # Create dataset
    input_window, output_window = 14, 1
    train_dataset = TimeSeriesDataset(train_df_scaled, input_window, output_window, 'transaction_count')
    val_dataset = TimeSeriesDataset(val_df_scaled, input_window, output_window, 'transaction_count')
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=False)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
    
    # Load best model
    model = TCNForecastingModel(
        num_inputs=X_train.shape[1] + 1,
        num_channels=[int(best_params['num_channels_1']), int(best_params['num_channels_2'])],
        output_size=1,
        kernel_size=int(best_params['kernel_size']),
        dropout=best_params['dropout']
    )
    
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=best_params['lr'])
    
    # Train the model
    model.train()
    for epoch in range(50):
        train_loss = 0.0
        for X_batch, y_batch in train_loader:
            X_batch = X_batch.permute(0, 2, 1)
            optimizer.zero_grad()
            preds = model(X_batch)
            loss = criterion(preds, y_batch)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        train_loss /= len(train_loader)
        if (epoch + 1) % 10 == 0:
            print(f"Epoch {epoch+1}/50, Train Loss: {train_loss:.6f}")
    
    # Evaluate model
    model.eval()
    all_preds, all_targets = [], []
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            X_batch = X_batch.permute(0, 2, 1)
            preds = model(X_batch)
            all_preds.append(preds.cpu().numpy())
            all_targets.append(y_batch.cpu().numpy())
    
    all_preds = np.concatenate(all_preds, axis=0)
    all_targets = np.concatenate(all_targets, axis=0)
    preds_unscaled = target_scaler.inverse_transform(all_preds)
    targets_unscaled = target_scaler.inverse_transform(all_targets)
    
    # Compute metrics
    mae = mean_absolute_error(targets_unscaled, preds_unscaled)
    rmse = np.sqrt(mean_squared_error(targets_unscaled, preds_unscaled))
    mape = np.mean(np.abs((targets_unscaled - preds_unscaled) / np.maximum(targets_unscaled, 1))) * 100
    r2 = r2_score(targets_unscaled, preds_unscaled)
    
    all_groups_results[product_group] = {'MAE': mae, 'RMSE': rmse, 'MAPE': mape, 'R2': r2}
    
    print(f"{product_group} => MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.2f}%, R2: {r2:.4f}")
    
    # Save Predictions vs Actual Graph
    plt.figure(figsize=(12, 6))
    plt.plot(targets_unscaled, label="Actual")
    plt.plot(preds_unscaled, label="Predicted", linestyle='--')
    plt.xlabel("Time Step")
    plt.ylabel("Transaction Count")
    plt.title(f"TCN Predictions vs Actual with CPI for {product_group}")
    plt.legend()
    plt.savefig(os.path.join(group_output_dir, "predictions_vs_actual.png"))
    plt.close()

# Save summary and average metrics
summary_df = pd.DataFrame.from_dict(all_groups_results, orient='index')
summary_df.to_csv(os.path.join(output_dir, "tcn_trends_summary.csv"))

avg_metrics = {
    'MAE': np.mean([all_groups_results[pg]['MAE'] for pg in all_groups_results]),
    'RMSE': np.mean([all_groups_results[pg]['RMSE'] for pg in all_groups_results]),
    'MAPE': np.mean([all_groups_results[pg]['MAPE'] for pg in all_groups_results]),
    'R2': np.mean([all_groups_results[pg]['R2'] for pg in all_groups_results])
}

avg_metrics_df = pd.DataFrame([avg_metrics])
avg_metrics_df.to_csv(os.path.join(output_dir, "avg_metrics.csv"), index=False)

print("\n✅ Training with best hyperparameters completed. Metrics and predictions saved.")



=== Training with best hyperparameters for Trousers_Black_Solid ===


ValueError: Found array with 0 sample(s) (shape=(0, 16)) while a minimum of 1 is required by MinMaxScaler.

In [147]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from modules.utils import process_name

# Output directory
output_dir = "final_version/output/macroeconomic/gdp/tcn"
os.makedirs(output_dir, exist_ok=True)

all_groups_results = {}

for product_group in top_10_groups:
    print(f"\n=== Processing {product_group} ===")
    sanitized_group = process_name(product_group)
    group_output_dir = os.path.join(output_dir, sanitized_group)
    os.makedirs(group_output_dir, exist_ok=True)

    # 1. Filter data for this product group
    product_data = data[data['product_group'] == product_group].copy()
    if product_data.empty:
        print(f"No data for {product_group}, skipping.")
        continue

    # Ensure date is the index
    product_data['date'] = pd.to_datetime(product_data['date'], errors='coerce')
    product_data.set_index('date', inplace=True)
    product_data = product_data.asfreq('D').fillna(0)

    # 2. Merge Macroeconomic Data (Consumer Sentiment)
    print(f"🔄 Merging macroeconomic data for {product_group}...")
    product_data = product_data.join(gdp[['GDP']], how='left')
    product_data.fillna(method='ffill', inplace=True)  # Forward fill missing values

    # 3. Drop unwanted columns
    drop_cols = ['product_group', 'product_type_name', 'colour_group_name', 'graphical_appearance_name']
    product_data.drop(columns=[c for c in drop_cols if c in product_data.columns], inplace=True, errors='ignore')

    if 'transaction_count' not in product_data.columns:
        print(f"No transaction_count in {product_group}, skipping.")
        continue

    # 4. Train/Validation Split (80/20)
    total_len = len(product_data)
    if total_len < 50:
        print(f"Not enough data for {product_group}, skipping.")
        continue
    split_idx = int(0.8 * total_len)
    train_df = product_data.iloc[:split_idx].copy()
    val_df = product_data.iloc[split_idx:].copy()

    # 5. Scale features & target
    feature_scaler = MinMaxScaler()
    target_scaler = MinMaxScaler()

    X_train = train_df.drop(columns=['transaction_count'])
    y_train = train_df[['transaction_count']]

    X_val = val_df.drop(columns=['transaction_count'])
    y_val = val_df[['transaction_count']]

    X_train_scaled = feature_scaler.fit_transform(X_train)
    y_train_scaled = target_scaler.fit_transform(y_train)

    X_val_scaled = feature_scaler.transform(X_val)
    y_val_scaled = target_scaler.transform(y_val)

    train_df_scaled = pd.DataFrame(X_train_scaled, columns=X_train.columns, index=X_train.index)
    train_df_scaled['transaction_count'] = y_train_scaled

    val_df_scaled = pd.DataFrame(X_val_scaled, columns=X_val.columns, index=X_val.index)
    val_df_scaled['transaction_count'] = y_val_scaled

    # 6. Create TimeSeriesDataset
    input_window, output_window = 14, 1
    train_dataset = TimeSeriesDataset(
        train_df_scaled,
        input_window=input_window,
        output_window=output_window,
        target_col_name='transaction_count'
    )
    val_dataset = TimeSeriesDataset(
        val_df_scaled,
        input_window=input_window,
        output_window=output_window,
        target_col_name='transaction_count'
    )

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=False)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    # 7. Build TCN
    num_features = X_train.shape[1] + 1 
    model = TCNForecastingModel(
        num_inputs=num_features,
        num_channels=[64, 64],
        output_size=1,
        kernel_size=3,
        dropout=0.2
    )

    # 8. Train TCN
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    epochs = 50

    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            preds = model(X_batch)
            loss = criterion(preds, y_batch)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        train_loss /= len(train_loader)

        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                preds = model(X_batch)
                loss = criterion(preds, y_batch)
                val_loss += loss.item()
        val_loss /= len(val_loader)

        if (epoch + 1) % 10 == 0:
            print(f"{product_group} | Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.6f}, Val Loss: {val_loss:.6f}")

    # 9. Evaluate on Validation
    model.eval()
    all_preds, all_targets = [], []
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            preds = model(X_batch)
            all_preds.append(preds.cpu().numpy())
            all_targets.append(y_batch.cpu().numpy())

    all_preds = np.concatenate(all_preds, axis=0)
    all_targets = np.concatenate(all_targets, axis=0)

    # Invert scaling
    preds_unscaled = target_scaler.inverse_transform(all_preds)
    targets_unscaled = target_scaler.inverse_transform(all_targets)

    mae = mean_absolute_error(targets_unscaled, preds_unscaled)
    rmse = np.sqrt(mean_squared_error(targets_unscaled, preds_unscaled))
    mape = np.mean(np.abs((targets_unscaled - preds_unscaled) / np.maximum(targets_unscaled, 1))) * 100
    r2 = r2_score(targets_unscaled, preds_unscaled)

    all_groups_results[product_group] = {'MAE': mae, 'RMSE': rmse, 'MAPE': mape, 'R2': r2}

    print(f"{product_group} => MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.2f}%, R2: {r2:.4f}")

    # Save Predictions vs Actual Graph
    plt.figure(figsize=(12, 6))
    plt.plot(targets_unscaled, label="Actual")
    plt.plot(preds_unscaled, label="Predicted", linestyle='--')
    plt.xlabel("Time Step")
    plt.ylabel("Transaction Count")
    plt.title(f"TCN Predictions vs Actual with GDP for {product_group}")
    plt.legend()
    plt.savefig(os.path.join(group_output_dir, "predictions_vs_actual.png"))
    plt.close()

# Save summary
summary_df = pd.DataFrame.from_dict(all_groups_results, orient='index')
summary_df.to_csv(os.path.join(output_dir, "metrics_summary.csv"))

# Compute and save average metrics
avg_metrics = {
    'MAE': np.mean([all_groups_results[pg]['MAE'] for pg in all_groups_results]),
    'RMSE': np.mean([all_groups_results[pg]['RMSE'] for pg in all_groups_results]),
    'MAPE': np.mean([all_groups_results[pg]['MAPE'] for pg in all_groups_results]),
    'R2': np.mean([all_groups_results[pg]['R2'] for pg in all_groups_results])
}

avg_metrics_df = pd.DataFrame([avg_metrics])
avg_metrics_df.to_csv(os.path.join(output_dir, "avg_metrics.csv"), index=False)


print("\n✅ All TCN processing completed. Results saved.")



=== Processing Bra Black Solid ===
🔄 Merging macroeconomic data for Bra Black Solid...
Bra Black Solid | Epoch 10/50, Train Loss: 0.007562, Val Loss: 0.004630
Bra Black Solid | Epoch 20/50, Train Loss: 0.006329, Val Loss: 0.003753
Bra Black Solid | Epoch 30/50, Train Loss: 0.006160, Val Loss: 0.003685
Bra Black Solid | Epoch 40/50, Train Loss: 0.005870, Val Loss: 0.003947
Bra Black Solid | Epoch 50/50, Train Loss: 0.005693, Val Loss: 0.003752
Bra Black Solid => MAE: 146.5768, RMSE: 179.8569, MAPE: 27.92%, R2: 0.1812

=== Processing Dress Black Solid ===
🔄 Merging macroeconomic data for Dress Black Solid...
Dress Black Solid | Epoch 10/50, Train Loss: 0.017544, Val Loss: 0.012691
Dress Black Solid | Epoch 20/50, Train Loss: 0.013655, Val Loss: 0.009880
Dress Black Solid | Epoch 30/50, Train Loss: 0.012410, Val Loss: 0.009137
Dress Black Solid | Epoch 40/50, Train Loss: 0.011519, Val Loss: 0.008703
Dress Black Solid | Epoch 50/50, Train Loss: 0.010095, Val Loss: 0.008257
Dress Black Sol

In [None]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import optuna
from modules.utils import process_name

# Output directory
output_dir = "final_version/output/macroeconomic/gdp/tcn"
os.makedirs(output_dir, exist_ok=True)

def objective_fn(trial, train_loader, val_loader, num_features):
    """
    Optuna objective function for hyperparameter tuning.
    """
    dropout = trial.suggest_float("dropout", 0.1, 0.5)
    lr = trial.suggest_float("lr", 1e-5, 1e-3, log=True)
    kernel_size = trial.suggest_int("kernel_size", 2, 5)
    num_channels = [trial.suggest_int("num_channels_1", 32, 128), trial.suggest_int("num_channels_2", 32, 128)]
    
    model = TCNForecastingModel(
        num_inputs=num_features,
        num_channels=num_channels,
        output_size=1,
        kernel_size=kernel_size,
        dropout=dropout
    )
    
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    
    model.train()
    for epoch in range(20):  # Reduced epochs for tuning
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            preds = model(X_batch)
            loss = criterion(preds, y_batch)
            loss.backward()
            optimizer.step()
    
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            preds = model(X_batch)
            loss = criterion(preds, y_batch)
            val_loss += loss.item()
    val_loss /= len(val_loader)
    
    return val_loss

all_groups_results = {}

# Run Optuna for all product groups
for product_group in top_10_groups:
    print(f"\n=== Processing {product_group} ===")
    sanitized_group = process_name(product_group)
    group_output_dir = os.path.join(output_dir, sanitized_group)
    os.makedirs(group_output_dir, exist_ok=True)
    
    # Run Optuna hyperparameter tuning
    study = optuna.create_study(direction="minimize")
    study.optimize(lambda trial: objective_fn(trial, train_loader, val_loader, X_train.shape[1] + 1), n_trials=30)
    
    # Save best hyperparameters
    best_params = study.best_trial.params
    pd.DataFrame([best_params]).to_csv(os.path.join(group_output_dir, "best_hyperparameters.csv"), index=False)
    
    print(f"✅ Best hyperparameters for {product_group}: {best_params}")
    
    # Train model with best hyperparameters
    model = TCNForecastingModel(
        num_inputs=X_train.shape[1] + 1,
        num_channels=[int(best_params['num_channels_1']), int(best_params['num_channels_2'])],
        output_size=1,
        kernel_size=int(best_params['kernel_size']),
        dropout=best_params['dropout']
    )

    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=best_params['lr'])
    
    model.train()
    for epoch in range(50):
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            preds = model(X_batch)
            loss = criterion(preds, y_batch)
            loss.backward()
            optimizer.step()
    
    # Save trained model
    torch.save(model.state_dict(), os.path.join(group_output_dir, "best_model.pth"))
    
    # Evaluate the model
    model.eval()
    all_preds, all_targets = [], []
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            preds = model(X_batch)
            all_preds.append(preds.cpu().numpy())
            all_targets.append(y_batch.cpu().numpy())

    all_preds = np.concatenate(all_preds, axis=0)
    all_targets = np.concatenate(all_targets, axis=0)

    # Compute metrics
    preds_unscaled = target_scaler.inverse_transform(all_preds)
    targets_unscaled = target_scaler.inverse_transform(all_targets)

    mae = mean_absolute_error(targets_unscaled, preds_unscaled)
    rmse = np.sqrt(mean_squared_error(targets_unscaled, preds_unscaled))
    mape = np.mean(np.abs((targets_unscaled - preds_unscaled) / np.maximum(targets_unscaled, 1))) * 100
    r2 = r2_score(targets_unscaled, preds_unscaled)

    # Save metrics
    metrics = {'MAE': mae, 'RMSE': rmse, 'MAPE': mape, 'R2': r2}
    pd.DataFrame([metrics]).to_csv(os.path.join(group_output_dir, "test_metrics.csv"), index=False)
    all_groups_results[product_group] = metrics

    # Save Predictions vs Actual Graph
    plt.figure(figsize=(12, 6))
    plt.plot(targets_unscaled, label="Actual")
    plt.plot(preds_unscaled, label="Predicted", linestyle='--')
    plt.xlabel("Time Step")
    plt.ylabel("Transaction Count")
    plt.title(f"TCN Predictions vs Actual for {product_group}")
    plt.legend()
    plt.savefig(os.path.join(group_output_dir, "predictions_vs_actual.png"))
    plt.close()

# Save summary and average metrics
summary_df = pd.DataFrame.from_dict(all_groups_results, orient='index')
summary_df.to_csv(os.path.join(output_dir, "tcn_trends_summary.csv"))

avg_metrics = {
    'MAE': np.mean([all_groups_results[pg]['MAE'] for pg in all_groups_results]),
    'RMSE': np.mean([all_groups_results[pg]['RMSE'] for pg in all_groups_results]),
    'MAPE': np.mean([all_groups_results[pg]['MAPE'] for pg in all_groups_results]),
    'R2': np.mean([all_groups_results[pg]['R2'] for pg in all_groups_results])
}

pd.DataFrame([avg_metrics]).to_csv(os.path.join(output_dir, "avg_metrics.csv"), index=False)

print("\n✅ All CPI TCN processing with Optuna tuning, best model training, evaluation, and summary completed. Results saved.")


In [148]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from modules.utils import process_name

# Output directory
output_dir = "final_version/output/macroeconomic/unemployment/tcn"
os.makedirs(output_dir, exist_ok=True)

all_groups_results = {}

for product_group in top_10_groups:
    print(f"\n=== Processing {product_group} ===")
    sanitized_group = process_name(product_group)
    group_output_dir = os.path.join(output_dir, sanitized_group)
    os.makedirs(group_output_dir, exist_ok=True)

    # 1. Filter data for this product group
    product_data = data[data['product_group'] == product_group].copy()
    if product_data.empty:
        print(f"No data for {product_group}, skipping.")
        continue

    # Ensure date is the index
    product_data['date'] = pd.to_datetime(product_data['date'], errors='coerce')
    product_data.set_index('date', inplace=True)
    product_data = product_data.asfreq('D').fillna(0)

    # 2. Merge Macroeconomic Data (Consumer Sentiment)
    print(f"🔄 Merging macroeconomic data for {product_group}...")
    product_data = product_data.join(unemployment[['UNRATE']], how='left')
    product_data.fillna(method='ffill', inplace=True)  # Forward fill missing values

    # 3. Drop unwanted columns
    drop_cols = ['product_group', 'product_type_name', 'colour_group_name', 'graphical_appearance_name']
    product_data.drop(columns=[c for c in drop_cols if c in product_data.columns], inplace=True, errors='ignore')

    if 'transaction_count' not in product_data.columns:
        print(f"No transaction_count in {product_group}, skipping.")
        continue

    # 4. Train/Validation Split (80/20)
    total_len = len(product_data)
    if total_len < 50:
        print(f"Not enough data for {product_group}, skipping.")
        continue
    split_idx = int(0.8 * total_len)
    train_df = product_data.iloc[:split_idx].copy()
    val_df = product_data.iloc[split_idx:].copy()

    # 5. Scale features & target
    feature_scaler = MinMaxScaler()
    target_scaler = MinMaxScaler()

    X_train = train_df.drop(columns=['transaction_count'])
    y_train = train_df[['transaction_count']]

    X_val = val_df.drop(columns=['transaction_count'])
    y_val = val_df[['transaction_count']]

    X_train_scaled = feature_scaler.fit_transform(X_train)
    y_train_scaled = target_scaler.fit_transform(y_train)

    X_val_scaled = feature_scaler.transform(X_val)
    y_val_scaled = target_scaler.transform(y_val)

    train_df_scaled = pd.DataFrame(X_train_scaled, columns=X_train.columns, index=X_train.index)
    train_df_scaled['transaction_count'] = y_train_scaled

    val_df_scaled = pd.DataFrame(X_val_scaled, columns=X_val.columns, index=X_val.index)
    val_df_scaled['transaction_count'] = y_val_scaled

    # 6. Create TimeSeriesDataset
    input_window, output_window = 14, 1
    train_dataset = TimeSeriesDataset(
        train_df_scaled,
        input_window=input_window,
        output_window=output_window,
        target_col_name='transaction_count'
    )
    val_dataset = TimeSeriesDataset(
        val_df_scaled,
        input_window=input_window,
        output_window=output_window,
        target_col_name='transaction_count'
    )

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=False)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    # 7. Build TCN
    num_features = X_train.shape[1] + 1 
    model = TCNForecastingModel(
        num_inputs=num_features,
        num_channels=[64, 64],
        output_size=1,
        kernel_size=3,
        dropout=0.2
    )

    # 8. Train TCN
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    epochs = 50

    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            preds = model(X_batch)
            loss = criterion(preds, y_batch)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        train_loss /= len(train_loader)

        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                preds = model(X_batch)
                loss = criterion(preds, y_batch)
                val_loss += loss.item()
        val_loss /= len(val_loader)

        if (epoch + 1) % 10 == 0:
            print(f"{product_group} | Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.6f}, Val Loss: {val_loss:.6f}")

    # 9. Evaluate on Validation
    model.eval()
    all_preds, all_targets = [], []
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            preds = model(X_batch)
            all_preds.append(preds.cpu().numpy())
            all_targets.append(y_batch.cpu().numpy())

    all_preds = np.concatenate(all_preds, axis=0)
    all_targets = np.concatenate(all_targets, axis=0)

    # Invert scaling
    preds_unscaled = target_scaler.inverse_transform(all_preds)
    targets_unscaled = target_scaler.inverse_transform(all_targets)

    mae = mean_absolute_error(targets_unscaled, preds_unscaled)
    rmse = np.sqrt(mean_squared_error(targets_unscaled, preds_unscaled))
    mape = np.mean(np.abs((targets_unscaled - preds_unscaled) / np.maximum(targets_unscaled, 1))) * 100
    r2 = r2_score(targets_unscaled, preds_unscaled)

    all_groups_results[product_group] = {'MAE': mae, 'RMSE': rmse, 'MAPE': mape, 'R2': r2}

    print(f"{product_group} => MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.2f}%, R2: {r2:.4f}")

    # Save Predictions vs Actual Graph
    plt.figure(figsize=(12, 6))
    plt.plot(targets_unscaled, label="Actual")
    plt.plot(preds_unscaled, label="Predicted", linestyle='--')
    plt.xlabel("Time Step")
    plt.ylabel("Transaction Count")
    plt.title(f"TCN Predictions vs Actual with Unemployment Rate for {product_group}")
    plt.legend()
    plt.savefig(os.path.join(group_output_dir, "predictions_vs_actual.png"))
    plt.close()

# Save summary
summary_df = pd.DataFrame.from_dict(all_groups_results, orient='index')
summary_df.to_csv(os.path.join(output_dir, "metrics_summary.csv"))

# Compute and save average metrics
avg_metrics = {
    'MAE': np.mean([all_groups_results[pg]['MAE'] for pg in all_groups_results]),
    'RMSE': np.mean([all_groups_results[pg]['RMSE'] for pg in all_groups_results]),
    'MAPE': np.mean([all_groups_results[pg]['MAPE'] for pg in all_groups_results]),
    'R2': np.mean([all_groups_results[pg]['R2'] for pg in all_groups_results])
}

avg_metrics_df = pd.DataFrame([avg_metrics])
avg_metrics_df.to_csv(os.path.join(output_dir, "avg_metrics.csv"), index=False)


print("\n✅ All TCN processing completed. Results saved.")



=== Processing Bra Black Solid ===
🔄 Merging macroeconomic data for Bra Black Solid...
Bra Black Solid | Epoch 10/50, Train Loss: 0.008869, Val Loss: 0.004915
Bra Black Solid | Epoch 20/50, Train Loss: 0.007986, Val Loss: 0.005637
Bra Black Solid | Epoch 30/50, Train Loss: 0.007189, Val Loss: 0.005532
Bra Black Solid | Epoch 40/50, Train Loss: 0.006897, Val Loss: 0.005186
Bra Black Solid | Epoch 50/50, Train Loss: 0.006564, Val Loss: 0.005222
Bra Black Solid => MAE: 176.1876, RMSE: 206.0615, MAPE: 34.47%, R2: -0.0747

=== Processing Dress Black Solid ===
🔄 Merging macroeconomic data for Dress Black Solid...
Dress Black Solid | Epoch 10/50, Train Loss: 0.015863, Val Loss: 0.016913
Dress Black Solid | Epoch 20/50, Train Loss: 0.014175, Val Loss: 0.013848
Dress Black Solid | Epoch 30/50, Train Loss: 0.012146, Val Loss: 0.010948
Dress Black Solid | Epoch 40/50, Train Loss: 0.012206, Val Loss: 0.010110
Dress Black Solid | Epoch 50/50, Train Loss: 0.011590, Val Loss: 0.009620
Dress Black So