In [1]:
import torch
import torch.nn as nn
import numpy as np
import copy

from vn1forecasting.dualt import (
    DataPreprocessor, 
    generate_time_series_samples, prepare_batch_data, plot_predictions_vs_actual_with_price,
    validate_model_with_loss, MultiTimeSeriesTransformer, run_inference_on_test, save_predictions_in_custom_format
)

In [None]:
# Choose the compute device: Apple's Metal Performance Shaders (MPS) backend
# when PyTorch reports it as available, the CPU otherwise.
if torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

In [3]:
# Build the project's DataPreprocessor and run its preprocessing step.
# `preprocessor` is kept because later cells read its `client_encoder`,
# `warehouse_encoder`, `product_encoder` and `normalization_params` attributes;
# `preprocessed_df` is the frame consumed by the exploration, sampling and
# test-sample cells below.
preprocessor = DataPreprocessor()
preprocessed_df = preprocessor.preprocess_data()

In [None]:
# Spot check: display the last 20 of the first 196 rows of the preprocessed frame.
preprocessed_df.head(196).tail(20)

In [None]:
# Plot the rolling_13w_sales column over Date for one (Client, Warehouse, Product)
# combination, selected with a named boolean mask.
single_series_mask = (
    (preprocessed_df.Client == 0)
    & (preprocessed_df.Warehouse == 3)
    & (preprocessed_df.Product == 897)
)
preprocessed_df.loc[single_series_mask].set_index('Date').rolling_13w_sales.plot()

In [None]:
# Histogram of the Price column using 100 bins.
preprocessed_df.Price.hist(bins=100)

In [None]:
# Histogram of the Sales column using 100 bins, restricted to values in [0, 1].
# NOTE(review): the [0, 1] window presumably reflects normalized sales - confirm
# against DataPreprocessor.
preprocessed_df.Sales.hist(bins=100, range=(0,1))

In [None]:
# Display the smallest and largest value of the Sales column as a (min, max) tuple.
preprocessed_df.Sales.min(), preprocessed_df.Sales.max()

In [None]:
# Draw a small batch of train/validation samples purely for inspection.
n_samples = 20  # how many samples to request from the generator
train_samples, valid_samples = generate_time_series_samples(preprocessed_df, n_samples)

# Display the 'sales', 'price' and 'target' entries of the first training sample.
tuple(train_samples[0][field] for field in ('sales', 'price', 'target'))

In [None]:
# Collate the inspection samples into a single training-mode batch.
# The unpacking order follows the training loop further down in this notebook
# (client, warehouse, product come BEFORE target). This cell previously listed
# `target` ahead of the three ids, which disagreed with the training loop.
# NOTE(review): the order is taken from the training loop, the code path that is
# actually exercised - confirm against the return statement of prepare_batch_data.
batch_data = prepare_batch_data(train_samples, mode='train')
(sales, price, decoder_input, wom, woy, moy, qoy,
 sales_padding_mask, price_padding_mask, price_validity_mask,
 client, warehouse, product, target, rolling_4w_sales,
 rolling_13w_sales) = batch_data

# Inspect the first entry of the price padding mask.
price_padding_mask[0]

In [None]:
# Model initialization: gather the hyper-parameters in one mapping, then build
# the network from it.
model_config = dict(
    # Transformer dimensions.
    input_dim=1,
    d_model=64,
    nhead=4,
    num_encoder_layers=2,
    num_decoder_layers=2,
    dim_feedforward=256,
    # Sizes for the calendar features (wom / woy / moy / qoy).
    # NOTE(review): presumably week-of-month, week-of-year, month-of-year and
    # quarter-of-year vocabularies - confirm against MultiTimeSeriesTransformer.
    num_wom=5,
    num_woy=53,
    num_moy=12,
    num_qoy=4,
    date_embedding_dim=3,
    # Category counts come from the classes held by the preprocessor's encoders.
    num_clients=len(preprocessor.client_encoder.classes_),
    num_warehouses=len(preprocessor.warehouse_encoder.classes_),
    num_products=len(preprocessor.product_encoder.classes_),
    category_embedding_dim=16,
    dropout=0.1,
)
model = MultiTimeSeriesTransformer(**model_config)

In [12]:
# Place the model on the device selected earlier in the notebook.
model = model.to(device)

In [13]:
def freeze_module_by_name(model, module_name_substring):
    """
    Disable gradient tracking for every matching parameter of `model`.

    A parameter matches when `module_name_substring` occurs anywhere in the
    name yielded by `model.named_parameters()`; each match gets
    `requires_grad = False`. Non-matching parameters are left untouched.
    """
    matching_params = (
        tensor
        for param_name, tensor in model.named_parameters()
        if module_name_substring in param_name
    )
    for tensor in matching_params:
        tensor.requires_grad = False

def unfreeze_module_by_name(model, module_name_substring):
    """
    Re-enable gradient tracking for every matching parameter of `model`.

    A parameter matches when `module_name_substring` occurs anywhere in the
    name yielded by `model.named_parameters()`; each match gets
    `requires_grad = True`. Non-matching parameters are left untouched.
    """
    for param_name, tensor in model.named_parameters():
        if module_name_substring not in param_name:
            continue
        tensor.requires_grad = True

In [None]:
# Multi-phase training schedule. Each tuple is
# (phase name, max epochs, batch size, number of samples to generate, learning rate).
for phase, n_epochs, batch_size, n_samples, lr in [
    ('init', 1, 24, 50000, 1e-3),
    ('core', 51, 512, 200000, 1e-3),
    ('core', 51, 512, 1, 1e-4),
    ('core', 51, 512, 1, 1e-5),
    ('tune', 51, 512, 200000, 1e-5),
    ('finish', 51, 512, 200000, 1e-5)
]:
    # NOTE: loss_fn is not used by the loop below (the loss comes from
    # model.masked_loss); it is kept so the notebook namespace is unchanged.
    loss_fn = nn.MSELoss(reduction='none')
    # A fresh Adam optimizer per phase, so optimizer state does not carry over.
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # ---------------------------------------------------------------------------
    # 1) Phase-Specific Freezing
    # ---------------------------------------------------------------------------
    # requires_grad flags persist on the model between phases: whatever 'tune'
    # freezes stays frozen in 'finish' unless it is explicitly unfrozen there.
    if phase == 'tune':
        # Freeze every parameter whose name contains 'embedding' or 'sales_encoder'.
        freeze_module_by_name(model, 'embedding')
        freeze_module_by_name(model, 'sales_encoder')
        print("Froze all embedding layers.")
        print("Froze the sales encoder.")

    if phase == 'finish':
        # Freeze the price encoder and hand training back to the sales encoder.
        freeze_module_by_name(model, 'price_encoder')
        unfreeze_module_by_name(model, 'sales_encoder')
        print("Froze the price encoder.")
        print("Unfroze the sales encoder.")

    # ---------------------------------------------------------------------------
    # 2) Sample Generation
    # ---------------------------------------------------------------------------
    # 'init', 'tune' and 'finish' start from empty sample lists; 'core' phases
    # append their new samples to whatever earlier phases accumulated.
    if phase in ['init', 'tune', 'finish']:
        train_samples, valid_samples = [], []
    _train_samples, _valid_samples = generate_time_series_samples(
        preprocessed_df, n_samples, train_valid_split=0.8, phase=phase
    )
    train_samples += _train_samples
    valid_samples += _valid_samples
    print(f"Training samples: {len(train_samples)}")

    # Early-stopping bookkeeping. The validation outputs of the best epoch are
    # kept next to the weights so that, after the phase, val_predictions and
    # val_targets describe the model that is actually restored.
    best_val_loss = float('inf')
    best_model_state = None
    best_val_predictions, best_val_targets = None, None
    patience = 10
    patience_counter = 0

    # ---------------------------------------------------------------------------
    # 3) Train/Eval Loop for this Phase
    # ---------------------------------------------------------------------------
    for epoch in range(n_epochs):
        model.train()
        total_train_loss = 0
        n_batches = 0  # batches actually processed, including a final partial one

        # Shuffle train samples in place
        np.random.shuffle(train_samples)

        # Process training batches
        for i in range(0, len(train_samples), batch_size):
            # Prepare batch data
            batch_samples = train_samples[i:i + batch_size]
            batch_data = prepare_batch_data(batch_samples, mode='train', device=device)
            (sales, price, decoder_input, wom, woy, moy, qoy,
             sales_padding_mask, price_padding_mask, price_validity_mask,
             client, warehouse, product, target, rolling_4w_sales,
             rolling_13w_sales) = batch_data

            # Forward pass
            predictions = model(
                sales, price, decoder_input, wom, woy, moy, qoy,
                sales_padding_mask, price_padding_mask, price_validity_mask,
                client, warehouse, product, rolling_4w_sales, rolling_13w_sales
            ).squeeze(-1)

            # Compute loss
            loss = model.masked_loss(predictions, target)

            # Backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_train_loss += loss.item()
            n_batches += 1

        # Validation
        val_loss, val_predictions, val_targets = validate_model_with_loss(
            model, valid_samples, batch_size=batch_size, device=device
        )
        # Average over the real batch count. Dividing by
        # len(train_samples) // batch_size raised ZeroDivisionError when there were
        # fewer samples than one batch, and ignored the last partial batch.
        mean_train_loss = total_train_loss / max(n_batches, 1)
        print(f"Epoch {epoch}, Train Loss: {mean_train_loss:.4f}, "
              f"Validation Loss: {val_loss:.4f}")

        # Additional metrics (MSE, MAE), computed over non-NaN targets only
        valid_mask = ~np.isnan(val_targets)
        val_mse = np.mean((val_predictions[valid_mask] - val_targets[valid_mask]) ** 2)
        val_mae = np.mean(np.abs(val_predictions[valid_mask] - val_targets[valid_mask]))
        print(f"Validation MSE: {val_mse:.4f}")
        print(f"Validation MAE: {val_mae:.4f}")

        # Early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model_state = copy.deepcopy(model.state_dict())
            best_val_predictions, best_val_targets = val_predictions, val_targets
            patience_counter = 0
        else:
            patience_counter += 1

        if patience_counter >= patience:
            # epoch is 0-based, so epoch + 1 epochs have run at this point.
            print(f"Early stopping triggered after {epoch + 1} epochs - no val loss improvement for {patience} epochs")
            break

        current_lr = optimizer.param_groups[0]['lr']
        print(f"Current learning rate: {current_lr:.2e}")

    # Ensure best model for next phase. best_model_state stays None when no epoch
    # improved on +inf (e.g. the validation loss was NaN throughout); in that
    # case the current weights and the last validation outputs are kept.
    if best_model_state is not None:
        model.load_state_dict(best_model_state)
        val_predictions, val_targets = best_val_predictions, best_val_targets


In [15]:
#preprocessed_df.loc[preprocessed_df.Product==1293]

In [None]:
# Select a sample from validation data
sample_index = 7
sample = valid_samples[sample_index]  # change sample_index above to inspect a different sample

# Plot predictions vs actuals.
# val_predictions / val_targets are the globals left behind by the training cell's
# most recent validation pass.
# NOTE(review): this assumes row i of those arrays corresponds to valid_samples[i]
# - confirm against validate_model_with_loss.
plot_predictions_vs_actual_with_price(
    sample=sample,
    scalers=preprocessor.normalization_params,  # normalization parameters held by the preprocessor
    preprocessor=preprocessor,  # NOTE(review): presumably used for inverse transforms - confirm
    val_predictions=val_predictions[sample_index],  # predictions row for the chosen sample
    val_targets=val_targets[sample_index]  # targets row for the chosen sample
)


In [None]:
# Display the raw contents of the validation sample chosen above.
sample

In [18]:
# Generate test samples.
# In mode='test' the result is bound to a single name (no train/validation pair),
# and the following cells treat it as a sized, indexable collection.
test_samples = generate_time_series_samples(
    preprocessed_df,
    mode='test'
)


In [None]:
# Number of test samples generated.
len(test_samples)

In [None]:
# Display the first test sample to inspect its structure.
test_samples[0]

In [21]:
# Run inference on the test samples with the trained model, in batches of 128.
# The preprocessor is passed along as well.
# NOTE(review): presumably so predictions can be mapped back to the original
# scale and labels - confirm against run_inference_on_test.
test_predictions = run_inference_on_test(
    model=model,
    test_samples=test_samples,
    batch_size=128,
    preprocessor=preprocessor,
    device=device
)

In [None]:
# Preview the first rows of the inference output.
test_predictions.head()

In [None]:
# Destination of the submission-style CSV, relative to the notebook's working directory.
output_path = "./test_predictions_custom.csv"

# Save predictions in custom format; the helper is given the output path and its
# return value is kept as formatted_df for the scoring cells below.
formatted_df = save_predictions_in_custom_format(
    test_predictions=test_predictions, 
    test_samples=test_samples, 
    output_path=output_path
)

# Preview the formatted DataFrame (plain-text print of the first rows)
print(formatted_df.head())


In [None]:
# Display the (min, max) of the `senoni` column of the formatted output.
# NOTE(review): presumably `senoni` is the column holding this notebook's
# predictions - confirm against save_predictions_in_custom_format.
formatted_df.senoni.min(), formatted_df.senoni.max()

In [None]:
import pandas as pd
def read_and_prepare_data(file_path: str, value_name: str = "y") -> pd.DataFrame:
    """Load a wide CSV and reshape it to long format.

    The file must contain `Client`, `Warehouse` and `Product` columns; every
    other column header is parsed as a date.

    Args:
        file_path: Path of the CSV file to read.
        value_name: Name given to the melted value column.

    Returns:
        DataFrame with columns `unique_id` ("Client-Warehouse-Product"),
        `ds` (datetime) and `value_name`, sorted by `unique_id` then `ds`.
    """
    key_columns = ["Client", "Warehouse", "Product"]

    wide = pd.read_csv(file_path)
    # Join the three keys into a single series identifier.
    wide["unique_id"] = wide[key_columns].astype(str).agg("-".join, axis=1)

    long = wide.drop(columns=key_columns).melt(
        id_vars=["unique_id"], var_name="ds", value_name=value_name
    )
    long["ds"] = pd.to_datetime(long["ds"])
    return long.sort_values(by=["unique_id", "ds"])

def get_competition_forecasts() -> pd.DataFrame:
    """Read the top-five competition forecasts and combine them into one DataFrame.

    Each `../data/solution_<place>_place.csv` file is loaded with
    `read_and_prepare_data`, which names its value column after the place
    ("1st" ... "5th"). The frames are joined on the `unique_id` / `ds` keys.
    The previous implementation concatenated them side by side on the row
    index (building the concatenation twice) and then dropped the duplicated
    key columns; that would silently misalign forecasts if the files listed
    their series in different row orders.

    Returns:
        DataFrame with columns `unique_id`, `ds`, `1st`, `2nd`, `3rd`, `4th`
        and `5th`, one row per (unique_id, ds) pair found in any file.
    """
    places = ["1st", "2nd", "3rd", "4th", "5th"]
    fcst_dfs = [read_and_prepare_data(f"../data/solution_{place}_place.csv", place) for place in places]

    # Outer joins keep every key, as the index-aligned concat did.
    combined = fcst_dfs[0]
    for place_df in fcst_dfs[1:]:
        combined = combined.merge(place_df, on=["unique_id", "ds"], how="outer")
    return combined

# Assemble this notebook's forecast (first three columns of formatted_df), the
# competition forecasts and the actual sales into one long frame.
fcst_df_comp = get_competition_forecasts()
res = formatted_df.iloc[:, :3].merge(fcst_df_comp, on=["unique_id", "ds"], how="left")
actual = read_and_prepare_data("../data/phase_2_sales.csv")
result = actual[["unique_id", "ds", "y"]].merge(res, on=["unique_id", "ds"], how="left")

# Both sides must cover exactly the same series.
assert set(res["unique_id"].unique()) == set(result["unique_id"].unique()), "Some unique_ids are missing"


def _competition_score(forecast, observed):
    """Return (sum |error| + |sum error|) / sum(observed), rounded to 4 decimals.

    NaN errors are skipped by the two error sums (np.nansum).
    """
    deviation = forecast - observed
    absolute_part = np.nansum(np.abs(deviation))
    bias_part = np.abs(np.nansum(deviation))
    return round((absolute_part + bias_part) / observed.sum(), 4)


# One score per forecast column (everything in res except the join keys).
scores = {
    forecast_col: _competition_score(result[forecast_col], result["y"])
    for forecast_col in res.columns
    if forecast_col not in ["unique_id", "ds"]
}

# Rank the forecasts from best (lowest score) to worst and display the table.
score_df = (
    pd.DataFrame(scores.items(), columns=["model", "score"])
    .sort_values(by="score")
    .reset_index(drop=True)
)
score_df