In [None]:
import torch
from vn1forecasting.data import DataPreprocessor, generate_time_series_samples, prepare_batch_data
from vn1forecasting.model import MultiTimeSeriesTransformer
from vn1forecasting.pipeline import train_model, validate_model_with_loss, run_inference_on_test
from vn1forecasting.results import save_predictions_in_custom_format, evaluate_forecasts
from vn1forecasting.utils import plot_predictions_vs_actual_with_price

def _select_device() -> torch.device:
    """Return the best available torch device.

    Preference order is CUDA, then MPS (Metal Performance Shaders), then CPU,
    so the notebook uses whatever accelerator the host offers.
    """
    if torch.cuda.is_available():
        return torch.device("cuda")
    # `torch.backends.mps` does not exist on older PyTorch builds, so probe it
    # defensively instead of accessing the attribute directly.
    mps_backend = getattr(torch.backends, "mps", None)
    if mps_backend is not None and mps_backend.is_available():
        return torch.device("mps")
    return torch.device("cpu")


# Device shared by the model, training and inference cells below.
device = _select_device()
print(f"Using device: {device}")

In [2]:
# Build the preprocessor and run its preprocessing step. The `preprocessor`
# object itself is kept because later cells read its encoders
# (`client_encoder`, `warehouse_encoder`, `product_encoder`) and its
# `normalization_params`.
preprocessor = DataPreprocessor()
preprocessed_df = preprocessor.preprocess_data()

In [None]:
# Eyeball the last 20 rows of the preprocessed frame.
preprocessed_df.tail(20)

In [None]:
# Sanity-check plot: `rolling_13w_sales` over `Date` for the single series
# selected by Client == 0, Warehouse == 3, Product == 897.
series_mask = (
    (preprocessed_df["Client"] == 0)
    & (preprocessed_df["Warehouse"] == 3)
    & (preprocessed_df["Product"] == 897)
)
ax = preprocessed_df.loc[series_mask].set_index("Date")["rolling_13w_sales"].plot()
# Label the figure so it stands alone when the notebook is skimmed; the
# trailing `;` keeps the cell from echoing the return value of `ax.set`.
ax.set(
    title="rolling_13w_sales: Client 0 / Warehouse 3 / Product 897",
    xlabel="Date",
    ylabel="rolling_13w_sales",
);

In [None]:
# Histogram of the `Price` column, restricted to the window 0.1 .. 0.8999.
# NOTE(review): the bounds look hand-picked to leave out values near the
# extremes of the column - confirm the intent and consider naming them.
ax = preprocessed_df["Price"].hist(bins=100, range=(0.1, 0.8999))
# Title and axis labels make the figure self-describing; the trailing `;`
# suppresses the repr of the `ax.set` return value.
ax.set(title="Price distribution (0.1 to 0.8999)", xlabel="Price", ylabel="Row count");

In [None]:
# Histogram of the `Sales` column, restricted to the window 0.1001 .. 1.
# NOTE(review): the lower bound looks chosen to skip values at or below 0.1 -
# confirm the intent and consider naming the bounds.
ax = preprocessed_df["Sales"].hist(bins=100, range=(0.1001, 1))
# Title and axis labels make the figure self-describing; the trailing `;`
# suppresses the repr of the `ax.set` return value.
ax.set(title="Sales distribution (0.1001 to 1)", xlabel="Sales", ylabel="Row count");

In [None]:
# Draw a small batch of train / validation samples, then show a few fields of
# the first training sample as a quick shape-and-content check.
n_samples = 20  # how many samples to request from the generator
train_samples, valid_samples = generate_time_series_samples(preprocessed_df, n_samples)

first_train_sample = train_samples[0]
(
    first_train_sample['cursor_date'],
    first_train_sample['sales'],
    first_train_sample['price'],
    first_train_sample['target'],
)

In [None]:
# Collate the training samples into batch form and unpack the 16 returned
# items, one name per line so the positional order is easy to audit.
batch_data = prepare_batch_data(train_samples, mode='train')
(
    sales,
    price,
    decoder_input,
    wom,
    woy,
    moy,
    qoy,
    sales_padding_mask,
    price_padding_mask,
    price_validity_mask,
    target,
    client,
    warehouse,
    product,
    rolling_4w_sales,
    rolling_13w_sales,
) = batch_data

# Show the price padding mask of the first item in the batch.
price_padding_mask[0]

In [None]:
# Build the transformer and move it to the selected device.
# The three category vocabulary sizes are the lengths of the corresponding
# encoders' `classes_` on the preprocessor.
n_clients = len(preprocessor.client_encoder.classes_)
n_warehouses = len(preprocessor.warehouse_encoder.classes_)
n_products = len(preprocessor.product_encoder.classes_)

model = MultiTimeSeriesTransformer(
    # Transformer dimensions
    input_dim=1,
    d_model=64,
    nhead=4,
    num_encoder_layers=2,
    num_decoder_layers=2,
    dim_feedforward=256,
    # Calendar-feature vocabulary sizes and their embedding width
    num_wom=5,
    num_woy=53,
    num_moy=12,
    num_qoy=4,
    date_embedding_dim=3,
    # Category vocabulary sizes and their embedding width
    num_clients=n_clients,
    num_warehouses=n_warehouses,
    num_products=n_products,
    category_embedding_dim=16,
    dropout=0.1,
).to(device)

In [None]:
# Training schedule handed to `train_model` as `phases_config`.
# NOTE(review): the meaning of each tuple position is defined inside
# `train_model`; presumably (phase name, ?, batch size, ?, learning rate) -
# confirm against the pipeline before tuning.
training_phases = [
    ('init', 1, 24, 50000, 1e-3),
    ('core', 51, 512, 200000, 1e-3),
    ('core', 51, 512, 1, 1e-4),
    ('core', 51, 512, 1, 1e-5),
    ('tune', 51, 512, 200000, 1e-5),
    ('finish', 51, 512, 200000, 1e-5),
]

# Run training. Note that this rebinds `valid_samples`, replacing the list
# produced by the earlier sample-generation cell with the one returned here.
trained_model, valid_samples, val_predictions, val_targets = train_model(
    model=model,
    preprocessed_df=preprocessed_df,
    device=device,
    generate_time_series_samples=generate_time_series_samples,
    prepare_batch_data=prepare_batch_data,
    validate_model_with_loss=validate_model_with_loss,
    phases_config=training_phases,
)

In [None]:
# Pick one validation sample to inspect; change `sample_index` to look at a
# different one.
sample_index = 7
sample = valid_samples[sample_index]

# Plot that sample's predictions against its targets. The prediction and
# target entries are taken at the same index as the sample.
plot_predictions_vs_actual_with_price(
    sample=sample,
    val_predictions=val_predictions[sample_index],
    val_targets=val_targets[sample_index],
    preprocessor=preprocessor,
    scalers=preprocessor.normalization_params,
)


In [12]:
# Build the samples for the test run by calling the generator in 'test' mode.
test_samples = generate_time_series_samples(preprocessed_df, mode='test')


In [None]:
# Produce predictions for the test samples in batches.
# NOTE(review): this passes `model`, not the `trained_model` returned by
# `train_model`. That is only equivalent if training updates the model in
# place and returns the same object - confirm.
inference_batch_size = 512
test_predictions = run_inference_on_test(
    model=model,
    device=device,
    preprocessor=preprocessor,
    prepare_batch_data=prepare_batch_data,
    test_samples=test_samples,
    batch_size=inference_batch_size,
)

In [None]:
# Destination of the formatted predictions, relative to the notebook's
# working directory.
output_path = "./test_predictions_custom.csv"

# Save predictions in the custom format; the returned frame is reused by the
# evaluation cell.
formatted_df = save_predictions_in_custom_format(
    test_predictions=test_predictions,
    test_samples=test_samples,
    output_path=output_path,
)

# Preview the first rows. A bare last expression gets the notebook's rich
# table rendering, which `print` would flatten to plain text.
formatted_df.head()


In [None]:
# Reference forecasts to compare against, as (CSV path, label) pairs.
forecast_paths = [
    ("../data/solution_1st_place.csv", "1st"),
    ("../data/solution_2nd_place.csv", "2nd"),
    ("../data/solution_3rd_place.csv", "3rd"),
    ("../data/solution_4th_place.csv", "4th"),
    ("../data/solution_5th_place.csv", "5th"),
]

# Evaluate this model's forecasts and the reference forecasts against the
# actuals file.
actual_path = "../data/phase_2_sales.csv"
score_df = evaluate_forecasts(actual_path, formatted_df, forecast_paths)

print("Model Scores:")
# A bare last expression lets the notebook render the scores with its rich
# display instead of the plain-text form `print` produces.
score_df
