# Separating stock to forecast for certain dates depending on the product

In [1]:
# Install the third-party packages this notebook relies on into the active kernel's environment.
%pip install nbformat pandas numpy matplotlib seaborn scikit-learn darts 


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.1.1[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [None]:
import sys

# Make the project's `src` directory importable from this notebook.
# The path is relative to the kernel's current working directory.
SRC_PATH = '../../src'

# Guard the append so re-running this cell does not pile up duplicate
# entries in sys.path.
if SRC_PATH not in sys.path:
    sys.path.append(SRC_PATH)

In [None]:
from data_config import ProductHandler as ph

In [None]:
# Invoke ProductHandler.custom_prod_set() before any customer data is requested.
# NOTE(review): presumably this prepares the product set that
# get_custom_code_data() reads from — confirm in data_config.
ph.custom_prod_set()

In [None]:
# Load the 'product_sales' rows (features and target variable) for one customer.
# CHANGE THE CUSTOMER CODE HERE >>>>>>>>>>
CUSTOMER_CODE = 'ALB'
custom_code_df = pd.DataFrame(ph.get_custom_code_data(CUSTOMER_CODE))

# Customer code for later referencing: the first three characters of the
# first row's ProductNumber, upper-cased (a single .upper() is sufficient).
custom_ref = custom_code_df.loc[0, 'ProductNumber'][:3].upper()
print(custom_ref)

custom_code_df.head()

In [None]:
from app_utils import Transform as trans
from model_utils import *
from sklearn.pipeline import Pipeline

# Main imports for N-BEAT model
from darts import TimeSeries
from darts.dataprocessing.transformers import Scaler
from darts.utils.timeseries_generation import datetime_attribute_timeseries

In [None]:
# Outlier removal: Transform.compute_zscore filters OrderQuantity by z-score
# (threshold of 3 according to the original note on this cell).
outlier_free_df = trans.compute_zscore(custom_code_df)

# The model is a time-series model, so OrderDate must be a real datetime column.
custom_code_df = outlier_free_df.assign(
    OrderDate=pd.to_datetime(outlier_free_df["OrderDate"])
)
custom_code_df.head()

In [None]:
# Target column on its own; every remaining column is kept as a feature.
y = custom_code_df['OrderQuantity']
X = custom_code_df.drop(columns=['OrderQuantity'])

In [None]:
# Group by ProductNumber and create one target / one covariate TimeSeries per product
TARGET_COL = "OrderQuantity"
TIME_COL = "OrderDate"
SERIES_FREQ = "W"  # Weekly frequency (can change to 'D', 'MS', etc.)

# Covariates are every feature column except the product id, the target and
# the time column itself: the time column becomes the series index and must
# not also be passed as a value column.
# NOTE(review): assumes all remaining columns are numeric — TODO confirm.
covariate_cols = [
    col for col in X.columns
    if col not in ("ProductNumber", TARGET_COL, TIME_COL)
]

series_list = []
covariates_list = []

for product_id, group in custom_code_df.groupby("ProductNumber"):
    # Target series (OrderQuantity over time). `value_cols` takes the column
    # NAME, not the column's data.
    target_series = TimeSeries.from_dataframe(
        group,
        time_col=TIME_COL,
        value_cols=TARGET_COL,
        freq=SERIES_FREQ,
    )

    # Covariates (features like moving averages, prev_month_sales, etc.),
    # built with the same frequency as the target so the two stay aligned.
    covariates = TimeSeries.from_dataframe(
        group,
        time_col=TIME_COL,
        value_cols=covariate_cols,
        freq=SERIES_FREQ,
    )

    series_list.append(target_series)
    covariates_list.append(covariates)

# Scale the data (critical for N-BEATS)
# NOTE(review): both scalers are fitted on the complete series, i.e. before
# the train/validation split done later in this notebook, so validation data
# influences the scaling. Consider fitting on the training portion only.
target_scaler = Scaler()
scaled_series = target_scaler.fit_transform(series_list)

covariate_scaler = Scaler()
scaled_covariates = covariate_scaler.fit_transform(covariates_list)

In [None]:
# Chronological split of every product's target and covariate series:
# the first 80% is used for training, the remaining 20% for validation.
TRAIN_FRACTION = 0.8

paired_splits = [
    (target.split_before(TRAIN_FRACTION), features.split_before(TRAIN_FRACTION))
    for target, features in zip(scaled_series, scaled_covariates)
]

train_series = [target_parts[0] for target_parts, _ in paired_splits]
val_series = [target_parts[1] for target_parts, _ in paired_splits]
train_covariates = [feature_parts[0] for _, feature_parts in paired_splits]
val_covariates = [feature_parts[1] for _, feature_parts in paired_splits]

In [None]:
# Hyperparameter search for the N-BEATS model.
# Once found best hyperparams, comment out and use the results to reduce compute time and resources
# NOTE(review): `X_train_preprocessed` and `y_train` are not assigned in any
# cell shown above, and `NBEATSModel` is never imported explicitly —
# presumably they are expected to come from the `model_utils` star-import;
# verify, otherwise this cell raises NameError.
# NOTE(review): if `NBEATSModel` is darts' class, calling it without
# input_chunk_length/output_chunk_length is expected to fail — TODO confirm.
nbeat_params = find_best_hyperparameters(NBEATSModel(), param_grids(NBEATSModel().__class__.__name__), X_train_preprocessed, y_train)

In [None]:
# Build the N-BEATS model, passing the `nbeat_params` mapping as keyword arguments.
nbeat_tuned = NBEATSModel(**nbeat_params)

In [None]:
# Train on the per-product training splits and hand the validation splits to
# fit() alongside them.
fit_kwargs = dict(
    series=train_series,
    past_covariates=train_covariates,  # Features known in the past (e.g., prev_month_sales)
    val_series=val_series,
    val_past_covariates=val_covariates,
    verbose=True,
)
nbeat_tuned.fit(**fit_kwargs)

In [None]:
def _inverse_scale_for_product(scaler, series, idx, like):
    """Undo scaling of `series` with the parameters fitted for product `idx`.

    `scaler` was fitted on a list of series (one per product). Passing a single
    series to `inverse_transform` would apply the parameters of the FIRST
    fitted series, which is only right for idx == 0. To pick the matching
    parameters, `series` is placed at position `idx` of a copy of `like`
    (a list with one scaled series per fitted product), the whole list is
    inverse-transformed, and the entry at `idx` is returned.
    """
    batch = list(like)
    batch[idx] = series
    return scaler.inverse_transform(batch)[idx]


# Forecast for one product (index into the per-product lists; 0 = first product)
product_idx = 0
forecast = nbeat_tuned.predict(
    n=len(val_series[product_idx]),  # Match validation horizon
    series=train_series[product_idx],
    # Full covariate series rather than only the training part: when the
    # horizon is longer than the model's output chunk, past covariates must
    # reach beyond the end of the training series. The model only reads the
    # time steps it needs.
    past_covariates=scaled_covariates[product_idx],
)

# Inverse scaling, using this product's own scaler parameters
forecast_unscaled = _inverse_scale_for_product(
    target_scaler, forecast, product_idx, train_series
)
actual_unscaled = _inverse_scale_for_product(
    target_scaler, val_series[product_idx], product_idx, train_series
)
# The training history is un-scaled too so all three curves share one y-axis.
train_unscaled = _inverse_scale_for_product(
    target_scaler, train_series[product_idx], product_idx, train_series
)

# Plot
train_unscaled.plot(label="Train")
actual_unscaled.plot(label="Actual")
forecast_unscaled.plot(label="Forecast")
plt.legend()
plt.show()

In [None]:
# Forecast horizon, counted in time steps of the series (the series in this
# notebook are built with weekly frequency, so 6 means the next 6 weeks).
HORIZON = 6  # Should match `output_chunk_length` (defined in model params)

# Arguments that are identical for the training and the validation evaluation.
shared_eval_args = {
    "model": nbeat_tuned,
    "horizon": HORIZON,
    "target_scaler": target_scaler,
}

print('----------- TRAINING METRICS -----------')
train_metrics = evaluate_timeseries(
    val_series=train_series,  # Training data (TimeSeries objects)
    val_covariates=train_covariates,  # Training features (TimeSeries)
    **shared_eval_args,
)

print('\n\n----------- TESTING METRICS -----------')
test_metrics = evaluate_timeseries(
    val_series=val_series,  # Validation data (TimeSeries)
    val_covariates=val_covariates,  # Validation features (TimeSeries)
    **shared_eval_args,
)