In [None]:
import os
import pandas as pd
from darts import TimeSeries
from darts.models import NBEATSModel, NHiTSModel  # and others conditionally
from darts.metrics import mape

# Optional model import: TFTModel is only registered below when this import
# succeeds, so the cell no longer fails with NameError on an undefined name.
try:
    from darts.models import TFTModel
except ImportError:
    TFTModel = None

# Forecasting settings shared by every model.
forecast_horizon = 7   # number of steps each model predicts
input_chunk = 30       # number of past steps each model sees

# Candidate models, keyed by the label used when reporting results.
models = {
    "NBEATS": NBEATSModel(input_chunk_length=input_chunk, output_chunk_length=forecast_horizon),
    "NHiTS": NHiTSModel(input_chunk_length=input_chunk, output_chunk_length=forecast_horizon),
    # add PatchTST etc. here if available
}

if TFTModel is not None:
    # add_relative_index=True lets TFT be fit on a plain series; without it the
    # model expects future covariates to be passed to fit()/predict().
    models["TFT"] = TFTModel(
        input_chunk_length=input_chunk,
        output_chunk_length=forecast_horizon,
        add_relative_index=True,
    )

# One list of per-file scores for each registered model.
results = {name: [] for name in models}


In [None]:
import os
import random
import pandas as pd
from darts import TimeSeries
from darts.metrics import mape
from darts.models import NBEATSModel, NHiTSModel  # plus any other models you successfully imported before

# --- Forecasting settings ---
# forecast_horizon: number of steps predicted; input_chunk: look-back window.
forecast_horizon, input_chunk = 7, 30

# Build every candidate model with the same window configuration.
# Add (label, class) pairs for TFT, PatchTST, ... if you managed to import them.
models = {
    label: model_cls(
        input_chunk_length=input_chunk,
        output_chunk_length=forecast_horizon,
    )
    for label, model_cls in (
        ("NBEATS", NBEATSModel),
        ("NHiTS", NHiTSModel),
    )
}

# Per-model list that collects one score per evaluated file.
results = {label: [] for label in models}

# Path to your CSV folder
input_folder = "products_csv"

# os.listdir returns entries in arbitrary order, so sort them: together with the
# fixed seed below this makes the selection identical on every run.
all_csvs = sorted(f for f in os.listdir(input_folder) if f.lower().endswith(".csv"))

# Select 10 random files (or all if fewer)
n_samples = 10
sample_seed = 42  # change to draw a different, but still reproducible, sample

if len(all_csvs) <= n_samples:
    sampled = all_csvs
else:
    # A dedicated Random instance keeps the draw reproducible without touching
    # the global random state used elsewhere.
    sampled = random.Random(sample_seed).sample(all_csvs, n_samples)

print("Selected files:", sampled)

# Evaluate every model on each sampled CSV.
#
# For each file: build a gap-free daily quantity series, hold out the last
# `forecast_horizon` days, train a fresh copy of every model on the rest and
# store `100 - MAPE` on the hold-out in `results[name]`.

# A training sample needs input_chunk + forecast_horizon points, and another
# forecast_horizon points are held out for validation.
min_series_len = input_chunk + 2 * forecast_horizon

for fname in sampled:
    path = os.path.join(input_folder, fname)
    df = pd.read_csv(path)

    # Drop rows missing crucial data
    df = df.dropna(subset=["Order Date", "Qty Ordered"])
    if df.empty:
        # Without this guard the min/max date would be NaT and abort the whole loop.
        print(f"Skipping {fname} because it has no usable rows")
        continue

    df["Order Date"] = pd.to_datetime(df["Order Date"])

    # One value per calendar day: several rows on the same day are summed
    # (dropping all but the first would under-count the day's quantity), and
    # days without any row become 0.
    daily_qty = (
        df.set_index("Order Date")["Qty Ordered"]
        .sort_index()
        .resample("D")
        .sum()
    )
    daily_df = daily_qty.reset_index()

    # Convert to TimeSeries
    try:
        series = TimeSeries.from_dataframe(
            daily_df,
            time_col="Order Date",
            value_cols="Qty Ordered",
            fill_missing_dates=True,
            freq="D"
        )
    except Exception as e:
        print(f"Could not convert {fname} to TimeSeries: {e}")
        continue

    # Skip series that cannot supply one training sample plus the hold-out.
    if len(series) < min_series_len:
        print(f"Skipping {fname} because series too short (len={len(series)})")
        continue

    train, val = series[:-forecast_horizon], series[-forecast_horizon:]

    for name, model in models.items():
        try:
            # Train a fresh copy per file: calling fit() again on the same
            # instance would continue training from the previous file's weights.
            fresh_model = model.untrained_model()
            fresh_model.fit(train)
            pred = fresh_model.predict(len(val))
            # NOTE(review): MAPE is undefined when the actual values contain 0,
            # which zero-filled days can produce; darts may raise here and the
            # file is then only reported below - confirm the metric choice.
            acc = 100 - mape(val, pred)
            results[name].append(acc)
        except Exception as e:
            print(f"Error with {name} on {fname}: {e}")

# Average each model's collected scores; a model with no scores maps to None.
mean_acc = dict(
    (model_name, (sum(scores) / len(scores)) if scores else None)
    for model_name, scores in results.items()
)

# Report one line per model.
print("Mean Accuracies over sampled files:")
for n, acc in mean_acc.items():
    print(f"  {n}: {acc}")


In [None]:
from darts.metrics import mae, mase  # or define WAPE manually

# Error-based evaluation (MAE): lower is better.
#
# NOTE: `train`, `val` and `fname` are whatever the previous cell's loop left
# behind, so this cell scores only the last file that loop processed.

# Keep the errors in their own dict: `results` already holds accuracy
# percentages, and averaging both kinds of number together is meaningless.
error_results = {name: [] for name in models}

for name, model in models.items():
    try:
        # Fresh copy so training does not continue from earlier fit() calls.
        fresh_model = model.untrained_model()
        fresh_model.fit(train)
        pred = fresh_model.predict(len(val))
        # Option A: MAE
        error = mae(val, pred)
        # Option B: MASE
        # error = mase(val, pred, train)  # MASE often needs in-sample data
        error_results[name].append(error)
    except Exception as e:
        print(f"Error with {name} on {fname}: {e}")

# At the end, lower error is better (for MAE/MASE)
mean_error = {n: (sum(lst)/len(lst) if lst else None) for n, lst in error_results.items()}
print("Mean Errors over sampled files:")
for n, err in mean_error.items():
    print(f"  {n}: {err}")


# Result: NHiTS is the best model, with the lowest mean error (4.776637142298035e-05).