In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")


from statsforecast.models import (AutoETS, HoltWinters, IMAPA, AutoTheta, CrostonOptimized, CrostonClassic)
from statsforecast.core import StatsForecast
from utilsforecast.evaluation import evaluate
from typing import List

import plotly.express as px
import os

# importing dataset

In [2]:
# Read sales data (we ignore the price data in this example)
sales = pd.read_csv("Datasets/Phase 0 - Sales.csv", na_values=np.nan)
sales = sales.melt(id_vars=['Client', 'Warehouse', 'Product'], var_name='ds', value_name='y')

df = sales.copy()
df["unique_id"] = df["Client"].astype(str) + "/" + df["Warehouse"].astype(str) + "/" + df["Product"].astype(str)
df = df.drop(columns =  ["Client", "Warehouse", "Product"])

df.columns = ["ds", "y", "unique_id"]
df["ds"] = pd.to_datetime(df["ds"])
df

Unnamed: 0,ds,y,unique_id
0,2020-07-06,7.0,0/1/367
1,2020-07-06,0.0,0/1/639
2,2020-07-06,21.0,0/1/655
3,2020-07-06,7.0,0/1/1149
4,2020-07-06,0.0,0/1/1485
...,...,...,...
2559005,2023-10-02,80.0,46/318/13485
2559006,2023-10-02,39.0,46/318/13582
2559007,2023-10-02,1.0,46/318/13691
2559008,2023-10-02,3.0,46/318/13946


# PLOTS to understand data

In [12]:
df.groupby(['unique_id']).ngroups

15053

In [15]:
# Create a folder named 'plots' if it doesn't exist
if not os.path.exists('plots'):
    os.makedirs('plots')

count = 0
# Loop through each unique_id and create individual plots
for unique_id in df['unique_id'].unique():
    count += 1
    # Filter the data for the current unique_id
    filtered_df = df[df['unique_id'] == unique_id]
    
    # Create the line plot
    fig = px.line(filtered_df, x='ds', y='y', title=f'Time Series for Unique ID: {unique_id}')
    
    # Save the plot as an HTML file in the 'plots' folder
    file_name = f'plots/{unique_id.replace("/", "_")}_time_series_plot.html'
    fig.write_html(file_name)
    
    print(f'Saved plot for unique_id {unique_id} at {file_name}')
    
    if count > 100:
        break


Saved plot for unique_id 0/1/367 at plots/0_1_367_time_series_plot.html
Saved plot for unique_id 0/1/639 at plots/0_1_639_time_series_plot.html
Saved plot for unique_id 0/1/655 at plots/0_1_655_time_series_plot.html
Saved plot for unique_id 0/1/1149 at plots/0_1_1149_time_series_plot.html
Saved plot for unique_id 0/1/1485 at plots/0_1_1485_time_series_plot.html
Saved plot for unique_id 0/1/1965 at plots/0_1_1965_time_series_plot.html
Saved plot for unique_id 0/1/1969 at plots/0_1_1969_time_series_plot.html
Saved plot for unique_id 0/1/3179 at plots/0_1_3179_time_series_plot.html
Saved plot for unique_id 0/1/3234 at plots/0_1_3234_time_series_plot.html
Saved plot for unique_id 0/1/3463 at plots/0_1_3463_time_series_plot.html
Saved plot for unique_id 0/1/3515 at plots/0_1_3515_time_series_plot.html
Saved plot for unique_id 0/1/4337 at plots/0_1_4337_time_series_plot.html
Saved plot for unique_id 0/1/4431 at plots/0_1_4431_time_series_plot.html
Saved plot for unique_id 0/1/5317 at plots/0

# Modelling

In [3]:
# temp_df = []
# count = 0
# for unique_id in df['unique_id'].unique():
#     count += 1
#     df_copy = df[df['unique_id'] == unique_id]
#     df_copy['y'] = df_copy['y'].ewm(span=4).mean()  # 4 weeks in a month --> alpha = 2 / (span + 1)
    
#     print(count)
#     temp_df.append(df_copy)

# temp_df = pd.concat(temp_df)

# Define a function to apply exponential smoothing to each group
def smooth_group(group):  
    group = group.copy()  # to avoid modifying the original dataframe
    group['y'] = group['y'].ewm(span=4).mean()
    return group

# Assuming 'df' is your input dataframe
# Apply smoothing to each unique 'unique_id'
smoothed_df = df.groupby('unique_id', group_keys=False).apply(smooth_group)

# Now 'smoothed_df' contains the original data along with a new 'smoothed_y' column
print(smoothed_df)



                ds          y     unique_id
0       2020-07-06   7.000000       0/1/367
1       2020-07-06   0.000000       0/1/639
2       2020-07-06  21.000000       0/1/655
3       2020-07-06   7.000000      0/1/1149
4       2020-07-06   0.000000      0/1/1485
...            ...        ...           ...
2559005 2023-10-02  63.615008  46/318/13485
2559006 2023-10-02  33.075006  46/318/13582
2559007 2023-10-02   1.850885  46/318/13691
2559008 2023-10-02   4.719896  46/318/13946
2559009 2023-10-02   1.273752  46/318/14294

[2559010 rows x 3 columns]


In [4]:
# Model inputs
horizon = 13
freq = "W-MON"

# Set model parameters
season_length = 52  # Yearly seasonality

In [6]:
sf = StatsForecast(                    
                     models=[
                            CrostonClassic(),
                        #     HoltWinters(error_type = 'M', season_length = season_length, alias="Mul_HW"),
                            CrostonOptimized(),
                            AutoTheta(season_length=season_length, decomposition_type="additive", model="OTM"),
                            AutoETS(model='ZZZ'),
                            HoltWinters(error_type = 'A', season_length = season_length, alias="Add_HW"),
                            IMAPA(),
                             ], 
                     freq=freq, 
                     n_jobs=-1,
                     verbose=True)

# Cross-validation

In [7]:
cross_val_results = sf.cross_validation(
    df = smoothed_df,
    n_windows=1,  # Only one window for validation (the last 7 weeks)
    h=7,  # Forecast horizon: predicting 7 weeks into the future
    step_size=1,  # Move window forward by 1 week 
    
    # metric=mse  # Use Mean Squared Error as the evaluation metric
)

# Enforce non-negativity
cross_val_results[str(sf.models[0])] = cross_val_results[str(sf.models[0])].clip(0)

In [8]:
cross_val_results.reset_index()

Unnamed: 0,unique_id,ds,cutoff,y,CrostonClassic,CrostonOptimized,AutoTheta,AutoETS,Add_HW,IMAPA
0,0/1/10705,2023-08-21,2023-08-14,2.042447e+00,2.746450,3.089380,2.888985,2.904547,4.572447,3.089380
1,0/1/10705,2023-08-28,2023-08-14,2.425468e+00,2.746450,3.089380,2.873732,2.904547,4.346179,3.089380
2,0/1/10705,2023-09-04,2023-08-14,3.055281e+00,2.746450,3.089380,2.858479,2.904547,3.408430,3.089380
3,0/1/10705,2023-09-11,2023-08-14,1.833169e+00,2.746450,3.089380,2.843225,2.904547,3.232447,3.089380
4,0/1/10705,2023-09-18,2023-08-14,1.899901e+00,2.746450,3.089380,2.827972,2.904547,2.359288,3.089380
...,...,...,...,...,...,...,...,...,...,...
105366,9/82/9950,2023-09-04,2023-08-14,4.093961e-07,0.003189,0.000206,0.000242,0.000003,0.011756,0.009272
105367,9/82/9950,2023-09-11,2023-08-14,2.456377e-07,0.003189,0.000206,0.000311,0.000003,0.010473,0.009272
105368,9/82/9950,2023-09-18,2023-08-14,1.473826e-07,0.003189,0.000206,0.000379,0.000003,0.009912,0.009272
105369,9/82/9950,2023-09-25,2023-08-14,8.842957e-08,0.003189,0.000206,0.000447,0.000003,0.009589,0.009272


In [9]:
# merge to get actual demand y from df
cross_val_results.rename(columns={'y' : 'y_smooth'}, inplace=True)
cross_val_results.reset_index()
cross_val_results = pd.merge(cross_val_results, df, how='left', on=['unique_id', 'ds'])
cross_val_results = cross_val_results[['unique_id', 'ds', 'cutoff', 'y', 'CrostonClassic',
       'CrostonOptimized', 'AutoTheta', 'AutoETS', 'Add_HW', 'IMAPA']]
cross_val_results

Unnamed: 0,unique_id,ds,cutoff,y,CrostonClassic,CrostonOptimized,AutoTheta,AutoETS,Add_HW,IMAPA
0,0/1/10705,2023-08-21,2023-08-14,1.0,2.746450,3.089380,2.888985,2.904547,4.572447,3.089380
1,0/1/10705,2023-08-28,2023-08-14,3.0,2.746450,3.089380,2.873732,2.904547,4.346179,3.089380
2,0/1/10705,2023-09-04,2023-08-14,4.0,2.746450,3.089380,2.858479,2.904547,3.408430,3.089380
3,0/1/10705,2023-09-11,2023-08-14,0.0,2.746450,3.089380,2.843225,2.904547,3.232447,3.089380
4,0/1/10705,2023-09-18,2023-08-14,2.0,2.746450,3.089380,2.827972,2.904547,2.359288,3.089380
...,...,...,...,...,...,...,...,...,...,...
105366,9/82/9950,2023-09-04,2023-08-14,0.0,0.003189,0.000206,0.000242,0.000003,0.011756,0.009272
105367,9/82/9950,2023-09-11,2023-08-14,0.0,0.003189,0.000206,0.000311,0.000003,0.010473,0.009272
105368,9/82/9950,2023-09-18,2023-08-14,0.0,0.003189,0.000206,0.000379,0.000003,0.009912,0.009272
105369,9/82/9950,2023-09-25,2023-08-14,0.0,0.003189,0.000206,0.000447,0.000003,0.009589,0.009272


In [10]:
cross_val_results.to_csv("FCA.csv")

In [11]:
def score(df, pred_cols=[], modify = False):
    df_copy = df.copy()
    # Calculate scores for all the model columns
    model_scores = {}
    
    for model in pred_cols:
        submission = df_copy[model].values
        objective = df_copy['y'].values

        # Step 1: Calculate absolute error
        abs_err = np.nansum(np.abs(submission - objective))

        # Step 2: Calculate the error (difference between submission and objective)
        err = np.nansum(submission - objective)

        # Step 3: Compute the score
        score = abs_err + np.abs(err)
        score /= np.nansum(objective)

        print(model, score)
        score
        
        if modify:
            df_copy[model + '_score'] = score 
        
        # Store the score in the model_scores dictionary
        model_scores[model] = score
    
    # Find the model with the minimum score
    best_model = min(model_scores, key=model_scores.get)
    best_score = model_scores[best_model]
    
    if modify:
        # Add best_model and best_model_score as new columns
        df_copy['best_model'] = best_model
        df_copy['best_model_score'] = best_score
    
    # print(df_copy)
    return df_copy
        
        
        
score(df=cross_val_results, pred_cols=['CrostonClassic', 'CrostonOptimized', 'AutoTheta', 'AutoETS', 'Add_HW', 'IMAPA'])

CrostonClassic 0.7225936734367281
CrostonOptimized 0.5089157175572676
AutoTheta 0.4368048875140784
AutoETS 0.43319292040516544
Add_HW 0.7356150256374612
IMAPA 0.5213618624892865


Unnamed: 0,unique_id,ds,cutoff,y,CrostonClassic,CrostonOptimized,AutoTheta,AutoETS,Add_HW,IMAPA
0,0/1/10705,2023-08-21,2023-08-14,1.0,2.746450,3.089380,2.888985,2.904547,4.572447,3.089380
1,0/1/10705,2023-08-28,2023-08-14,3.0,2.746450,3.089380,2.873732,2.904547,4.346179,3.089380
2,0/1/10705,2023-09-04,2023-08-14,4.0,2.746450,3.089380,2.858479,2.904547,3.408430,3.089380
3,0/1/10705,2023-09-11,2023-08-14,0.0,2.746450,3.089380,2.843225,2.904547,3.232447,3.089380
4,0/1/10705,2023-09-18,2023-08-14,2.0,2.746450,3.089380,2.827972,2.904547,2.359288,3.089380
...,...,...,...,...,...,...,...,...,...,...
105366,9/82/9950,2023-09-04,2023-08-14,0.0,0.003189,0.000206,0.000242,0.000003,0.011756,0.009272
105367,9/82/9950,2023-09-11,2023-08-14,0.0,0.003189,0.000206,0.000311,0.000003,0.010473,0.009272
105368,9/82/9950,2023-09-18,2023-08-14,0.0,0.003189,0.000206,0.000379,0.000003,0.009912,0.009272
105369,9/82/9950,2023-09-25,2023-08-14,0.0,0.003189,0.000206,0.000447,0.000003,0.009589,0.009272


In [12]:
cross_val_results.reset_index(inplace=True)

In [13]:
def calculate_model_scores(df, pred_cols, modify=False):
    """
    Calculate scores for different models (pred_cols), and determine the best model for each unique_id.
    
    Parameters:
    df: DataFrame - Cross-validation results.
    pred_cols: List - Columns corresponding to model predictions (e.g., ['AutoETS', 'Add', 'IMAPA']).
    modify: Boolean - Whether to modify the original DataFrame or return a new one.
    
    Returns:
    DataFrame - Modified DataFrame with 'best_model' and 'best_model_score' if modify=True, else returns a new DataFrame.
    """
    
    # Initialize a list to store modified DataFrames for each unique_id
    modified_dfs = []
    
    # Loop through each unique_id in the dataframe
    for unique_id in df['unique_id'].unique():
        # Subset the dataframe by unique_id
        print()
        print(unique_id)
        unique_df = df[df['unique_id'] == unique_id].copy()
        
        # Calculate scores for all the model columns
        model_scores = {}
        for model in pred_cols:
            submission = unique_df[model].values
            objective = unique_df['y'].values
            
            abs_err = np.nansum(np.abs(submission - objective))
            err = np.nansum(submission - objective)
            score = (abs_err + np.abs(err)) / np.nansum(objective)
            
            # Store the score in the model_scores dictionary
            model_scores[model] = score
        
        # Find the model with the minimum score
        best_model = min(model_scores, key=model_scores.get)
        best_score = model_scores[best_model]
        
        print(best_model, best_score)
        
        
        if modify:
            # Add best_model and best_model_score as new columns
            unique_df['best_model'] = best_model
            unique_df['best_model_score'] = best_score
        
        # Append the modified dataframe to the list
        modified_dfs.append(unique_df)
    
    # Concatenate all modified DataFrames
    result_df = pd.concat(modified_dfs)
    
    
    # Return the modified DataFrame
    return result_df



# Apply the function to your cross-validation results
cross_val_results = calculate_model_scores(cross_val_results, pred_cols=['CrostonClassic', "CrostonOptimized", 'AutoTheta', 'AutoETS', 'Add_HW', 'IMAPA'], modify=True)



0/1/10705
CrostonOptimized 0.6119871709966507

0/1/11000
Add_HW 0.6096694828224499

0/1/11015
CrostonClassic 1.5120720366671336

0/1/1149
AutoETS 0.8998415273753151

0/1/11526
Add_HW 2.214588865517592

0/1/11552
CrostonClassic 2.0

0/1/11983
CrostonClassic 0.586886184427446

0/1/12171
AutoETS 2.4051814669362326

0/1/12298
AutoTheta 1.8273848742246628

0/1/14160
Add_HW 0.9977717839783203

0/1/14311
IMAPA 0.8414331444270216

0/1/1485
IMAPA 1.0645572619906831

0/1/1965
Add_HW 1.3560000193019368

0/1/1969
AutoTheta 1.3049926519393922

0/1/3179
Add_HW 1.2098979073419593

0/1/3234
CrostonClassic 1.2788652267844738

0/1/3463
CrostonOptimized 1.2825773298295196

0/1/3515
CrostonClassic 0.889456468414866

0/1/367
CrostonClassic 1.4608197926011934

0/1/4337
AutoETS 1.005902838287821

0/1/4431
Add_HW 0.614388515120461

0/1/5317
AutoETS 1.6656282098465953

0/1/5604
CrostonClassic 2.0

0/1/5619
CrostonClassic 0.8953579234304829

0/1/5657
CrostonClassic 0.9726918543982548

0/1/5871
CrostonClassic 1

In [14]:
cross_val_results

Unnamed: 0,index,unique_id,ds,cutoff,y,CrostonClassic,CrostonOptimized,AutoTheta,AutoETS,Add_HW,IMAPA,best_model,best_model_score
0,0,0/1/10705,2023-08-21,2023-08-14,1.0,2.746450,3.089380,2.888985,2.904547,4.572447,3.089380,CrostonOptimized,0.611987
1,1,0/1/10705,2023-08-28,2023-08-14,3.0,2.746450,3.089380,2.873732,2.904547,4.346179,3.089380,CrostonOptimized,0.611987
2,2,0/1/10705,2023-09-04,2023-08-14,4.0,2.746450,3.089380,2.858479,2.904547,3.408430,3.089380,CrostonOptimized,0.611987
3,3,0/1/10705,2023-09-11,2023-08-14,0.0,2.746450,3.089380,2.843225,2.904547,3.232447,3.089380,CrostonOptimized,0.611987
4,4,0/1/10705,2023-09-18,2023-08-14,2.0,2.746450,3.089380,2.827972,2.904547,2.359288,3.089380,CrostonOptimized,0.611987
...,...,...,...,...,...,...,...,...,...,...,...,...,...
105366,105366,9/82/9950,2023-09-04,2023-08-14,0.0,0.003189,0.000206,0.000242,0.000003,0.011756,0.009272,CrostonClassic,inf
105367,105367,9/82/9950,2023-09-11,2023-08-14,0.0,0.003189,0.000206,0.000311,0.000003,0.010473,0.009272,CrostonClassic,inf
105368,105368,9/82/9950,2023-09-18,2023-08-14,0.0,0.003189,0.000206,0.000379,0.000003,0.009912,0.009272,CrostonClassic,inf
105369,105369,9/82/9950,2023-09-25,2023-08-14,0.0,0.003189,0.000206,0.000447,0.000003,0.009589,0.009272,CrostonClassic,inf


In [15]:
cross_val_results.to_csv("FCA_V6_smooth.csv", index = False)

# Training and prediction for the test set

In [16]:
# Fit the model
sf.fit(df = smoothed_df)

# Generate test predictions
yhat_test = sf.predict(h=horizon)
yhat_test = yhat_test.reset_index()

# Enforce non-negativity
yhat_test[str(sf.models[0])] = yhat_test[str(sf.models[0])].clip(0)

In [18]:
yhat_test

Unnamed: 0,unique_id,ds,CrostonClassic,CrostonOptimized,AutoTheta,AutoETS,Add_HW,IMAPA
0,0/1/10705,2023-10-09,2.844362,3.273373,4.161641,4.180062e+00,1.922012,3.273373
1,0/1/10705,2023-10-16,2.844362,3.273373,4.146151,4.180062e+00,1.873775,3.273373
2,0/1/10705,2023-10-23,2.844362,3.273373,4.130661,4.180062e+00,2.204642,3.273373
3,0/1/10705,2023-10-30,2.844362,3.273373,4.115170,4.180062e+00,2.016935,3.273373
4,0/1/10705,2023-11-06,2.844362,3.273373,4.099680,4.180062e+00,1.887788,3.273373
...,...,...,...,...,...,...,...,...
195684,9/82/9950,2023-12-04,0.002923,0.000018,0.000538,7.952537e-08,0.008541,0.009296
195685,9/82/9950,2023-12-11,0.002923,0.000018,0.000595,7.952537e-08,0.009210,0.009296
195686,9/82/9950,2023-12-18,0.002923,0.000018,0.000652,7.952537e-08,0.009544,0.009296
195687,9/82/9950,2023-12-25,0.002923,0.000018,0.000708,7.952537e-08,0.009861,0.009296


# OUTPUT PREPROCESSING

In [19]:
cross_val_results

Unnamed: 0,index,unique_id,ds,cutoff,y,CrostonClassic,CrostonOptimized,AutoTheta,AutoETS,Add_HW,IMAPA,best_model,best_model_score
0,0,0/1/10705,2023-08-21,2023-08-14,1.0,2.746450,3.089380,2.888985,2.904547,4.572447,3.089380,CrostonOptimized,0.611987
1,1,0/1/10705,2023-08-28,2023-08-14,3.0,2.746450,3.089380,2.873732,2.904547,4.346179,3.089380,CrostonOptimized,0.611987
2,2,0/1/10705,2023-09-04,2023-08-14,4.0,2.746450,3.089380,2.858479,2.904547,3.408430,3.089380,CrostonOptimized,0.611987
3,3,0/1/10705,2023-09-11,2023-08-14,0.0,2.746450,3.089380,2.843225,2.904547,3.232447,3.089380,CrostonOptimized,0.611987
4,4,0/1/10705,2023-09-18,2023-08-14,2.0,2.746450,3.089380,2.827972,2.904547,2.359288,3.089380,CrostonOptimized,0.611987
...,...,...,...,...,...,...,...,...,...,...,...,...,...
105366,105366,9/82/9950,2023-09-04,2023-08-14,0.0,0.003189,0.000206,0.000242,0.000003,0.011756,0.009272,CrostonClassic,inf
105367,105367,9/82/9950,2023-09-11,2023-08-14,0.0,0.003189,0.000206,0.000311,0.000003,0.010473,0.009272,CrostonClassic,inf
105368,105368,9/82/9950,2023-09-18,2023-08-14,0.0,0.003189,0.000206,0.000379,0.000003,0.009912,0.009272,CrostonClassic,inf
105369,105369,9/82/9950,2023-09-25,2023-08-14,0.0,0.003189,0.000206,0.000447,0.000003,0.009589,0.009272,CrostonClassic,inf


In [20]:
yhat_test

Unnamed: 0,unique_id,ds,CrostonClassic,CrostonOptimized,AutoTheta,AutoETS,Add_HW,IMAPA
0,0/1/10705,2023-10-09,2.844362,3.273373,4.161641,4.180062e+00,1.922012,3.273373
1,0/1/10705,2023-10-16,2.844362,3.273373,4.146151,4.180062e+00,1.873775,3.273373
2,0/1/10705,2023-10-23,2.844362,3.273373,4.130661,4.180062e+00,2.204642,3.273373
3,0/1/10705,2023-10-30,2.844362,3.273373,4.115170,4.180062e+00,2.016935,3.273373
4,0/1/10705,2023-11-06,2.844362,3.273373,4.099680,4.180062e+00,1.887788,3.273373
...,...,...,...,...,...,...,...,...
195684,9/82/9950,2023-12-04,0.002923,0.000018,0.000538,7.952537e-08,0.008541,0.009296
195685,9/82/9950,2023-12-11,0.002923,0.000018,0.000595,7.952537e-08,0.009210,0.009296
195686,9/82/9950,2023-12-18,0.002923,0.000018,0.000652,7.952537e-08,0.009544,0.009296
195687,9/82/9950,2023-12-25,0.002923,0.000018,0.000708,7.952537e-08,0.009861,0.009296


In [21]:
# Initialize an empty list to store the result
result_list = []
count = 0

# Loop through each unique_id in the test DataFrame
for unique_id in cross_val_results['unique_id'].unique():
    count += 1
    
    # Get the best model for the current unique_id from the test DataFrame
    best_model = cross_val_results[cross_val_results['unique_id'] == unique_id]['best_model'].iloc[0]
    
    # Filter the forecast DataFrame for this unique_id
    forecast_filtered = yhat_test[yhat_test['unique_id'] == unique_id][['unique_id', 'ds', best_model]]
    
    # Rename the best model column to 'best_model_forecast' for clarity
    forecast_filtered = forecast_filtered.rename(columns={best_model: 'best_model_forecast'})
    
    # Append the filtered forecast to the result list
    result_list.append(forecast_filtered)
    print(count, unique_id)

# Concatenate the list of DataFrames to form the final result DataFrame
final_forecast_df = pd.concat(result_list, ignore_index=True)

# Display the final result DataFrame
print(final_forecast_df)


1 0/1/10705
2 0/1/11000
3 0/1/11015
4 0/1/1149
5 0/1/11526
6 0/1/11552
7 0/1/11983
8 0/1/12171
9 0/1/12298
10 0/1/14160
11 0/1/14311
12 0/1/1485
13 0/1/1965
14 0/1/1969
15 0/1/3179
16 0/1/3234
17 0/1/3463
18 0/1/3515
19 0/1/367
20 0/1/4337
21 0/1/4431
22 0/1/5317
23 0/1/5604
24 0/1/5619
25 0/1/5657
26 0/1/5871
27 0/1/6341
28 0/1/639
29 0/1/655
30 0/1/6898
31 0/1/7022
32 0/1/7186
33 0/1/7457
34 0/1/7469
35 0/1/7661
36 0/1/7984
37 0/1/8011
38 0/1/8157
39 0/1/8264
40 0/1/8468
41 0/1/8587
42 0/1/8869
43 0/100/10705
44 0/100/10789
45 0/100/11015
46 0/100/1149
47 0/100/11526
48 0/100/11983
49 0/100/12171
50 0/100/14311
51 0/100/1969
52 0/100/3234
53 0/100/3463
54 0/100/3515
55 0/100/367
56 0/100/4337
57 0/100/4431
58 0/100/5317
59 0/100/5619
60 0/100/5657
61 0/100/5871
62 0/100/6341
63 0/100/639
64 0/100/655
65 0/100/6898
66 0/100/7022
67 0/100/7186
68 0/100/7457
69 0/100/7984
70 0/100/8011
71 0/100/8157
72 0/100/8264
73 0/100/8468
74 0/100/8587
75 0/100/8869
76 0/120/10705
77 0/120/10774
78

In [22]:
final_forecast_df.to_csv("fx.csv", index=False)

# Submission

In [24]:
yhat_test = final_forecast_df.copy()
name = "SMOOTHED_CROSSTON(SIMPLE+OPT)_THETA_AUTOETS_HW_IMAPA.csv"

# Apply some data wrangling to ensure everything is in the expected format of the competition
yhat_test[["Client", "Warehouse", "Product"]] = yhat_test["unique_id"].str.split("/", expand=True)
yhat_test[["Client", "Warehouse", "Product"]] = yhat_test[["Client", "Warehouse", "Product"]].astype(np.int64)
yhat_test = yhat_test.sort_values(by=["Client", "Warehouse", "Product"]).reset_index(drop=True)
yhat_test = yhat_test.drop(columns = "unique_id")
yhat_test = yhat_test.set_index(["Client", "Warehouse", "Product", "ds"])
yhat_test = yhat_test.unstack(3)
yhat_test.columns = yhat_test.columns.get_level_values(1)
yhat_test.to_csv(name)