In [1]:
!pip install prophet

Collecting prophet
  Downloading prophet-1.1.5-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.5 kB)
Collecting cmdstanpy>=1.0.4 (from prophet)
  Downloading cmdstanpy-1.2.4-py3-none-any.whl.metadata (4.1 kB)
Collecting holidays>=0.25 (from prophet)
  Downloading holidays-0.53-py3-none-any.whl.metadata (23 kB)
Collecting importlib-resources (from prophet)
  Downloading importlib_resources-6.4.0-py3-none-any.whl.metadata (3.9 kB)
Collecting stanio<2.0.0,>=0.4.0 (from cmdstanpy>=1.0.4->prophet)
  Downloading stanio-0.5.1-py3-none-any.whl.metadata (1.6 kB)
Downloading prophet-1.1.5-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (14.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.4/14.4 MB[0m [31m55.8 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading cmdstanpy-1.2.4-py3-none-any.whl (94 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m94.5/94.5 kB[0m [31m27.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloa

In [9]:
import numpy as np 
import pandas as pd 
from prophet import Prophet 
import os 
import math 
from tqdm import tqdm
from sklearn.model_selection import ParameterGrid 
from sklearn.metrics import mean_absolute_percentage_error

In [2]:
# Load the four competition tables in one pass; the unpacking order matches
# the order of the file names below.
train, train_calendar, test, test_calendar = (
    pd.read_csv(csv_path)
    for csv_path in ("train.csv", "train_calendar.csv", "test.csv", "test_calendar.csv")
)

In [3]:
# Preview the first rows of the training table (per-warehouse daily rows with an `orders` column).
train.head() 

Unnamed: 0,warehouse,date,orders,holiday_name,holiday,shutdown,mini_shutdown,shops_closed,winter_school_holidays,school_holidays,blackout,mov_change,frankfurt_shutdown,precipitation,snow,user_activity_1,user_activity_2,id
0,Prague_1,2020-12-05,6895.0,,0,0,0,0,0,0,0,0.0,0,0.0,0.0,1722.0,32575.0,Prague_1_2020-12-05
1,Prague_1,2020-12-06,6584.0,,0,0,0,0,0,0,0,0.0,0,0.0,0.0,1688.0,32507.0,Prague_1_2020-12-06
2,Prague_1,2020-12-07,7030.0,,0,0,0,0,0,0,0,0.0,0,0.0,0.0,1696.0,32552.0,Prague_1_2020-12-07
3,Prague_1,2020-12-08,6550.0,,0,0,0,0,0,0,0,0.0,0,0.8,0.0,1681.0,32423.0,Prague_1_2020-12-08
4,Prague_1,2020-12-09,6910.0,,0,0,0,0,0,0,0,0.0,0,0.5,0.0,1704.0,32410.0,Prague_1_2020-12-09


In [4]:
# Preview the first rows of the test table; note it has no `orders` column.
test.head()

Unnamed: 0,warehouse,date,holiday_name,holiday,shops_closed,winter_school_holidays,school_holidays,id
0,Prague_1,2024-03-16,,0,0,0,0,Prague_1_2024-03-16
1,Prague_1,2024-03-17,,0,0,0,0,Prague_1_2024-03-17
2,Prague_1,2024-03-18,,0,0,0,0,Prague_1_2024-03-18
3,Prague_1,2024-03-19,,0,0,0,0,Prague_1_2024-03-19
4,Prague_1,2024-03-20,,0,0,0,0,Prague_1_2024-03-20


In [5]:
# List the distinct warehouse names present in the training data (sorted by np.unique).
np.unique(train["warehouse"]) 

array(['Brno_1', 'Budapest_1', 'Frankfurt_1', 'Munich_1', 'Prague_1',
       'Prague_2', 'Prague_3'], dtype=object)

# Prague_1 Model

In [6]:
# Keep only the Prague_1 rows and the two columns needed for modelling,
# renamed to Prophet's `ds` (date) / `y` (target) convention.
prague1 = (
    train.loc[train["warehouse"] == "Prague_1", ["date", "orders"]]
    .rename(columns={"date": "ds", "orders": "y"})
)
prague1

Unnamed: 0,ds,y
0,2020-12-05,6895.0
1,2020-12-06,6584.0
2,2020-12-07,7030.0
3,2020-12-08,6550.0
4,2020-12-09,6910.0
...,...,...
1188,2024-03-11,9866.0
1189,2024-03-12,9710.0
1190,2024-03-13,9628.0
1191,2024-03-14,10056.0


In [7]:
# Dates to forecast for Prague_1, exposed under Prophet's `ds` column name.
prague1_test = (
    test.loc[test["warehouse"] == "Prague_1", ["date"]]
    .rename(columns={"date": "ds"})
)
prague1_test

Unnamed: 0,ds
0,2024-03-16
1,2024-03-17
2,2024-03-18
3,2024-03-19
4,2024-03-20
...,...
56,2024-05-11
57,2024-05-12
58,2024-05-13
59,2024-05-14


In [8]:
# Chronological hold-out: the last 20% of the Prague_1 rows are used for validation,
# everything before them for fitting candidate models.
validation_size = int(len(prague1) * 0.2)
train_cv, valid_cv = prague1.iloc[:-validation_size], prague1.iloc[-validation_size:]
print(train_cv.shape, valid_cv.shape)

(955, 2) (238, 2)


In [10]:
# Grid search over Prophet prior scales for Prague_1, scored by MAPE on the
# held-out tail (`valid_cv`) after fitting on `train_cv`.
#
# NOTE: no holidays are passed to Prophet in this notebook, and Prophet applies
# `holidays_prior_scale` only to holiday effects, so varying it just refits the
# same model. It is pinned to one value (the one the full grid always ended up
# selecting through tie-breaking) to avoid duplicate fits.
# Restore a real grid for it once holidays are added to the model.
param_grid = {
    'changepoint_prior_scale': [0.01, 0.1, 0.5, 1.0, 2.0],
    'seasonality_prior_scale': [0.01, 0.1, 1.0, 10.0],
    'holidays_prior_scale': [0.01],
}

best_mape = float('inf')
best_params = {}

for params in ParameterGrid(param_grid):
    # The grid keys are exactly Prophet's keyword names, so unpack them directly.
    model = Prophet(**params)
    model.fit(train_cv)
    # Score the candidate on the validation dates only.
    forecast = model.predict(valid_cv[["ds"]])
    mape = mean_absolute_percentage_error(valid_cv["y"], forecast["yhat"])
    # Strict "<" keeps the first candidate when several tie.
    if mape < best_mape:
        best_mape = mape
        best_params = params

print(f"Best MAPE: {best_mape}") 
print(f"Best Params: {best_params}") 

12:02:31 - cmdstanpy - INFO - Chain [1] start processing
12:02:31 - cmdstanpy - INFO - Chain [1] done processing
12:02:31 - cmdstanpy - INFO - Chain [1] start processing
12:02:31 - cmdstanpy - INFO - Chain [1] done processing
12:02:31 - cmdstanpy - INFO - Chain [1] start processing
12:02:31 - cmdstanpy - INFO - Chain [1] done processing
12:02:31 - cmdstanpy - INFO - Chain [1] start processing
12:02:31 - cmdstanpy - INFO - Chain [1] done processing
12:02:31 - cmdstanpy - INFO - Chain [1] start processing
12:02:31 - cmdstanpy - INFO - Chain [1] done processing
12:02:32 - cmdstanpy - INFO - Chain [1] start processing
12:02:32 - cmdstanpy - INFO - Chain [1] done processing
12:02:32 - cmdstanpy - INFO - Chain [1] start processing
12:02:32 - cmdstanpy - INFO - Chain [1] done processing
12:02:32 - cmdstanpy - INFO - Chain [1] start processing
12:02:32 - cmdstanpy - INFO - Chain [1] done processing
12:02:32 - cmdstanpy - INFO - Chain [1] start processing
12:02:32 - cmdstanpy - INFO - Chain [1]

Best MAPE: 0.058315070012684586
Best Params: {'changepoint_prior_scale': 0.1, 'holidays_prior_scale': 0.01, 'seasonality_prior_scale': 0.01}


In [11]:
# Refit on the full Prague_1 history with the winning hyperparameters
# (Prophet.fit returns the fitted model, so construction and fitting chain).
final_model = Prophet(**best_params).fit(prague1)

# Parse the forecast dates, then predict and keep only the date and point forecast.
prague1_test["ds"] = pd.to_datetime(prague1_test["ds"])
forecast = final_model.predict(prague1_test.loc[:, ["ds"]])
predictions = forecast.loc[:, ["ds", "yhat"]]

12:05:25 - cmdstanpy - INFO - Chain [1] start processing
12:05:25 - cmdstanpy - INFO - Chain [1] done processing


In [12]:
# Display the Prague_1 forecast (`ds`, `yhat`) produced by the final model.
predictions

Unnamed: 0,ds,yhat
0,2024-03-16,9998.197142
1,2024-03-17,9972.224887
2,2024-03-18,10224.884871
3,2024-03-19,9956.118980
4,2024-03-20,9973.290418
...,...,...
56,2024-05-11,9963.348793
57,2024-05-12,9936.503736
58,2024-05-13,10182.592303
59,2024-05-14,9901.477842


In [13]:
# Persist the Prague_1 forecast without the index column; this file is read back
# later when the submission is assembled.
predictions.to_csv("prague_1_fbprophet.csv",index=False) 

##### Good. Now let's automate the same steps for the remaining warehouses

In [15]:
# Repeat the Prague_1 workflow (split -> grid search -> refit -> forecast -> CSV)
# for every other warehouse. Prague_1 itself is not in the list because it is
# handled by the earlier cells.
regions = ["Brno_1", "Prague_2", "Prague_3", "Munich_1", "Frankfurt_1", "Budapest_1"] 

# The search space is the same for every warehouse, so it is built once.
# NOTE: no holidays are passed to Prophet here, and Prophet applies
# `holidays_prior_scale` only to holiday effects, so varying it just refits the
# same model. It is pinned to the value the full grid always selected through
# tie-breaking; restore a real grid once holidays are added to the model.
param_grid = {
    'changepoint_prior_scale': [0.01, 0.1, 0.5, 1.0, 2.0],
    'seasonality_prior_scale': [0.01, 0.1, 1.0, 10.0],
    'holidays_prior_scale': [0.01],
}

for region in regions:
    print(f"================== training for region: {region} ==================")

    # History for this warehouse in Prophet's ds/y layout.
    cur_df = (
        train.loc[train["warehouse"] == region, ["date", "orders"]]
        .rename(columns={"date": "ds", "orders": "y"})
    )

    # Dates to forecast for this warehouse. The dates are parsed here, on this
    # region's own test frame; previously the conversion was written into
    # `prague1_test` from `cur_df`, which corrupted the Prague_1 frame and left
    # `cur_df_test` unconverted.
    cur_df_test = (
        test.loc[test["warehouse"] == region, ["date"]]
        .rename(columns={"date": "ds"})
        .assign(ds=lambda frame: pd.to_datetime(frame["ds"]))
    )

    # Chronological hold-out: last 20% of the rows for validation.
    validation_size = int(cur_df.shape[0] * 0.2)
    train_cv = cur_df[:-validation_size]
    valid_cv = cur_df[-validation_size:]

    best_mape = float('inf')
    best_params = {}

    for params in ParameterGrid(param_grid):
        # The grid keys are exactly Prophet's keyword names, so unpack them directly.
        model = Prophet(**params)
        model.fit(train_cv)
        # Score the candidate on the validation dates only.
        forecast = model.predict(valid_cv[["ds"]])
        mape = mean_absolute_percentage_error(valid_cv["y"], forecast["yhat"])
        # Strict "<" keeps the first candidate when several tie.
        if mape < best_mape:
            best_mape = mape
            best_params = params

    print(f"Best MAPE: {best_mape}") 
    print(f"Best Params: {best_params}") 

    # Refit on the full history of this warehouse with the winning parameters.
    final_model = Prophet(**best_params)
    final_model.fit(cur_df)

    forecast = final_model.predict(cur_df_test[["ds"]])
    predictions = forecast[["ds", "yhat"]]

    # The "fbprohpet" spelling is kept on purpose: the cell that assembles the
    # submission reads the files back under exactly these names.
    predictions.to_csv(f"{region}_fbprohpet.csv", index=False)
    

12:49:27 - cmdstanpy - INFO - Chain [1] start processing
12:49:27 - cmdstanpy - INFO - Chain [1] done processing




12:49:27 - cmdstanpy - INFO - Chain [1] start processing
12:49:27 - cmdstanpy - INFO - Chain [1] done processing
12:49:27 - cmdstanpy - INFO - Chain [1] start processing
12:49:27 - cmdstanpy - INFO - Chain [1] done processing
12:49:27 - cmdstanpy - INFO - Chain [1] start processing
12:49:27 - cmdstanpy - INFO - Chain [1] done processing
12:49:27 - cmdstanpy - INFO - Chain [1] start processing
12:49:27 - cmdstanpy - INFO - Chain [1] done processing
12:49:28 - cmdstanpy - INFO - Chain [1] start processing
12:49:28 - cmdstanpy - INFO - Chain [1] done processing
12:49:28 - cmdstanpy - INFO - Chain [1] start processing
12:49:28 - cmdstanpy - INFO - Chain [1] done processing
12:49:28 - cmdstanpy - INFO - Chain [1] start processing
12:49:28 - cmdstanpy - INFO - Chain [1] done processing
12:49:28 - cmdstanpy - INFO - Chain [1] start processing
12:49:28 - cmdstanpy - INFO - Chain [1] done processing
12:49:28 - cmdstanpy - INFO - Chain [1] start processing
12:49:28 - cmdstanpy - INFO - Chain [1]

Best MAPE: 0.05333525708158237
Best Params: {'changepoint_prior_scale': 0.1, 'holidays_prior_scale': 0.01, 'seasonality_prior_scale': 0.1}


12:49:50 - cmdstanpy - INFO - Chain [1] start processing
12:49:50 - cmdstanpy - INFO - Chain [1] done processing
12:49:50 - cmdstanpy - INFO - Chain [1] start processing
12:49:50 - cmdstanpy - INFO - Chain [1] done processing
12:49:50 - cmdstanpy - INFO - Chain [1] start processing
12:49:50 - cmdstanpy - INFO - Chain [1] done processing
12:49:50 - cmdstanpy - INFO - Chain [1] start processing
12:49:50 - cmdstanpy - INFO - Chain [1] done processing
12:49:50 - cmdstanpy - INFO - Chain [1] start processing
12:49:50 - cmdstanpy - INFO - Chain [1] done processing
12:49:51 - cmdstanpy - INFO - Chain [1] start processing
12:49:51 - cmdstanpy - INFO - Chain [1] done processing
12:49:51 - cmdstanpy - INFO - Chain [1] start processing
12:49:51 - cmdstanpy - INFO - Chain [1] done processing
12:49:51 - cmdstanpy - INFO - Chain [1] start processing
12:49:51 - cmdstanpy - INFO - Chain [1] done processing
12:49:51 - cmdstanpy - INFO - Chain [1] start processing
12:49:51 - cmdstanpy - INFO - Chain [1]

Best MAPE: 0.05652891012218773
Best Params: {'changepoint_prior_scale': 0.1, 'holidays_prior_scale': 0.01, 'seasonality_prior_scale': 0.01}


12:50:11 - cmdstanpy - INFO - Chain [1] start processing
12:50:11 - cmdstanpy - INFO - Chain [1] done processing




12:50:11 - cmdstanpy - INFO - Chain [1] start processing
12:50:11 - cmdstanpy - INFO - Chain [1] done processing
12:50:11 - cmdstanpy - INFO - Chain [1] start processing
12:50:11 - cmdstanpy - INFO - Chain [1] done processing
12:50:11 - cmdstanpy - INFO - Chain [1] start processing
12:50:11 - cmdstanpy - INFO - Chain [1] done processing
12:50:12 - cmdstanpy - INFO - Chain [1] start processing
12:50:12 - cmdstanpy - INFO - Chain [1] done processing
12:50:12 - cmdstanpy - INFO - Chain [1] start processing
12:50:12 - cmdstanpy - INFO - Chain [1] done processing
12:50:12 - cmdstanpy - INFO - Chain [1] start processing
12:50:12 - cmdstanpy - INFO - Chain [1] done processing
12:50:12 - cmdstanpy - INFO - Chain [1] start processing
12:50:12 - cmdstanpy - INFO - Chain [1] done processing
12:50:12 - cmdstanpy - INFO - Chain [1] start processing
12:50:12 - cmdstanpy - INFO - Chain [1] done processing
12:50:12 - cmdstanpy - INFO - Chain [1] start processing
12:50:12 - cmdstanpy - INFO - Chain [1]

Best MAPE: 0.058532568659646005
Best Params: {'changepoint_prior_scale': 0.1, 'holidays_prior_scale': 0.01, 'seasonality_prior_scale': 0.01}


12:50:32 - cmdstanpy - INFO - Chain [1] start processing
12:50:32 - cmdstanpy - INFO - Chain [1] done processing
12:50:33 - cmdstanpy - INFO - Chain [1] start processing




12:50:33 - cmdstanpy - INFO - Chain [1] done processing
12:50:33 - cmdstanpy - INFO - Chain [1] start processing
12:50:33 - cmdstanpy - INFO - Chain [1] done processing
12:50:33 - cmdstanpy - INFO - Chain [1] start processing
12:50:33 - cmdstanpy - INFO - Chain [1] done processing
12:50:33 - cmdstanpy - INFO - Chain [1] start processing
12:50:33 - cmdstanpy - INFO - Chain [1] done processing
12:50:33 - cmdstanpy - INFO - Chain [1] start processing
12:50:33 - cmdstanpy - INFO - Chain [1] done processing
12:50:33 - cmdstanpy - INFO - Chain [1] start processing
12:50:33 - cmdstanpy - INFO - Chain [1] done processing
12:50:33 - cmdstanpy - INFO - Chain [1] start processing
12:50:33 - cmdstanpy - INFO - Chain [1] done processing
12:50:33 - cmdstanpy - INFO - Chain [1] start processing
12:50:33 - cmdstanpy - INFO - Chain [1] done processing
12:50:34 - cmdstanpy - INFO - Chain [1] start processing
12:50:34 - cmdstanpy - INFO - Chain [1] done processing
12:50:34 - cmdstanpy - INFO - Chain [1] 

Best MAPE: 0.1610761941174431
Best Params: {'changepoint_prior_scale': 2.0, 'holidays_prior_scale': 0.01, 'seasonality_prior_scale': 10.0}


12:50:47 - cmdstanpy - INFO - Chain [1] done processing
12:50:47 - cmdstanpy - INFO - Chain [1] start processing
12:50:47 - cmdstanpy - INFO - Chain [1] done processing
12:50:47 - cmdstanpy - INFO - Chain [1] start processing
12:50:47 - cmdstanpy - INFO - Chain [1] done processing
12:50:47 - cmdstanpy - INFO - Chain [1] start processing




12:50:47 - cmdstanpy - INFO - Chain [1] done processing
12:50:47 - cmdstanpy - INFO - Chain [1] start processing
12:50:47 - cmdstanpy - INFO - Chain [1] done processing
12:50:47 - cmdstanpy - INFO - Chain [1] start processing
12:50:47 - cmdstanpy - INFO - Chain [1] done processing
12:50:47 - cmdstanpy - INFO - Chain [1] start processing
12:50:48 - cmdstanpy - INFO - Chain [1] done processing
12:50:48 - cmdstanpy - INFO - Chain [1] start processing
12:50:48 - cmdstanpy - INFO - Chain [1] done processing
12:50:48 - cmdstanpy - INFO - Chain [1] start processing
12:50:48 - cmdstanpy - INFO - Chain [1] done processing
12:50:48 - cmdstanpy - INFO - Chain [1] start processing
12:50:48 - cmdstanpy - INFO - Chain [1] done processing
12:50:48 - cmdstanpy - INFO - Chain [1] start processing
12:50:48 - cmdstanpy - INFO - Chain [1] done processing
12:50:48 - cmdstanpy - INFO - Chain [1] start processing
12:50:48 - cmdstanpy - INFO - Chain [1] done processing
12:50:48 - cmdstanpy - INFO - Chain [1] 

Best MAPE: 0.055893134800147314
Best Params: {'changepoint_prior_scale': 2.0, 'holidays_prior_scale': 0.01, 'seasonality_prior_scale': 10.0}


12:50:58 - cmdstanpy - INFO - Chain [1] start processing
12:50:58 - cmdstanpy - INFO - Chain [1] done processing
12:50:58 - cmdstanpy - INFO - Chain [1] start processing
12:50:58 - cmdstanpy - INFO - Chain [1] done processing
12:50:58 - cmdstanpy - INFO - Chain [1] start processing
12:50:58 - cmdstanpy - INFO - Chain [1] done processing
12:50:58 - cmdstanpy - INFO - Chain [1] start processing
12:50:58 - cmdstanpy - INFO - Chain [1] done processing
12:50:58 - cmdstanpy - INFO - Chain [1] start processing
12:50:58 - cmdstanpy - INFO - Chain [1] done processing
12:50:58 - cmdstanpy - INFO - Chain [1] start processing
12:50:59 - cmdstanpy - INFO - Chain [1] done processing
12:50:59 - cmdstanpy - INFO - Chain [1] start processing
12:50:59 - cmdstanpy - INFO - Chain [1] done processing
12:50:59 - cmdstanpy - INFO - Chain [1] start processing
12:50:59 - cmdstanpy - INFO - Chain [1] done processing
12:50:59 - cmdstanpy - INFO - Chain [1] start processing
12:50:59 - cmdstanpy - INFO - Chain [1]

Best MAPE: 0.03660693274465761
Best Params: {'changepoint_prior_scale': 0.1, 'holidays_prior_scale': 0.01, 'seasonality_prior_scale': 0.1}


In [16]:
# Load the example submission, which provides the `id` column and an `orders`
# column to be filled with the forecasts.
submission = pd.read_csv("solution_example.csv") 
submission

Unnamed: 0,id,orders
0,Prague_1_2024-03-16,5000
1,Prague_1_2024-03-17,5000
2,Prague_1_2024-03-18,5000
3,Prague_1_2024-03-19,5000
4,Prague_1_2024-03-20,5000
...,...,...
392,Budapest_1_2024-05-11,5000
393,Budapest_1_2024-05-12,5000
394,Budapest_1_2024-05-13,5000
395,Budapest_1_2024-05-14,5000


In [17]:
def _read_yhat(csv_path):
    """Return the `yhat` column of a saved forecast CSV as a NumPy array."""
    return pd.read_csv(csv_path)["yhat"].to_numpy()


# One forecast vector per warehouse, read back from the files written earlier.
prague_1 = _read_yhat("prague_1_fbprophet.csv")
brno_1 = _read_yhat("Brno_1_fbprohpet.csv")
prague_2 = _read_yhat("Prague_2_fbprohpet.csv")
prague_3 = _read_yhat("Prague_3_fbprohpet.csv")
munich_1 = _read_yhat("Munich_1_fbprohpet.csv")
frankfurt_1 = _read_yhat("Frankfurt_1_fbprohpet.csv")
budapest_1 = _read_yhat("Budapest_1_fbprohpet.csv")

In [18]:
# Stack the per-warehouse forecast vectors end to end, in this fixed warehouse order,
# into a single flat array, then show its length.
concatenated_predictions = np.concatenate(
    (
        prague_1,
        brno_1,
        prague_2,
        prague_3,
        munich_1,
        frankfurt_1,
        budapest_1,
    )
)

concatenated_predictions.shape

(397,)

In [19]:
# Overwrite the example `orders` values with the forecasts. A NumPy array carries
# no index, so the values are assigned purely by row position.
# NOTE(review): this assumes solution_example.csv lists warehouses and dates in the
# same order as the concatenation above — confirm against the `id` column.
submission["orders"] = concatenated_predictions 

In [20]:
# Inspect the filled-in submission before writing it out.
submission

Unnamed: 0,id,orders
0,Prague_1_2024-03-16,9998.197142
1,Prague_1_2024-03-17,9972.224887
2,Prague_1_2024-03-18,10224.884871
3,Prague_1_2024-03-19,9956.118980
4,Prague_1_2024-03-20,9973.290418
...,...,...
392,Budapest_1_2024-05-11,6650.424669
393,Budapest_1_2024-05-12,6317.791325
394,Budapest_1_2024-05-13,6565.124700
395,Budapest_1_2024-05-14,6622.549472


In [21]:
# Write the final submission file (id, orders) without the index column.
submission.to_csv("fbpropht_final_submission.csv", index=False) 

# Simple completion marker for the notebook run.
print("done!") 

done!
