### Import useful libraries

In [136]:
%run "Imports.ipynb"
# %run "Helpers.ipynb"

### Load data

In [171]:
# Load the full dataset, including rows where the naive prediction is missing.
# Alternative considered but not used for now: drop the rows with a missing naive
# prediction, so that the naive and Prophet predictions share the same dataset:
# df_all = pd.read_pickle("df_all.pkl")
# df_all_dropna = df_all.dropna()
# data = pd.read_pickle("df_all_dropna.pkl")
data = pd.read_pickle("df_all.pkl")

In [156]:
# Sanity check (disabled): number of rows per timestamp
# data.timestamp.value_counts()

### Helpers

In [54]:
def calculate_prophet_forecast(train, test):
    """Fit a Prophet model on ``train`` and forecast the timestamps of ``test``.

    The model uses linear growth and Polish ('PL') country holidays. Prophet's
    built-in yearly, weekly and daily seasonalities are switched off and
    replaced by three custom ones: monthly (period 30.5), weekly (period 7)
    and daily (period 1).

    Parameters
    ----------
    train : pandas.DataFrame
        History passed to ``Prophet.fit`` (Prophet expects columns ``ds``/``y``).
    test : pandas.DataFrame
        Hold-out rows; only their ``ds`` column is used.

    Returns
    -------
    pandas.DataFrame
        Prophet's forecast frame (contains ``yhat``), one row per row of
        ``test``, ordered by ``ds`` and indexed 0..len(test)-1.
    """
    prophet = Prophet(
        growth="linear",
        holidays_prior_scale=10,
        yearly_seasonality=False,
        weekly_seasonality=False,
        daily_seasonality=False,
    )
    prophet.add_country_holidays(country_name='PL')
    prophet.add_seasonality(name='monthly', period=30.5, fourier_order=55)
    prophet.add_seasonality(name='weekly', period=7, fourier_order=10)
    prophet.add_seasonality(name='daily', period=1, fourier_order=15)
    prophet.fit(train)

    # Forecast the test set's own timestamps instead of generating
    # len(test) consecutive hourly steps after the last training row: the
    # generated grid silently misaligns forecasts with test rows as soon as
    # the data has a missing hour.
    future = test[['ds']]
    forecast = prophet.predict(future)
    # Guarantee the positional 0..n-1 index that callers rely on.
    return forecast.reset_index(drop=True)

In [55]:
def divide_into_train_and_test(df,cut_date):
    """Split a frame into Prophet-style train/test parts at ``cut_date``.

    Only the ``timestamp`` and ``supply`` columns are kept; they are renamed
    to ``ds`` and ``y`` and the rows are sorted by ``ds``. The original index
    labels are preserved and the input frame is not modified.

    Returns
    -------
    (train, test) : tuple of pandas.DataFrame
        ``train`` holds rows with ``ds < cut_date``; ``test`` holds rows with
        ``ds >= cut_date``.
    """
    series = (
        df[['timestamp', 'supply']]
        .rename(columns={'timestamp': 'ds', 'supply': 'y'})
        .sort_values(by=['ds'])
    )

    before_cut = series.ds < cut_date
    from_cut_on = series.ds >= cut_date
    return series[before_cut], series[from_cut_on]

In [58]:
def add_final_forecast(df, cut_date, min_mean_supply=0.26):
    """Fill a ``final_forecast`` column for the rows on/after ``cut_date``.

    The frame is processed zone by zone. For each zone the rows are split at
    ``cut_date``; if the mean training ``supply`` exceeds ``min_mean_supply``
    a Prophet forecast is written to the zone's test rows, otherwise the
    existing ``forecast`` column is copied over. Rows before ``cut_date`` are
    left untouched (NaN if the column did not exist yet).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``zone``, ``timestamp``, ``supply`` and ``forecast``.
        Modified in place. Assumes unique index labels, since rows are
        addressed by label.
    cut_date : str or datetime-like
        First timestamp that belongs to the test period.
    min_mean_supply : float, default 0.26
        Zones whose mean training supply is not above this value keep the
        value from the ``forecast`` column instead of a Prophet forecast.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with ``final_forecast`` added or updated.

    Raises
    ------
    ValueError
        If the Prophet forecast does not have one row per test row.
    """
    if 'final_forecast' not in df.columns:
        df['final_forecast'] = float('nan')

    for name, group in df.groupby('zone'):
        train, test = divide_into_train_and_test(group, cut_date)
        print(name)

        # Nothing to forecast for this zone; avoids fitting a model for no rows.
        if test.empty:
            continue

        # An empty train set has a NaN mean, which compares False and
        # therefore falls through to the `forecast` column.
        if train['y'].mean() > min_mean_supply:
            prophet_forecast = calculate_prophet_forecast(train, test)
            if len(prophet_forecast) != len(test):
                raise ValueError(
                    "Prophet forecast has %d rows but zone %r has %d test rows"
                    % (len(prophet_forecast), name, len(test))
                )
            # Both `test` and the forecast are ordered by timestamp, so a
            # positional assignment lines the values up row for row.
            df.loc[test.index, 'final_forecast'] = prophet_forecast['yhat'].to_numpy()
        else:
            df.loc[test.index, 'final_forecast'] = df.loc[test.index, 'forecast']
    return df

In [168]:
def final_success_metric(df,forecast_column):
    """Score a forecast column per timestamp and summarise by the 97th percentile.

    For every distinct ``timestamp`` the score is the sum of absolute errors
    (rounded to 10 decimals) between ``forecast_column`` and ``supply``,
    divided by ``100 * mean(supply)`` of that timestamp.

    NOTE(review): rows whose forecast is NaN add nothing to the error sum,
    because pandas' ``sum`` skips NaN by default - confirm this is intended.
    NOTE(review): the meaning of the constant 100 is not documented here.

    Returns
    -------
    (results, p97) : tuple
        ``results`` is the list of per-timestamp scores in group order;
        ``p97`` is ``np.percentile(results, 97)``.
    """
    def _score(rows):
        abs_error_total = (rows[forecast_column] - rows['supply']).abs().round(10).sum()
        return abs_error_total / (rows['supply'].mean() * 100)

    results = [_score(rows) for _, rows in df.groupby('timestamp')]
    return results, np.percentile(results, 97)

### Calculate Prophet forecast

In [64]:
# Build the frame that carries the 'final_forecast' column
df_final = add_final_forecast(df=data, cut_date='2020-01-31 00:00:00')

In [66]:
# Persist the DataFrame with the final forecast to disk
pd.to_pickle(df_final, "df_final.pkl")

### Calculate success metrics

In [71]:
# Divide into train and test sets.
# .copy() makes each split an independent frame, so the columns added to `test`
# further down do not raise pandas' SettingWithCopyWarning.
cut_date = '2020-01-31 00:00:00'
train = df_final[ df_final.timestamp < cut_date ].copy()
test = df_final[ df_final.timestamp >= cut_date ].copy()

In [169]:
# Success metric of the naive forecast (column 'forecast') on the test period
results1, naive_success_metric = final_success_metric(df=test, forecast_column='forecast')
naive_success_metric

1.146272378516624

In [170]:
# Success metric of the combined forecast (column 'final_forecast') on the test period
results2, prophet_success_metric = final_success_metric(df=test, forecast_column='final_forecast')
prophet_success_metric

1.4823935936556363

In [149]:
# Per-row absolute errors of both forecasts against the observed supply.
abs_err_prophet = (test['supply'] - test['final_forecast']).abs()
abs_err_naive = (test['supply'] - test['forecast']).abs()

# assign() returns a new frame, so nothing is written into a slice of df_final
# (no SettingWithCopyWarning) and re-running the cell gives the same result.
# The .str accessor slices every zone label at once instead of a row-wise apply.
test = test.assign(
    diff_prophet=abs_err_prophet,
    diff_naive=abs_err_naive,
    # presumably the zone label encodes coordinates as '<x>?<y>' - TODO confirm
    x_coordinate=test['zone'].str[:1],
    y_coordinate=test['zone'].str[2:3],
    # positive where the Prophet-based forecast has the smaller absolute error
    diff=abs_err_naive - abs_err_prophet,
)