In [7]:
def read_data_from_file(csv_file):
    """Load a CSV into a DataFrame, parsing the ``timestamp`` column as dates.

    Args:
        csv_file: Anything ``pd.read_csv`` accepts as its first argument
            (the code passes it through unchanged).

    Returns:
        The DataFrame returned by ``pd.read_csv``.
    """
    return pd.read_csv(csv_file, parse_dates=['timestamp'])

In [2]:
def calculate_prophet_forecast(train, test):
    """Fit a Prophet model on ``train`` and predict ``len(test)`` hourly steps.

    Args:
        train: Frame passed to ``Prophet.fit``.
            # presumably has Prophet's ``ds``/``y`` columns; verify against caller
        test: Only its length is used, as the number of periods to forecast.

    Returns:
        The frame returned by ``Prophet.predict`` for the generated future
        timestamps (history excluded).

    NOTE(review): the future frame is generated at a fixed hourly frequency
    rather than taken from ``test``'s own timestamps, so this looks like it
    assumes ``test`` is a gap-free hourly series -- confirm.
    """
    model = Prophet(
        growth="linear",
        holidays_prior_scale=10,
        yearly_seasonality=False,
        weekly_seasonality=False,
        daily_seasonality=False,
    )
    model.add_country_holidays(country_name='PL')

    # The built-in seasonalities are switched off above; these custom ones
    # are registered instead, in the same order as before.
    custom_seasonalities = (
        ('monthly', 30.5, 55),
        ('weekly', 7, 10),
        ('daily', 1, 15),
    )
    for season_name, season_period, season_order in custom_seasonalities:
        model.add_seasonality(
            name=season_name, period=season_period, fourier_order=season_order
        )

    model.fit(train)
    horizon = model.make_future_dataframe(
        periods=len(test), freq='H', include_history=False
    )
    return model.predict(horizon)

In [9]:
def rename_columns_for_prophet(df):
    """Return the ``timestamp``/``supply`` columns renamed to ``ds``/``y``.

    The input frame is not modified. The result keeps the original row
    labels and is sorted by ``ds``.

    Args:
        df: Frame containing ``timestamp`` and ``supply`` columns.

    Returns:
        A two-column frame (``ds``, ``y``) ordered by ``ds``.
    """
    selected = df[['timestamp', 'supply']]
    renamed = selected.rename(columns={'timestamp': 'ds', 'supply': 'y'})
    return renamed.sort_values(by=['ds'])

In [8]:
def add_prophet_forecast(df,cut_date):
    """Add a per-zone Prophet forecast to ``df`` in a ``prophet_forecast`` column.

    For every ``zone`` group, the rows are converted to Prophet's ``ds``/``y``
    layout, split at ``cut_date``, and a model fitted on the earlier part is
    used to forecast the later part. The forecast values are written back
    onto the matching rows of ``df``; rows before ``cut_date`` are not
    assigned a forecast.

    Args:
        df: Frame with ``zone``, ``timestamp`` and ``supply`` columns.
            Modified in place.
            # assumes row labels are unique so that test rows map back 1:1 -- TODO confirm
        cut_date: Split point; rows with ``timestamp >= cut_date`` are forecast.

    Returns:
        The same ``df`` object, with ``prophet_forecast`` filled for the
        forecast rows.

    Raises:
        ValueError: If the forecast length does not match the number of test
            rows (raised by pandas on assignment).
    """
    for name, group in df.groupby('zone'):
        zone_series = rename_columns_for_prophet(group)
        train, test = divide_into_train_and_test(zone_series, cut_date, 'ds')
        print(name)

        prophet_forecast = calculate_prophet_forecast(train, test)

        # ``test`` is sorted by time and still carries df's row labels, so the
        # forecast values (taken positionally) can be written back in a single
        # assignment instead of one .loc write per row.
        df.loc[test.index, 'prophet_forecast'] = prophet_forecast['yhat'].to_numpy()

    return df

In [4]:
def divide_into_train_and_test(df,cut_date,timestamp):
    """Split ``df`` into rows before ``cut_date`` and rows at or after it.

    Both parts are returned as independent copies, so callers can add or
    overwrite columns on them without pandas' chained-assignment warning
    and without touching ``df``.

    Args:
        df: Frame to split.
        cut_date: Threshold compared against the ``timestamp`` column.
        timestamp: Name of the column holding the values to compare.

    Returns:
        ``(train, test)`` where ``train`` has ``df[timestamp] < cut_date`` and
        ``test`` has ``df[timestamp] >= cut_date``. Rows for which neither
        comparison is true (e.g. missing timestamps) appear in neither part.
        Original row labels are preserved.
    """
    column = df[timestamp]
    train = df[column < cut_date].copy()
    test = df[column >= cut_date].copy()
    return train, test

In [5]:
def calculate_zones_mean_supply(train):
    """Compute the mean ``supply`` of every zone in ``train``.

    Args:
        train: Frame with ``zone`` and ``supply`` columns.

    Returns:
        A dict mapping each zone value to its mean supply, with keys in order
        of first appearance in ``train``. Rows whose zone is missing are not
        included as a key.
    """
    # One grouped pass instead of re-filtering the frame once per zone.
    # sort=False keeps first-appearance order; observed=True restricts a
    # categorical zone column to the categories actually present.
    zone_means = train.groupby('zone', sort=False, observed=True)['supply'].mean()
    return zone_means.to_dict()

In [14]:
def add_mean_supply(data,cutoff_date):
    """Add a ``zone_mean_supply`` column holding each zone's training-period mean.

    The mean is computed only from rows with ``timestamp < cutoff_date`` and
    then mapped onto every row of ``data`` by its ``zone``. Zones absent from
    the training period get a missing value.

    Args:
        data: Frame with ``timestamp``, ``zone`` and ``supply`` columns.
            Modified in place.
        cutoff_date: Split point between the training and test periods.

    Returns:
        The same ``data`` object with the new column added.
    """
    # Only the training part is needed; the test part is discarded.
    train, _ = divide_into_train_and_test(data, cutoff_date, 'timestamp')

    mean_supply_dict = calculate_zones_mean_supply(train)
    data['zone_mean_supply'] = data['zone'].map(mean_supply_dict)
    return data

In [15]:
def calculate_prophet_success_metric(data, cutoff_date):
    """Return the 97th percentile of the per-timestamp mean relative Prophet error.

    Steps, on the rows with ``timestamp >= cutoff_date``:
      1. absolute error between ``supply`` and ``prophet_forecast``, rounded
         to the nearest whole number;
      2. divide by ``zone_mean_supply`` to get a relative error;
      3. average the relative errors of all rows sharing a timestamp;
      4. take the 97th percentile of those averages.

    Args:
        data: Frame with ``timestamp``, ``supply``, ``prophet_forecast`` and
            ``zone_mean_supply`` columns. Not modified.
        cutoff_date: Start of the evaluation period.

    Returns:
        The 97th percentile as computed by ``np.percentile``.
    """
    _, test = divide_into_train_and_test(data, cutoff_date, 'timestamp')

    # Keep the errors in standalone Series rather than writing new columns
    # onto the filtered frame.
    errors = (test['supply'] - test['prophet_forecast']).abs().round(0)
    relative_errors = errors / test['zone_mean_supply']

    # One mean per timestamp (each timestamp group holds one row per zone).
    per_timestamp_mean = relative_errors.groupby(test['timestamp']).mean()

    return np.percentile(per_timestamp_mean.to_numpy(), 97)

In [13]:
def calculate_naive_success_metric(data, cutoff_date):
    """Return the 97th percentile of the per-timestamp mean relative naive error.

    Steps, on the rows with ``timestamp >= cutoff_date``:
      1. absolute error between ``supply`` and ``forecast`` (not rounded);
      2. divide by ``zone_mean_supply`` to get a relative error;
      3. average the relative errors of all rows sharing a timestamp;
      4. take the 97th percentile of those averages.

    Args:
        data: Frame with ``timestamp``, ``supply``, ``forecast`` and
            ``zone_mean_supply`` columns. Not modified.
        cutoff_date: Start of the evaluation period.

    Returns:
        The 97th percentile as computed by ``np.percentile``.
    """
    _, test = divide_into_train_and_test(data, cutoff_date, 'timestamp')

    # Keep the errors in standalone Series rather than writing new columns
    # onto the filtered frame.
    errors = (test['supply'] - test['forecast']).abs()
    relative_errors = errors / test['zone_mean_supply']

    # One mean per timestamp (each timestamp group holds one row per zone).
    per_timestamp_mean = relative_errors.groupby(test['timestamp']).mean()

    return np.percentile(per_timestamp_mean.to_numpy(), 97)