In [112]:
%run ./imports_models.py
%run ./dataset_preparation_v2.ipynb
%run -i ./utils.py

Stored 'df_complete' (DataFrame)
Stored 'df_grouped_dates' (DataFrame)
Stored 'series_grouped_dates' (Series)
Stored 'pandas_dataframe_groupby_site' (DataFrameGroupBy)
Stored 'sites_names' (list)


## Data

In [113]:
%store -r pandas_dataframe_groupby_site
%store -r sites_names

# Prepare Timeseries, Train and Forecast by group

* If `all_sites` is <strong>False</strong>, a single model is trained for the group selected by `site_index`.
* If `all_sites` is <strong>True</strong>, one model is trained for every group in the dataset.

In [114]:
# Selection switches: process every site, or only the one at `site_index`.
all_sites = False
site_index = 47

# Either the full list of site names or a one-element list with the chosen site.
sites = sites_names if all_sites else [sites_names[site_index]]

In [115]:
# Per-site results collected by the training loop; each key owns a separate
# list, and entries at the same position belong to the same site.
groups_info = {key: [] for key in ('timeseries', 'models', 'forecasts', 'scaler')}

# Per-site durations recorded by the training loop.
exec_times_info = dict(training_exec_times=[], forecast_exec_times=[])

# Placeholder value; reassigned once the training loop has finished.
overall_exec_time = 1

In [119]:
# Display the rows of a single site group for a quick visual check.
# NOTE(review): index 48 is hard-coded and differs from `site_index` (47) used
# for training — confirm this is intentional.
pandas_dataframe_groupby_site.get_group(sites_names[48])

Unnamed: 0,SITE,DATES,OCCUPANCY_COUNT
48,Product,8/31/2020,0
105,Product,9/1/2020,0
162,Product,9/2/2020,0
219,Product,9/3/2020,0
276,Product,9/4/2020,1
...,...,...,...
4038,Product,12/7/2020,0
4095,Product,12/8/2020,0
4152,Product,12/9/2020,0
4209,Product,12/10/2020,0


In [120]:
# Train one SimpleFeedForward model per site in `sites`, forecast the held-out
# tail of each series, and collect the results in `groups_info` and
# `exec_times_info` (one entry per site, in iteration order).

# Report the sites that are actually processed; `site_index` alone is
# misleading when `all_sites` is True.
print('Multiple Training and Forecasting %s ...\n' % ', '.join(str(name) for name in sites))

# Start from empty accumulators so that re-running this cell does not stack
# results from earlier runs; later cells read position 0 of each list.
for _collected in (*groups_info.values(), *exec_times_info.values()):
    _collected.clear()

# Daily frequency is the same for every site, so set it once.
freq = 'D'

# perf_counter() is a monotonic clock intended for measuring durations.
start_time_overall = time.perf_counter()
for site in tqdm(sites):
    print('\033[1m' + 'Group %s'%(site) + '\033[0m')
    site_df = pandas_dataframe_groupby_site.get_group(site)
    nrows = len(site_df)
    # The second positional argument is int(70% of the rows).
    # NOTE(review): presumably the train split size — confirm against
    # prepare_train_test_timeseries_gluon in utils.py.
    site_train_test_timeseries_gluon = prepare_train_test_timeseries_gluon(site_df,
                                                                           int(nrows*0.7),
                                                                           data_column_name='OCCUPANCY_COUNT',
                                                                           date_column_name='DATES',
                                                                           freq=freq,
                                                                           perform_scale_data=False,
                                                                           start_day='8/31/2020')
    # The helper returns (train dataset, test dataset, scaler) by position.
    site_timeseries_gluon_train = site_train_test_timeseries_gluon[0]
    site_timeseries_gluon_test = site_train_test_timeseries_gluon[1]
    scaler = site_train_test_timeseries_gluon[2]
    site_entry_train = next(iter(site_timeseries_gluon_train))
    site_entry_test = next(iter(site_timeseries_gluon_test))
    site_series_train = to_pandas(site_entry_train)
    site_series_test = to_pandas(site_entry_test)


    print('Transform dataset...')
    # The forecast horizon is the part of the test series that extends past
    # the training series.
    prediction_length = len(site_series_test) - len(site_series_train)
    if prediction_length <= 0:
        raise ValueError(
            'Group %s: test series (%d points) must be longer than the train series (%d points)'
            % (site, len(site_series_test), len(site_series_train))
        )
    transformation = create_transformation(freq, 2 * prediction_length, prediction_length)
    train_tf = transformation(iter(site_timeseries_gluon_train), is_train=True)
    # Pull one transformed entry; the value is not used by the training below.
    # NOTE(review): looks like a smoke test of the transformation — confirm.
    train_tf_entry = next(iter(train_tf))
    print("--- Transforming has ended ---")


    print('Training model...')
    start_time_training = time.perf_counter()
    estimator = SimpleFeedForwardEstimator(
        num_hidden_dimensions=[10],
        prediction_length=prediction_length,
        context_length=2*prediction_length,
        freq=freq,
        trainer=Trainer(ctx="cpu",
                        epochs=5,
                        learning_rate=1e-3,
                        hybridize=False,
                        num_batches_per_epoch=50
                       )
    )
    site_predictor = estimator.train(site_timeseries_gluon_train)
    training_exec_time = time.perf_counter() - start_time_training
    print("--- Training ended at %s seconds ---" % (training_exec_time))


    print('Forecasting...')
    start_time_forecast = time.perf_counter()
    forecast_it, ts_it = make_evaluation_predictions(
        dataset=site_timeseries_gluon_test,
        predictor=site_predictor,
        num_samples=100,
    )
    # Materialise both iterators inside the timed region so the measured
    # duration covers the actual prediction work.
    site_forecasts = list(forecast_it)
    site_tss = list(ts_it)
    forecast_exec_time = time.perf_counter() - start_time_forecast
    print("--- Forecast ended at %s seconds ---\n" % (forecast_exec_time))

    exec_times_info['training_exec_times'].append(training_exec_time)
    exec_times_info['forecast_exec_times'].append(forecast_exec_time)

    groups_info['timeseries'].append((site_timeseries_gluon_train, site_timeseries_gluon_test))
    groups_info['models'].append(site_predictor)
    groups_info['forecasts'].append((site_forecasts, site_tss))
    groups_info['scaler'].append(scaler)


overall_exec_time = time.perf_counter() - start_time_overall
print("Multiple Training and Forecasting ended at--- %s seconds ---" % (overall_exec_time))

Multiple Training and Forecasting Payroll ...



  0%|          | 0/1 [00:00<?, ?it/s]


  0%|                                                                                           | 0/50 [00:00<?, ?it/s][A

[1mGroup Payroll[0m
Transform dataset...
--- Transforming has ended ---
Training model...
learning rate from ``lr_scheduler`` has been overwritten by ``learning_rate`` in optimizer.


100%|██████████████████████████████████████████████████| 50/50 [00:01<00:00, 31.87it/s, epoch=1/5, avg_epoch_loss=2.83]

100%|██████████████████████████████████████████████████| 50/50 [00:01<00:00, 32.65it/s, epoch=2/5, avg_epoch_loss=1.51][A

100%|█████████████████████████████████████████████████| 50/50 [00:01<00:00, 34.86it/s, epoch=3/5, avg_epoch_loss=0.931][A

100%|█████████████████████████████████████████████████| 50/50 [00:01<00:00, 36.64it/s, epoch=4/5, avg_epoch_loss=0.727][A

100%|█████████████████████████████████████████████████| 50/50 [00:01<00:00, 37.40it/s, epoch=5/5, avg_epoch_loss=0.414][A

--- Training ended at 7.349899053573608 seconds ---
Forecasting...
--- Forecast ended at 0.01595902442932129 seconds ---

Multiple Training and Forecasting ended at--- 7.414998292922974 seconds ---





# Load info

In [121]:
# Unpack the stored results of the first processed site (position 0 of each
# `groups_info` list) for the evaluation and plotting cells below.
site_entry_train = next(iter(groups_info['timeseries'][0][0]))
site_entry_test = next(iter(groups_info['timeseries'][0][1]))
site_series_train_scaled = to_pandas(site_entry_train)
site_series_test_scaled = to_pandas(site_entry_test)

# Each 'forecasts' entry is a (forecasts, target series) pair.
forecasts = groups_info['forecasts'][0][0]
tss_scaled = groups_info['forecasts'][0][1]
forecast_entry = forecasts[0]
# `tss` is never assigned by any active cell (only inside commented-out code),
# so take the first target series from `tss_scaled` instead.
ts_entry = tss_scaled[0]

In [83]:
# Disabled: would map the train/test series and the target series back to the
# original scale through `scaler.inverse_transform`.
# NOTE(review): presumably unused because the training cell passes
# perform_scale_data=False — confirm before removing.
# site_series_train = scaler.inverse_transform(site_series_train_scaled.values.reshape(-1,1))
# site_series_test = scaler.inverse_transform(site_series_test_scaled.values.reshape(-1,1))
# tss = scaler.inverse_transform(tss_scaled[0].values.reshape(-1,1))

# Forecast evaluation metrics

In [63]:
# Disabled: would replace every forecast with a NumPy array reshaped to (-1, 1).
# NOTE(review): looks like leftover experimentation — confirm whether it can be removed.
# for i, item in enumerate(forecasts):
#     forecasts[i] = np.array(forecasts[i]).reshape(-1,1)

In [123]:
# Score the forecasts of the first site against their target series at the
# 10%, 50% and 90% quantile levels.
quantile_levels = [0.1, 0.5, 0.9]
evaluator = Evaluator(quantiles=quantile_levels)

# The number of series handed to the evaluator is the size of the test dataset.
first_site_test_dataset = groups_info['timeseries'][0][1]
agg_metrics, item_metrics = evaluator(
    iter(tss_scaled),
    iter(forecasts),
    num_series=len(first_site_test_dataset),
)

Running evaluation: 100%|████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 81.74it/s]


In [124]:
# Render the aggregate metrics as indented JSON text and print it.
agg_metrics_text = json.dumps(agg_metrics, indent=4)
print(agg_metrics_text)

{
    "MSE": 0.423625489939814,
    "abs_error": 12.528905868530273,
    "abs_target_sum": 14.0,
    "abs_target_mean": 0.6086956521739131,
    "seasonal_error": 0.49019607843137253,
    "MASE": 1.1112594770348592,
    "MAPE": 0.294204654900924,
    "sMAPE": 1.1237152475055157,
    "OWA": NaN,
    "MSIS": 14.58068091351053,
    "QuantileLoss[0.1]": 6.671433553099631,
    "Coverage[0.1]": 0.30434782608695654,
    "QuantileLoss[0.5]": 12.528905387967825,
    "Coverage[0.5]": 0.5652173913043478,
    "QuantileLoss[0.9]": 7.35172000080347,
    "Coverage[0.9]": 0.6956521739130435,
    "RMSE": 0.6508651856873388,
    "NRMSE": 1.069278519343485,
    "ND": 0.8949218477521624,
    "wQuantileLoss[0.1]": 0.4765309680785451,
    "wQuantileLoss[0.5]": 0.8949218134262732,
    "wQuantileLoss[0.9]": 0.5251228572002479,
    "mean_absolute_QuantileLoss": 8.850686313956976,
    "mean_wQuantileLoss": 0.6321918795683553,
    "MAE_Coverage": 0.15797101449275364
}


In [125]:
# Preview the leading rows of the per-series metrics returned by the evaluator.
item_metrics.head()

Unnamed: 0,item_id,MSE,abs_error,abs_target_sum,abs_target_mean,seasonal_error,MASE,MAPE,sMAPE,OWA,MSIS,QuantileLoss[0.1],Coverage[0.1],QuantileLoss[0.5],Coverage[0.5],QuantileLoss[0.9],Coverage[0.9]
0,,0.423625,12.528906,14.0,0.608696,0.490196,1.111259,0.294205,1.123715,,14.580681,6.671434,0.304348,12.528905,0.565217,7.35172,0.695652


# Plots

#### Train and test split

In [None]:
# Plot the training series (top) and the full test series (bottom) of the
# first processed site, marking where the training window ends.
#
# The series come from `site_series_train_scaled` / `site_series_test_scaled`
# (unpacked from position 0 of `groups_info`) rather than from variables left
# over by the training loop, which hold the last site processed and may not
# match the site the metrics above refer to.
fig, ax = plt.subplots(2, 1, sharex=True, sharey=True, figsize=(10, 7))

site_series_train_scaled.plot(ax=ax[0])
ax[0].set_title("Train series")
ax[0].set_ylabel("OCCUPANCY_COUNT")
ax[0].grid(which="both")
ax[0].legend(["train series"], loc="upper left")

site_series_test_scaled.plot(ax=ax[1])
ax[1].axvline(site_series_train_scaled.index[-1], color='r') # end of train dataset
ax[1].set_title("Test series")
ax[1].set_ylabel("OCCUPANCY_COUNT")
ax[1].grid(which="both")
ax[1].legend(["test series", "end of train series"], loc="upper left")

plt.show()

### Overall forecast plot

In [None]:
# Plot the selected target series together with its forecast.
# NOTE(review): `plot_prob_forecasts` is not defined in this notebook; presumably
# it comes from utils.py — confirm.
plot_prob_forecasts(ts_entry, forecast_entry)