# Presenting and evaluating benchmark models

In [1]:
# Basic imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cbook as cbook
import os

# Evaluation scripts
from CompetitionEvaluation import load_data, structure_data, calculate_metrics
 

In [2]:
# Where to find files
# The home directory is resolved with os.path.expanduser rather than
# os.getlogin(): getlogin() raises OSError when the kernel has no controlling
# terminal, and building '/Users/<name>' by hand only works on a macOS layout.
home_dir = os.path.expanduser('~')
username = os.path.basename(home_dir)  # still defined in case other cells reference it
Mydropbox = f'{home_dir}/Dropbox (ViEWS)/ViEWS/'
overleafpath = f'{home_dir}/Dropbox (ViEWS)/Apps/Overleaf/ViEWS predicting fatalities/Tables/'

print('Dropbox path set to',Mydropbox)
print('Overleaf path set to',overleafpath)

# Folder, inside the Dropbox tree, that the parquet files are read from.
filepath = Mydropbox + 'Prediction_competition_2023/'


Dropbox path set to /Users/havardhegre1/Dropbox (ViEWS)/ViEWS/
Overleaf path set to /Users/havardhegre1/Dropbox (ViEWS)/Apps/Overleaf/ViEWS predicting fatalities/Tables/


## Reading in actuals

In [20]:
# Load the observed outcomes for both levels of analysis. The two files share
# a naming pattern, so they are read in one pass: cm first, then pgm.
df_cm_actuals, df_pgm_actuals = (
    pd.read_parquet(f'{filepath}{level}_actuals.parquet')
    for level in ('cm', 'pgm')
)

# Show the first rows of each frame side by side (displayed as a tuple).
(df_cm_actuals.head(), df_pgm_actuals.head())


(                     ged_sb
 month_id country_id        
 445      1              0.0
          2              0.0
          3              0.0
          4              0.0
          5              0.0,
                        ged_sb
 month_id priogrid_gid        
 445      62356            0.0
          79599            0.0
          79600            0.0
          79601            0.0
          80317            0.0)

In [4]:
# Summary statistics for the pgm actuals, with extra percentiles requested in
# the upper tail (99%, 99.2%, 99.5%) in addition to the usual quartiles.
df_pgm_actuals.describe(
    percentiles=[0.25, 0.50, 0.75, 0.90, 0.95, 0.99, 0.992, 0.995],
)

Unnamed: 0,ged_sb
count,629280.0
mean,0.15981
std,6.765771
min,0.0
25%,0.0
50%,0.0
75%,0.0
90%,0.0
95%,0.0
99%,0.0


## Reading in benchmark prediction models: 
1. cm model, ensemble-based

In [16]:
# Load the cm ensemble benchmark predictions.
df_bm_cm_ensemble = pd.read_parquet(f'{filepath}cm_benchmark_ensemble_550.parquet')

# Summary statistics of the loaded predictions.
# NOTE(review): the saved output reports min = -1.0 for `prediction`; confirm
# whether negative values are expected in this file.
df_bm_cm_ensemble.describe()

Unnamed: 0,prediction
count,58088450.0
mean,6.827782
std,89.2469
min,-1.0
25%,0.0
50%,0.0
75%,0.0
max,64156.0


In [None]:
# Peek at the first five rows of the cm ensemble benchmark.
df_bm_cm_ensemble.head(5)

In [6]:
# Load the pgm historical-values benchmark file for step 3.
df_bm_pgm_historical_values = pd.read_parquet(
    f'{filepath}pgm_benchmark_historical_values_step_3.parquet'
)

# Summary statistics of the loaded predictions.
df_bm_pgm_historical_values.describe()

Unnamed: 0,prediction
count,251082700.0
mean,0.5868259
std,14.88181
min,0.0
25%,0.0
50%,0.0
75%,0.0
max,2680.0


In [17]:
# Score the step-3 pgm historical-values benchmark against the pgm actuals.
# The frames loaded earlier in the notebook are passed directly, so load_data
# is not needed here.
# NOTE: the saved run of this cell ended in a KeyboardInterrupt.

# Reshape both frames for scoring (the original note says: as xarrays that
# xskillscore.crps_ensemble wants).
observed, predictions = structure_data(
    df_pgm_actuals,
    df_bm_pgm_historical_values,
)

# Compute the CRPS table.
metrics = calculate_metrics(observed, predictions)

KeyboardInterrupt: 

In [21]:
# Score the cm ensemble benchmark against the cm actuals.
# The frames loaded earlier in the notebook are passed directly, so load_data
# is not needed here.

# Reshape both frames for scoring (the original note says: as xarrays that
# xskillscore.crps_ensemble wants).
observed, predictions = structure_data(
    df_cm_actuals,
    df_bm_cm_ensemble,
)

# Compute the CRPS table.
metrics = calculate_metrics(observed, predictions)

In [22]:
# Display the most recently computed metrics table (one CRPS value per step).
metrics

Unnamed: 0_level_0,crps
step,Unnamed: 1_level_1
3,17.223322
4,17.593686
5,17.644822
6,17.864944
7,17.878353
8,17.827381
9,17.781835
10,17.777691
11,17.811072
12,17.876347


In [None]:
# Load the pgm historical-values benchmark for each of the 12 steps (3 to 14)
# into the list `df_pgm_hv`, in step order. Timestamps are printed before and
# after because reading the files is slow.
from datetime import datetime

print("Cell started to run:", datetime.now())

df_pgm_hv = []
for step in range(3, 15):  # steps 3..14 inclusive
    df = pd.read_parquet(f'{filepath}pgm_benchmark_historical_values_step_{step}.parquet')
    # Print summary statistics as a quick sanity check on each file.
    print(step, df.describe())
    df_pgm_hv.append(df)

print("Cell run ended:", datetime.now())

In [None]:
# Score every frame in `df_pgm_hv` against the pgm actuals, one step at a time,
# printing a timestamp per step because each evaluation is slow.
print("Cell started to run:", datetime.now())

# The frames were appended in step order starting at step 3, so enumerate
# recovers the step number without a hand-maintained counter.
pgm_hv_metrics = {}
for step, df in enumerate(df_pgm_hv, start=3):
    print('step', step, datetime.now())
    # Reshape both frames for scoring (the original note says: as xarrays that
    # xskillscore.crps_ensemble wants), then compute the CRPS table.
    observed, predictions = structure_data(df_pgm_actuals, df)
    metrics = calculate_metrics(observed, predictions)
    print(metrics)
    # Keep each step's result; `metrics` alone only holds the last step.
    pgm_hv_metrics[step] = metrics
print("Cell run ended:", datetime.now())



# Read in the sc-type prediction files


In [30]:
# Load the 2022 pgm ensemble benchmark predictions.
df_bm_pgm_ensemble2022 = pd.read_parquet(f'{filepath}bm_pgm_ensemble_2022.parquet')

# Keep the actuals whose first index level (month_id) lies in 505..516; label
# slicing with .loc includes both end points.
df_pgm_actuals_2022 = df_pgm_actuals.loc[slice(505, 516)]

# NOTE(review): the saved tail shows month_id 492 in the prediction file, which
# is outside the 505-516 window used for the actuals - confirm that both frames
# index the same months before scoring them against each other.
df_bm_pgm_ensemble2022.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,prediction
month_id,priogrid_gid,draw,step,Unnamed: 4_level_1
492,190496,11,14,0
492,190507,11,14,0
492,190508,11,14,0
492,190510,11,14,0
492,190511,11,14,0


In [28]:

# Score the 2022 pgm ensemble benchmark against the 2022 pgm actuals.
# Reshape both frames for scoring (the original note says: as xarrays that
# xskillscore.crps_ensemble wants), then compute the CRPS table.
observed, predictions = structure_data(df_pgm_actuals_2022, df_bm_pgm_ensemble2022)
metrics = calculate_metrics(observed, predictions)

# A table in which every CRPS value is NaN carries no information. When that
# happens, print the month_id range of both inputs so a mismatch between the
# two indexes (the most likely cause - to be confirmed) is visible right away.
if metrics['crps'].isna().all():
    obs_months = df_pgm_actuals_2022.index.get_level_values('month_id')
    pred_months = df_bm_pgm_ensemble2022.index.get_level_values('month_id')
    print(
        'WARNING: every CRPS value is NaN. '
        f'Actuals cover month_id {obs_months.min()}-{obs_months.max()}, '
        f'predictions cover month_id {pred_months.min()}-{pred_months.max()}; '
        'check that both frames refer to the same months.'
    )

metrics

Unnamed: 0_level_0,crps
step,Unnamed: 1_level_1
3.0,
4.0,
5.0,
6.0,
7.0,
8.0,
9.0,
10.0,
11.0,
12.0,


# Creating samples based on point predictions

Assuming Poisson distributions

In [None]:
# Load the aggregated cm ensemble benchmark.
cm_ensemble_aggregated = pd.read_parquet(
    f'{filepath}cm_benchmark_ensemble_550_aggregated.parquet'
)

# Print summary statistics followed by the first rows, each on its own line.
print(cm_ensemble_aggregated.describe(), cm_ensemble_aggregated.head(), sep='\n')

In [None]:
# Strip down to a year of sc predictions: for each step 3..14, take the rows
# at first-level index label 442 + step and keep only those whose 'step' level
# equals that step.
# NOTE(review): 442 is presumably the last month of data behind the forecasts
# (the actuals shown earlier start at month_id 445 = 442 + 3) - TODO confirm.
# NOTE(review): selecting a single label with .loc drops that index level, so
# the concatenated frame no longer carries it - confirm this is intended.
log_predictions = cm_ensemble_aggregated['mean_log_prediction']

df_cm_ensemble = []
for step in range(3, 15):  # steps 3..14 inclusive
    month_slice = log_predictions.loc[442 + step]
    matches_step = month_slice.index.get_level_values('step').isin([step])
    df = pd.DataFrame(month_slice[matches_step])
    # Undo the log1p transform: expm1(x) = exp(x) - 1.
    df['prediction'] = np.expm1(df['mean_log_prediction'])
    df_cm_ensemble.append(df)

df_cm_ensemble_stripped = pd.concat(df_cm_ensemble)
print(df_cm_ensemble_stripped.describe())
print(df_cm_ensemble_stripped.tail(40))


In [None]:
test = np.