# Benchmark model generation

In [1]:
# Basics
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cbook as cbook
import os


# Views 3
import views_runs
from views_forecasts.extensions import *


In [2]:
# Common parameters:

dev_id = 'Fatalities002'
run_id = 'Fatalities002'
EndOfHistory = 508
level = 'cm'
get_future = False

username = os.getlogin()

steps = [*range(1, 36+1, 1)] # Which steps to train and predict for

fi_steps = [1,3,6,12,36]
# Specifying partitions

calib_partitioner_dict = {"train":(121,396),"predict":(397,444)}
test_partitioner_dict = {"train":(121,444),"predict":(445,492)}
future_partitioner_dict = {"train":(121,492),"predict":(493,504)}
calib_partitioner =  views_runs.DataPartitioner({"calib":calib_partitioner_dict})
test_partitioner =  views_runs.DataPartitioner({"test":test_partitioner_dict})
future_partitioner =  views_runs.DataPartitioner({"future":future_partitioner_dict})

Mydropbox = f'/Users/{username}/Dropbox (ViEWS)/ViEWS/'
overleafpath = f'/Users/{username}/Dropbox (ViEWS)/Apps/Overleaf/ViEWS predicting fatalities/Tables/'



print('Dropbox path set to',Mydropbox)
print('Overleaf path set to',overleafpath)

Dropbox path set to /Users/havardhegre1/Dropbox (ViEWS)/ViEWS/
Overleaf path set to /Users/havardhegre1/Dropbox (ViEWS)/Apps/Overleaf/ViEWS predicting fatalities/Tables/


In [3]:

# Fatalities002 stuff - contains the list of the current fatalities002 ensemble models

from ModelDefinitions import DefineEnsembleModels

ModelList = DefineEnsembleModels(level)
    
i = 0
for model in ModelList:
    print(i, model['modelname'], model['data_train'])
    i = i + 1

0 fatalities002_baseline_rf baseline002
1 fatalities002_conflicthistory_rf conflict_ln
2 fatalities002_conflicthistory_gbm conflict_ln
3 fatalities002_conflicthistory_hurdle_lgb conflict_ln
4 fatalities002_conflicthistory_long_xgb conflictlong_ln
5 fatalities002_vdem_hurdle_xgb vdem_short
6 fatalities002_wdi_rf wdi_short
7 fatalities002_topics_rf topics_002
8 fatalities002_topics_xgb topics_002
9 fatalities002_topics_hurdle_lgb topics_002
10 fatalities002_joint_broad_rf joint_broad
11 fatalities002_joint_broad_hurdle_rf joint_broad
12 fatalities002_joint_narrow_xgb joint_narrow
13 fatalities002_joint_narrow_hurdle_xgb joint_narrow
14 fatalities002_joint_narrow_hurdle_lgb joint_narrow
15 fatalities002_all_pca3_xgb all_features
16 fatalities002_aquastat_rf aquastat
17 fatalities002_faostat_rf faostat
18 fatalities002_faoprices_rf faoprices
19 fatalities002_imfweo_rf imfweo
20 fat_hh20_Markov_glm joint_narrow
21 fat_hh20_Markov_rf joint_narrow


In [4]:
# Retrieving the predictions for calibration and test partitions
# The ModelList contains the predictions organized by model
from Ensembling import CalibratePredictions, RetrieveStoredPredictions, mean_sd_calibrated, gam_calibrated

ModelList = RetrieveStoredPredictions(ModelList, steps, EndOfHistory, dev_id, level, get_future)

ModelList = CalibratePredictions(ModelList, EndOfHistory, steps)

0 fatalities002_baseline_rf
pr_46_cm_fatalities002_baseline_rf_calib.parquet
pr_46_cm_fatalities002_baseline_rf_test.parquet
1 fatalities002_conflicthistory_rf
pr_46_cm_fatalities002_conflicthistory_rf_calib.parquet
pr_46_cm_fatalities002_conflicthistory_rf_test.parquet
2 fatalities002_conflicthistory_gbm
pr_46_cm_fatalities002_conflicthistory_gbm_calib.parquet
pr_46_cm_fatalities002_conflicthistory_gbm_test.parquet
3 fatalities002_conflicthistory_hurdle_lgb
pr_46_cm_fatalities002_conflicthistory_hurdle_lgb_calib.parquet
pr_46_cm_fatalities002_conflicthistory_hurdle_lgb_test.parquet
4 fatalities002_conflicthistory_long_xgb
pr_46_cm_fatalities002_conflicthistory_long_xgb_calib.parquet
pr_46_cm_fatalities002_conflicthistory_long_xgb_test.parquet
5 fatalities002_vdem_hurdle_xgb
pr_46_cm_fatalities002_vdem_hurdle_xgb_calib.parquet
pr_46_cm_fatalities002_vdem_hurdle_xgb_test.parquet
6 fatalities002_wdi_rf
pr_46_cm_fatalities002_wdi_rf_calib.parquet
pr_46_cm_fatalities002_wdi_rf_test.parquet

In [5]:
ModelList[0]['predictions_test_df']
ModelList[0]

{'modelname': 'fatalities002_baseline_rf',
 'algorithm': XGBRFRegressor(base_score=None, booster=None, callbacks=None,
                colsample_bylevel=None, colsample_bytree=None,
                early_stopping_rounds=None, enable_categorical=False,
                eval_metric=None, gamma=None, gpu_id=None, grow_policy=None,
                importance_type=None, interaction_constraints=None, max_bin=None,
                max_cat_to_onehot=None, max_delta_step=None, max_depth=None,
                max_leaves=None, min_child_weight=None, missing=nan,
                monotone_constraints=None, n_estimators=300, n_jobs=12,
                num_parallel_tree=None, objective='reg:squarederror',
                predictor=None, random_state=None, reg_alpha=None,
                sampling_method=None, scale_pos_weight=None, ...),
 'depvar': 'ln_ged_sb_dep',
 'data_train': 'baseline002',
 'queryset': 'fatalities002_baseline',
 'preprocessing': 'float_it',
 'level': 'cm',
 'description': 'Baselin

In [6]:
# Dataframe with actuals
df_actuals = pd.DataFrame(ModelList[0]['predictions_test_df']['ln_ged_sb_dep'])
print(df_actuals.head())
print(df_actuals.tail())


                     ln_ged_sb_dep
month_id country_id               
445      1                     0.0
         2                     0.0
         3                     0.0
         4                     0.0
         5                     0.0
                     ln_ged_sb_dep
month_id country_id               
492      242                   0.0
         243                   0.0
         244                   0.0
         245                   0.0
         246                   0.0


In [9]:
def reshape_df(df, draw):
    df = df.drop('ln_ged_sb_dep',axis=1)
    df.reset_index(inplace=True)
    df['draw'] = draw
    df_long = pd.wide_to_long(df, 'step_pred_', i = ['month_id', 'country_id', 'draw'], j = 'step')
    return(df_long)
    
model_draw = 0
df = ModelList[model_draw]['predictions_test_df'].copy()
df_results_long = reshape_df(df,model_draw)
print('Starting with model/draw',model_draw, model['modelname'])
print(df_results_long.describe())


for model in ModelList[1:]:
    model_draw += 1
    print('Appending model/draw',model_draw, model['modelname'])
    df = ModelList[model_draw]['predictions_test_df'].copy()
    df_reshaped = reshape_df(df,model_draw)
    df_results_long = pd.concat([df_results_long ,df_reshaped], axis=0)
    

Starting with model/draw 0 fat_hh20_Markov_rf
          step_pred_
count  330048.000000
mean        0.510586
std         1.091645
min         0.004662
25%         0.011540
50%         0.036516
75%         0.191086
max         6.805649
Appending model/draw 1 fatalities002_conflicthistory_rf
Appending model/draw 2 fatalities002_conflicthistory_gbm
Appending model/draw 3 fatalities002_conflicthistory_hurdle_lgb
Appending model/draw 4 fatalities002_conflicthistory_long_xgb
Appending model/draw 5 fatalities002_vdem_hurdle_xgb
Appending model/draw 6 fatalities002_wdi_rf
Appending model/draw 7 fatalities002_topics_rf
Appending model/draw 8 fatalities002_topics_xgb
Appending model/draw 9 fatalities002_topics_hurdle_lgb
Appending model/draw 10 fatalities002_joint_broad_rf
Appending model/draw 11 fatalities002_joint_broad_hurdle_rf
Appending model/draw 12 fatalities002_joint_narrow_xgb
Appending model/draw 13 fatalities002_joint_narrow_hurdle_xgb
Appending model/draw 14 fatalities002_joint_narro

In [8]:
# Results file in long format
print(df_results_long.describe())
print(df_results_long.tail())


         step_pred_
count  7.261056e+06
mean   4.892335e-01
std    1.068978e+00
min   -2.236131e+00
25%    1.050091e-02
50%    2.718887e-02
75%    2.527420e-01
max    1.106909e+01
                               step_pred_
month_id country_id draw step            
492      246        21   32      2.186300
                         33      2.270486
                         34      1.990229
                         35      1.969436
                         36      1.934854
