# Benchmark models

In [None]:

# Imports
## Basics
import getpass
import os
from functools import partial

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cbook as cbook

## Views 3
import views_runs
from viewser.operations import fetch
from views_forecasts.extensions import *
from viewser import Queryset, Column



In [None]:

# Common parameters:

dev_id = 'Fatalities002'
run_id = 'Fatalities002'
EndOfHistory = 508
get_future = False

# os.getlogin() requires a controlling terminal and raises OSError when the
# process has none; fall back to the environment/passwd based lookup then.
try:
    username = os.getlogin()
except OSError:
    username = getpass.getuser()

fi_steps = [1,3,6,12,36]

# Specifying partitions
# Each partition gives a (first, last) range for training and for prediction
# (presumably month ids -- confirm against views_runs.DataPartitioner).
calib_partitioner_dict = {"train":(121,396),"predict":(397,456)}
test_partitioner_dict = {"train":(121,444),"predict":(457,504)}
future_partitioner_dict = {"train":(121,492),"predict":(505,512)}
calib_partitioner =  views_runs.DataPartitioner({"calib":calib_partitioner_dict})
test_partitioner =  views_runs.DataPartitioner({"test":test_partitioner_dict})
future_partitioner =  views_runs.DataPartitioner({"future":future_partitioner_dict})

# NOTE(review): these absolute paths only resolve on a machine with a
# /Users/<username>/Dropbox (ViEWS) folder.
Mydropbox = f'/Users/{username}/Dropbox (ViEWS)/ViEWS/'
overleafpath = f'/Users/{username}/Dropbox (ViEWS)/Apps/Overleaf/Prediction competition 2023/'


print('Dropbox path set to',Mydropbox)
print('Overleaf path set to',overleafpath)

# Benchmark model parameters
filepath = Mydropbox + 'Prediction_competition_2023/'

year_list = [2018, 2019, 2020, 2021]
draws_cm = 1000   # draws for the cm-level benchmarks
draws_pgm = 100   # draws for the pgm-level benchmarks

# Which steps to train and predict for (3 through 14). This is the single
# definition of `steps` used by the rest of the notebook.
steps = list(range(3, 14 + 1))

# Columns read from the prediction store: the dependent variable followed by
# one 'step_pred_<step>' column per step.
stepcols = ['ln_ged_sb_dep'] + ['step_pred_' + str(step) for step in steps]
print(stepcols)



In [None]:
from BenchmarkModels import poisson_expand_single_point_predictions, describe_expanded
from BenchmarkModels import poisson_expand_multiple_point_predictions, save_models, save_actuals

# cm models
## Ensemble model predictions

In [None]:
# Expand ensemble predictions:
# Read the stored cm-level ensemble predictions and keep only the dependent
# variable and the step-specific prediction columns.
ensemble_df = pd.DataFrame.forecasts.read_store('cm_ensemble_genetic_test', run=dev_id)[stepcols]

# Number of draws comes from the shared config parameter rather than a literal.
sc_predictions_ensemble = poisson_expand_single_point_predictions(
    ensemble_df=ensemble_df,
    level='cm',
    year_list=year_list,
    draws=draws_cm,
)

# Spot-check the first element of the result for one month/country pair.
# Month 457 is January 2018 under the (year-1980)*12+month convention used
# when building the last-historical-values benchmark in this notebook.
describe_expanded(
    df=sc_predictions_ensemble[0]['prediction_df'],
    df_expanded=sc_predictions_ensemble[0]['expanded_df'],
    month=457,
    country=57,
)
sc_predictions_ensemble[0]['expanded_df'].head()

## Last historical values

In [None]:
# Queryset with the cm-level target variable (ged_sb); missing values are
# filled and NAs replaced by the viewser transforms.
qs = (Queryset("benchmark_cm", "country_month")

   # target variable
   .with_column(Column("ged_sb", from_table="ged2_cm", from_column="ged_sb_best_sum_nokgi")
                .transform.missing.fill()
                .transform.missing.replace_na()
                )


   .with_theme("benchmark")
   .describe("""Data for empirical benchmark model, cm level

            """)
   )
df_cm_historical_values = qs.publish().fetch()
df_cm_historical_values['prediction'] = np.nan

# The first index level is the month id (the slicing below relies on it).
month_ids = df_cm_historical_values.index.get_level_values(0)
ged_sb_by_country = df_cm_historical_values.groupby(['country_id'])['ged_sb']

# For every month of a forecast year, the prediction is the observed value
# from the last month with data, i.e. two months before the year starts:
#   this_month           = (year-1980)*12 + month
#   last_month_with_data = (year-1980)*12 - 2
# so the shift (this_month - last_month_with_data) is month + 2, independent
# of the year, and is computed once per calendar month.
# NOTE(review): shift() moves by rows within each country, so this assumes an
# unbroken, sorted monthly series per country -- confirm.
for month in range(1, 12+1):
    this_shift = month + 2
    shifted = ged_sb_by_country.shift(this_shift)
    for year in year_list:
        this_month = ((year-1980)*12)+month
        in_this_month = month_ids == this_month
        # Single .loc call on the frame: chained
        # df['prediction'].loc[...] = ... may write to a temporary copy.
        df_cm_historical_values.loc[in_this_month, 'prediction'] = shifted[in_this_month]

# Keep from the last month with data before the first forecast year through
# December of the last forecast year (454..504 for 2018-2021).
first_month_kept = ((min(year_list)-1980)*12)-2
last_month_kept = ((max(year_list)-1980)*12)+12
df_cm_historical_values = df_cm_historical_values.loc[first_month_kept:last_month_kept]

In [None]:

# Expand the last-historical-value point predictions, using the shared
# cm-level draw count from the config cell instead of a literal.
sc_predictions_last_historical = poisson_expand_single_point_predictions(
    ensemble_df=df_cm_historical_values,
    level='cm',
    year_list=year_list,
    draws=draws_cm,
)

# Spot-check the first element of the result for one month/country pair.
describe_expanded(
    df=sc_predictions_last_historical[0]['prediction_df'],
    df_expanded=sc_predictions_last_historical[0]['expanded_df'],
    month=457,
    country=57,
)
sc_predictions_last_historical[0]['expanded_df'].head()

## Constituent model predictions

In [None]:
# Retrieve the model predictions
# Using code from the VIEWS fatalities002 system

# 20 models: 1 "draw" from each of 20 constituent models, expanded with 50 draws from Poisson distribution for each model.

from ModelDefinitions import DefineEnsembleModels

level = 'cm'
ModelList_cm = DefineEnsembleModels(level)
ModelList_cm = ModelList_cm[0:20] # Drop Markov models
#ModelList_cm = ModelList_cm[0:3] # For debugging

# List the retained models with their position and training data set.
for i, model in enumerate(ModelList_cm):
    print(i, model['modelname'], model['data_train'])

# Retrieving the predictions for calibration and test partitions
# The ModelList contains the predictions organized by model
from Ensembling import CalibratePredictions, RetrieveStoredPredictions, mean_sd_calibrated, gam_calibrated

# Uses `steps`, `EndOfHistory`, `dev_id` and `get_future` from the config cell.
ModelList_cm = RetrieveStoredPredictions(ModelList_cm, steps, EndOfHistory, dev_id, level, get_future)

ModelList_cm = CalibratePredictions(ModelList_cm, EndOfHistory, steps)

In [None]:
# Expand the constituent-model point predictions, using the shared cm-level
# draw count from the config cell instead of a literal.
# NOTE(review): whether `draws` is the total or the per-model count is decided
# inside poisson_expand_multiple_point_predictions -- confirm.
sc_predictions_constituent = poisson_expand_multiple_point_predictions(
    ModelList=ModelList_cm,
    level='cm',
    year_list=year_list,
    draws=draws_cm,
)


In [None]:
# Show the last rows of the expanded frame of the first result element.
sc_predictions_constituent[0]['expanded_df'].tail()

## Saving the cm benchmark models

In [None]:
# Pair each cm benchmark with the name it is saved under, then split the pairs
# into the two parallel lists that save_models takes.
# NOTE(review): sc_predictions_last_historical is not part of this list --
# confirm whether it should be saved as well.
cm_benchmarks = [
    ('ensemble', sc_predictions_ensemble),
    ('constituent', sc_predictions_constituent),
]
model_names = [name for name, _ in cm_benchmarks]
model_list = [predictions for _, predictions in cm_benchmarks]

save_models('cm', model_names, model_list, filepath)
# ensemble_df must still hold the cm-level frame here; the pgm cell further
# down rebinds the same name.
save_actuals('cm', ensemble_df, filepath, year_list)

# pgm level

In [None]:
# Expand ensemble predictions:
# Read the stored pgm-level ensemble predictions and keep only the dependent
# variable and the step-specific prediction columns.
# Note: this rebinds `ensemble_df`, which held the cm-level frame before.
ensemble_df = pd.DataFrame.forecasts.read_store('pgm_ensemble_cm_calib_test', run=dev_id)[stepcols]

# Number of draws comes from the shared pgm config parameter rather than a literal.
sc_predictions_ensemble_pgm = poisson_expand_single_point_predictions(
    ensemble_df=ensemble_df,
    year_list=year_list,
    draws=draws_pgm,
    level='pgm',
)

sc_predictions_ensemble_pgm[0]['expanded_df'].head()



In [None]:
# Saving
# Only the ensemble benchmark exists at pgm level, so the name list must have
# exactly one entry to stay aligned with model_list.
model_names = ['ensemble']
model_list = [sc_predictions_ensemble_pgm]

save_models('pgm',model_names,model_list, filepath)
# Actuals are not saved at pgm level in this cell.


