# Evaluate models

In [None]:
# Basics
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cbook as cbook
# Views 3
from viewser.operations import fetch
from viewser import Queryset, Column
import views_runs
from views_partitioning import data_partitioner, legacy
from stepshift import views
import views_dataviz
from views_runs import storage, ModelMetadata
from views_runs.storage import store, retrieve, fetch_metadata
from views_forecasts.extensions import *
# Other packages
import pickle as pkl

# sklearn

from sklearn.metrics import mean_squared_error

In [None]:
# Shared run configuration used by the cells below.

dev_id = 'Fatalities002'
run_id = 'Fatalities002'
EndOfHistory = 508
RunGeneticAlgo = False
level = 'cm'

# Steps to train and predict for: every integer from 1 through 36.
steps = list(range(1, 36 + 1))

# NOTE(review): presumably the subset of steps used for feature-importance
# output -- not referenced in the visible cells, confirm before relying on it.
fi_steps = [1, 3, 6, 12, 36]

# Partition definitions: (first, last) period ids for the training window and
# for the prediction window of each partition.
calib_partitioner_dict = {
    "train": (121, 396),
    "predict": (397, 444),
}
test_partitioner_dict = {
    "train": (121, 444),
    "predict": (445, 492),
}
future_partitioner_dict = {
    "train": (121, 492),
    "predict": (493, 504),
}

# One partitioner object per partition, each keyed by its partition name.
calib_partitioner = views_runs.DataPartitioner({"calib": calib_partitioner_dict})
test_partitioner = views_runs.DataPartitioner({"test": test_partitioner_dict})
future_partitioner = views_runs.DataPartitioner({"future": future_partitioner_dict})

# Machine-specific output locations (absolute paths on the author's machine).
Mydropbox = '/Users/havardhegre/Dropbox (ViEWS)/ViEWS/'
overleafpath = '/Users/havardhegre/Dropbox (ViEWS)/Apps/Overleaf/ViEWS predicting fatalities/Tables/'
localpath = '/Users/havardhegre/Pickles/'




In [None]:
# Name under which the test-partition predictions of the genetic ensemble are
# stored: "<level>_ensemble_genetic_test".
stored_modelname_test = '_'.join([level, 'ensemble_genetic']) + '_test'

# Read the stored predictions for this run, then overwrite +/-infinity with 0
# in place so that the squared-error computations further down stay finite.
ensemble_test_df = pd.DataFrame.forecasts.read_store(stored_modelname_test, run=run_id)
ensemble_test_df.replace(to_replace=[np.inf, -np.inf], value=0, inplace=True)

In [None]:
# Build and fetch the queryset holding the target variable and a binary
# conflict-history indicator.

Fatality_cutoff = 10
Time_cutoff = 12
history_colname = f"ts_ged_sb_f{Fatality_cutoff}_t{Time_cutoff}"

# Target variable: ln transform of the source column, with missing values filled.
target_column = (
    Column("ln_ged_sb_dep", from_table="ged2_cm", from_column="ged_sb_best_sum_nokgi")
    .transform.ops.ln()
    .transform.missing.fill()
)

# History indicator, built from the same source column in this order:
# replace NAs -> flag values >= Fatality_cutoff -> time since that flag ->
# replace NAs -> flag values >= Time_cutoff.
history_column = (
    Column(history_colname, from_table="ged2_cm", from_column="ged_sb_best_sum_nokgi")
    .transform.missing.replace_na()
    .transform.bool.gte(Fatality_cutoff)
    .transform.temporal.time_since()
    .transform.missing.replace_na()
    .transform.bool.gte(Time_cutoff)
)

queryset = (
    Queryset("fatalities_history", "country_month")
    .with_column(target_column)
    .with_column(history_column)
)

history_df = queryset.publish().fetch()
history_df.describe()

In [None]:
# Attach, for every step s, the conflict-history indicator observed s periods
# before each prediction period as column "<history_colname>_s<s>".
#
# NOTE: pandas aligns a Series on index labels when it is assigned as a
# DataFrame column. Assigning the slice for [first - s, last - s] directly
# would therefore NOT shift it: rows up to (last - s) would receive the value
# of the *same* period and the final s periods would be NaN. The period level
# of the index is re-labelled by +s first so that the value observed at t - s
# lands on the row for prediction period t.
# Assumes both frames are indexed with the period (month) id as the first
# index level -- the .loc[fromdate:todate] slice already relies on this.
# Units missing from history_df for a given period come out as NaN.
NumberOfMonths = 48
history_series = history_df[history_colname]  # loop-invariant, looked up once
for step in steps:
    colname = history_colname + '_s' + str(step)
    fromdate = test_partitioner_dict['predict'][0] - step
    todate = test_partitioner_dict['predict'][1] - step
    lagged = history_series.loc[fromdate:todate]
    # Move every (period, unit, ...) key forward by `step` periods.
    shifted_index = lagged.index.map(lambda key: (key[0] + step, *key[1:]))
    ensemble_test_df[colname] = pd.Series(lagged.to_numpy(), index=shifted_index)
ensemble_test_df.describe()

In [None]:
# Distribution of the observed outcome within each history class, using the
# history indicator attached for step 3.
colname = f"{history_colname}_s3"
percentiles = (0.25, 0.5, 0.75, 0.90, 0.95, 0.98, 0.99, 0.995)

# Number of rows in each history class.
print(ensemble_test_df[colname].value_counts())

grouped = ensemble_test_df.groupby(colname)
for name, group in grouped:
    # Class label first, then the summary statistics of the outcome.
    print(name)
    outcome_summary = group['ln_ged_sb_dep'].describe(percentiles=percentiles)
    print(outcome_summary)

In [None]:
# Peek at the first rows of two columns of the prediction frame.
ensemble_test_df.loc[:, ['decay_ged_sb_5', 'history_class']].head()

In [None]:
# Per-step evaluation of the ensemble predictions on the test partition:
# MSE/RMSE over all rows, and separately for the two history classes defined
# by the step-specific history indicator column.
#
# Class labels follow the indicator's last transform (gte(Time_cutoff) applied
# to a time-since value): 1 is treated as "low conflict", 0 as "high conflict".
Evaluation_results = [] # list to hold evaluation results

stepcols = ['ln_ged_sb_dep']
percentiles = (0.25,0.5,0.75,0.90,0.95,0.98,0.99,0.995)  # loop-invariant, hoisted
for step in steps:
    col = 'step_pred_' + str(step)
    # Arguments given in scikit-learn's documented (y_true, y_pred) order; the
    # MSE value itself does not depend on the order.
    mse_test_all = mean_squared_error(ensemble_test_df['ln_ged_sb_dep'], ensemble_test_df[col])
    colname = history_colname + '_s' + str(step)
    print(ensemble_test_df[colname].value_counts())
    grouped_dfs = ensemble_test_df.groupby(colname)

    # Reset for every step. Without this, a class that is absent for a step
    # would raise NameError on the first step or silently reuse the previous
    # step's value on later ones; NaN marks "no rows in this class" instead.
    mse_test_lowconflict = np.nan
    mse_test_highconflict = np.nan
    for name, group in grouped_dfs:
        if name == 1:
            mse_test_lowconflict = mean_squared_error(group['ln_ged_sb_dep'], group[col])
        elif name == 0:
            mse_test_highconflict = mean_squared_error(group['ln_ged_sb_dep'], group[col])

    Results = {
        'MSE_all':  mse_test_all,
        'RMSE_all': np.sqrt(mse_test_all),
        'MSE_lowconflict':  mse_test_lowconflict,
        'RMSE_lowconflict': np.sqrt(mse_test_lowconflict),
        'MSE_highconflict':  mse_test_highconflict,
        'RMSE_highconflict': np.sqrt(mse_test_highconflict),
    }
    Evaluation_results.append(Results)

# One row per step, in the order of `steps`.
Evaluation_results_df = pd.DataFrame(Evaluation_results)

In [None]:
# Display the evaluation table (one row per entry appended to Evaluation_results).
Evaluation_results_df

In [None]:
# Scratch calculation: e**13.
# NOTE(review): presumably converting a log-scale value back to the raw scale
# for a sanity check -- confirm the intent or remove this cell.
np.exp(13)