# Evaluate models

In [None]:
# Basics
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cbook as cbook
# Views 3
from viewser.operations import fetch
from viewser import Queryset, Column
import views_runs
from views_partitioning import data_partitioner, legacy
from stepshift import views
import views_dataviz
from views_runs import storage, ModelMetadata
from views_runs.storage import store, retrieve, fetch_metadata
from views_forecasts.extensions import *
# Other packages
import pickle as pkl

# sklearn

from sklearn.metrics import mean_squared_error

In [None]:
# Shared run configuration used by the cells below.

dev_id = 'Fatalities002'
run_id = 'Fatalities002'
EndOfHistory = 508
RunGeneticAlgo = False
level = 'cm'

# Which steps to train and predict for: every step from 1 through 36.
steps = list(range(1, 37))

fi_steps = [1, 3, 6, 12, 36]

# Partition definitions: each entry is a (from, to) pair of time-index values.
calib_partitioner_dict = {"train": (121, 396), "predict": (397, 444)}
test_partitioner_dict = {"train": (121, 444), "predict": (445, 492)}
future_partitioner_dict = {"train": (121, 492), "predict": (493, 504)}

# One DataPartitioner per partition, keyed by the partition name.
calib_partitioner = views_runs.DataPartitioner({"calib": calib_partitioner_dict})
test_partitioner = views_runs.DataPartitioner({"test": test_partitioner_dict})
future_partitioner = views_runs.DataPartitioner({"future": future_partitioner_dict})

# Machine-specific locations (absolute paths on the author's workstation).
Mydropbox = '/Users/havardhegre/Dropbox (ViEWS)/ViEWS/'
overleafpath = '/Users/havardhegre/Dropbox (ViEWS)/Apps/Overleaf/ViEWS predicting fatalities/Tables/'
localpath = '/Users/havardhegre/Pickles/'




In [None]:
# Read the stored forecasts named "<level>_ensemble_genetic_test" for this run.
stored_modelname_test = f"{level}_ensemble_genetic_test"

ensemble_test_df = pd.DataFrame.forecasts.read_store(stored_modelname_test, run=run_id)
# Overwrite +/- infinity with 0, modifying the frame in place.
ensemble_test_df.replace(to_replace=[np.inf, -np.inf], value=0, inplace=True)

In [None]:
# Build, publish and fetch the "fatalities_history" country-month queryset.

Fatality_cutoff = 5
Time_cutoff = 6

# Target variable: log-transformed, with missing values filled.
target_column = (
    Column("ln_ged_sb_dep", from_table="ged2_cm", from_column="ged_sb_best_sum_nokgi")
    .transform.ops.ln()
    .transform.missing.fill()
)

# History flag: thresholds the source column at Fatality_cutoff, applies
# time_since, then thresholds that result at Time_cutoff.
history_flag_name = f"ts_ged_sb_f{Fatality_cutoff}_t{Time_cutoff}"
history_flag_column = (
    Column(history_flag_name, from_table="ged2_cm", from_column="ged_sb_best_sum_nokgi")
    .transform.missing.replace_na()
    .transform.bool.gte(Fatality_cutoff)
    .transform.temporal.time_since()
    .transform.missing.replace_na()
    .transform.bool.gte(Time_cutoff)
)

queryset = (
    Queryset("fatalities_history", "country_month")
    .with_column(target_column)
    .with_column(history_flag_column)
)
history_df = queryset.publish().fetch()
history_df.describe()

In [None]:
# Keep only the rows of the history data that fall inside the test
# partition's predict window (label-based slice, both ends included).
fromdate, todate = test_partitioner_dict['predict']
history_test_df = history_df.loc[fromdate:todate]

history_test_df.describe()
# Disabled in this cell:
#ensemble_test_df['ts_ged_sb_5'] = history_test_df['ts_ged_sb_5']
#ensemble_test_df['history_class'] = pd.cut(ensemble_test_df['ts_ged_sb_5'], [0, 6, 999])

In [None]:
# For every surrogate-model step, add source features to Ensemble_df, reading
# each month's value from `step` months earlier in data_df.
# NOTE(review): SurrogateModelSteps, Ensemble_df, monthrange and data_df are not
# defined in the visible part of this file — confirm they exist before running.
for step in SurrogateModelSteps:
    suffix = '_s_' + str(step)
    # Each pair is [name in predictions dataset, name in source dataset].
    colnames = [
        ['libdem' + suffix, 'vdem_v2x_libdem'],
        ['depvar' + suffix, 'ln_ged_sb_dep'],
        ['pop' + suffix, 'wdi_sp_pop_totl'],
        ['imr' + suffix, 'wdi_sp_dyn_imrt_in'],
        ['nb_conflict' + suffix, 'splag_1_decay_ged_sb_5'],
        ['ste10_conflict' + suffix, 'ste_theta10'],
        ['ste10stock_conflict' + suffix, 'ste_theta10_stock'],
    ]
    for target_name, source_name in colnames:
        # Start from an all-NaN column, then fill it month by month.
        Ensemble_df[target_name] = np.nan
        # Reverse stepshifting:
        for m in monthrange:
            shifted_values = data_df[source_name].loc[m - step]
            Ensemble_df.loc[m, target_name] = np.array(shifted_values)


In [None]:
# Read in the history queryset used for history-contingent evaluation and
# attach a two-group history classification to the ensemble test predictions.

# GED, baseline, ln versions of predictors
# log variables

qs = (Queryset("fatalities_history", "country_month")

    # target variable
    .with_column(Column("ln_ged_sb_dep", from_table = "ged2_cm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.ops.ln()
                 .transform.missing.fill()
                )

    # timelag 0 of target variable
    .with_column(Column("ln_ged_sb", from_table = "ged2_cm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.ops.ln()
                 .transform.missing.fill()
                )
    # Time since the source column was last >= 5 (no decay transform is applied here)
    .with_column(Column("ts_ged_sb_5", from_table = "ged2_cm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.bool.gte(5)
                 .transform.temporal.time_since()
                 .transform.missing.replace_na()
                )
        .with_theme("fatalities")
        .describe("""Fatalities conflict history, cm level

            For use in evaluation

        """)
    )
history_df = qs.publish().fetch()

print(f"fatalities_history; "
      f"A dataset with {len(history_df.columns)} columns, with "
      f"data between t {min(history_df.index.get_level_values(0))} "
      f"and {max(history_df.index.get_level_values(0))}. "
      f"({len(np.unique(history_df.index.get_level_values(1)))} units)"
     )

# Restrict the history data to the predict window of the test partition.
fromdate = test_partitioner_dict['predict'][0]
todate = test_partitioner_dict['predict'][1]
history_test_df = history_df.loc[fromdate:todate]

# Index-aligned copy of the history measure onto the prediction frame.
ensemble_test_df['ts_ged_sb_5'] = history_test_df['ts_ged_sb_5']
# Two groups: values up to 6, and values above 6 up to 999.
# pd.cut leaves the first bin open on the left by default, which would turn
# rows with ts_ged_sb_5 == 0 into NaN and silently drop them from both groups;
# include_lowest=True keeps those rows in the first group.
ensemble_test_df['history_class'] = pd.cut(
    ensemble_test_df['ts_ged_sb_5'], [0, 6, 999], include_lowest=True
)


In [None]:
# Report how many rows fall in each history class, then summarise the
# distribution of ln_ged_sb_dep within each class.
print(ensemble_test_df['history_class'].value_counts())
grouped = ensemble_test_df.groupby('history_class')
percentiles = (0.25, 0.5, 0.75, 0.90, 0.95, 0.98, 0.99, 0.995)
for history_bin, bin_rows in grouped:
    print(history_bin)
    outcome_summary = bin_rows['ln_ged_sb_dep'].describe(percentiles=percentiles)
    print(outcome_summary)

In [None]:
# Preview the history columns attached to the ensemble predictions.
# The column added to ensemble_test_df in this notebook is 'ts_ged_sb_5';
# no 'decay_ged_sb_5' column is created in the visible code.
ensemble_test_df[['ts_ged_sb_5', 'history_class']].head()

In [None]:
# Compute MSE and RMSE on the test frame for every step-specific prediction column.
Evaluation_results = []

# The outcome column leads the list, followed by one prediction column per step.
stepcols = ['ln_ged_sb_dep'] + [f'step_pred_{step}' for step in steps]

for col in stepcols[1:]:
    # Squared error is symmetric, so the argument order does not affect the score.
    mse_test = mean_squared_error(ensemble_test_df[col], ensemble_test_df['ln_ged_sb_dep'])
    print(col, mse_test)
    Results = {'MSE': mse_test, 'RMSE': np.sqrt(mse_test)}
    Evaluation_results.append(Results)

# One row per prediction column, in the order of `steps`.
Evaluation_results_df = pd.DataFrame(Evaluation_results)

In [None]:
# Display the evaluation table (columns 'MSE' and 'RMSE', one row per prediction column).
Evaluation_results_df

In [None]:
# NOTE(review): ad-hoc calculation; presumably back-transforming a log-scale
# value of 0.93 to the original scale — confirm intent or remove.
np.exp(.93)

In [None]:
    # NOTE(review): this cell is an indented fragment; the enclosing loop or
    # function header is not visible, and `model`, `df_test`, `df_test_exp`,
    # `target` and the `*_line` lists are not defined in the visible source.
    # As written it cannot run on its own — confirm whether it is leftover code.

    # Record the overall test MSE.
    model['mse_test'].append(mse_test)
    test_all_line.append(mse_test)

    # MSE restricted to rows where the observed ln_ged_sb_dep equals 0.
    mse_zeros = mean_squared_error(df_test[col].loc[df_test['ln_ged_sb_dep'] == 0], df_test['ln_ged_sb_dep'].loc[df_test['ln_ged_sb_dep'] == 0])
    model['mse_test_zeros'].append(mse_zeros)
    test_zeros_line.append(mse_zeros)

    # MSE restricted to rows where the observed ln_ged_sb_dep is greater than 0.
    mse_nonzeros = mean_squared_error(df_test[col].loc[df_test['ln_ged_sb_dep'] > 0], df_test['ln_ged_sb_dep'].loc[df_test['ln_ged_sb_dep'] > 0])
    model['mse_test_nonzeros'].append(mse_nonzeros)
    test_nonzeros_line.append(mse_nonzeros)

    # MSE of df_test_exp[col] against target['y_test'].
    mse_test_exp = mean_squared_error(df_test_exp[col], target['y_test'])
    model['mse_test_exp'].append(mse_test_exp)
    test_exp_all_line.append(mse_test_exp)