# Evaluate models

In [1]:
# Basics
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cbook as cbook
# Views 3
from viewser.operations import fetch
from viewser import Queryset, Column
import views_runs
from views_partitioning import data_partitioner, legacy
from stepshift import views
import views_dataviz
from views_runs import storage, ModelMetadata
from views_runs.storage import store, retrieve, fetch_metadata
from views_forecasts.extensions import *
# Other packages
import pickle as pkl

# sklearn

from sklearn.metrics import mean_squared_error

In [2]:
# Common parameters:

dev_id = 'Fatalities002'
run_id = 'Fatalities002'
EndOfHistory = 508
RunGeneticAlgo = False
level = 'cm'

steps = [*range(1, 36+1, 1)] # Which steps to train and predict for

fi_steps = [1,3,6,12,36]
# Specifying partitions

calib_partitioner_dict = {"train":(121,396),"predict":(397,444)}
test_partitioner_dict = {"train":(121,444),"predict":(445,492)}
future_partitioner_dict = {"train":(121,492),"predict":(493,504)}
calib_partitioner =  views_runs.DataPartitioner({"calib":calib_partitioner_dict})
test_partitioner =  views_runs.DataPartitioner({"test":test_partitioner_dict})
future_partitioner =  views_runs.DataPartitioner({"future":future_partitioner_dict})

Mydropbox = '/Users/havardhegre/Dropbox (ViEWS)/ViEWS/'
overleafpath = '/Users/havardhegre/Dropbox (ViEWS)/Apps/Overleaf/ViEWS predicting fatalities/Tables/'
localpath = '/Users/havardhegre/Pickles/'




In [3]:
def FatalitiesHistory(Fatality_cutoff=10,Time_cutoff=12):
    ''' Function to retrieve from viewser a dataframe with dependent variable and a characterisation of recent conflict history  '''

    # initialise QS
    history_colname = "ts_ged_sb_f" + str(Fatality_cutoff) + "_t" + str(Time_cutoff)

    queryset = Queryset("fatalities_history", "country_month")
        # target variable
    queryset = queryset.with_column(Column("ln_ged_sb_dep", from_table = "ged2_cm", from_column = "ged_sb_best_sum_nokgi")
                     .transform.ops.ln()
                     .transform.missing.fill()
                    )   
    queryset = queryset.with_column(Column(history_colname, from_table = "ged2_cm", from_column = "ged_sb_best_sum_nokgi")             
                 .transform.missing.replace_na()
                 .transform.bool.gte(Fatality_cutoff)
                 .transform.temporal.time_since()
                 .transform.missing.replace_na()
                 .transform.bool.gte(Time_cutoff)
                                   )
    history_df = queryset.publish().fetch()
    return history_df, history_colname


def CreateEvaluationDf(stored_modelname_test,Fatality_cutoff=10,Time_cutoff=12,NumberOfMonths=48):
    test_df = pd.DataFrame.forecasts.read_store(stored_modelname_test, run=run_id)
    test_df.replace([np.inf, -np.inf], 0, inplace=True) 
    history_df,history_colname = FatalitiesHistory(Fatality_cutoff,Time_cutoff)
    # NOTE: assumption is that panels are balanced!
    NumberOfMonths = 48
    for step in steps:
        colname = history_colname + '_s' + str(step)
        fromdate = test_partitioner_dict['predict'][0] - step
        todate = test_partitioner_dict['predict'][1] - step
        test_df[colname] = history_df[history_colname].loc[fromdate:todate]
    return test_df,history_colname

In [13]:
EnsembleList = []
genetic = {
        'modelname': 'ensemble_genetic',
        'algorithm': '',
        'depvar': "ln_ged_sb_dep",
        'predstore_calib': 'cm_ensemble_genetic_calib',
        'predstore_test': 'cm_ensemble_genetic_test'   
    }    

EnsembleList.append(genetic)

In [15]:
for model in EnsembleList:
    stored_modelname_test = model['predstore_test']
    ensemble_test_df,history_colname = CreateEvaluationDf(stored_modelname_test,Fatality_cutoff=10,Time_cutoff=12,NumberOfMonths=48)

pr_46_cm_ensemble_genetic_test.parquet
 .    

In [24]:
ModelList[8]['Evaluation_results_df']

Unnamed: 0,MSE_all,RMSE_all,MSE_lowconflict,RMSE_lowconflict,MSE_highconflict,RMSE_highconflict
0,0.291571,0.539974,0.049157,0.221715,1.475364,1.214646
1,0.339523,0.582686,0.057925,0.240676,1.666161,1.290798
2,0.372521,0.610345,0.059076,0.243056,1.824965,1.350913
3,0.365187,0.604307,0.045221,0.212652,1.81402,1.346856
4,0.389349,0.623979,0.054372,0.233177,1.901517,1.378955
5,0.387965,0.622868,0.048372,0.219936,1.91761,1.384778
6,0.379794,0.616274,0.052967,0.230146,1.852842,1.361191
7,0.37894,0.615581,0.054054,0.232495,1.874619,1.369167
8,0.38723,0.622278,0.058698,0.242276,1.954584,1.398064
9,0.397444,0.630432,0.055146,0.234833,2.019635,1.421138


In [25]:
def EvaluateModel(df,colname):
#    print(df[colname].value_counts())
    grouped = df.groupby(colname)

    Evaluation_results = [] # list to hold evaluation results

    stepcols = ['ln_ged_sb_dep']
    for step in steps:
        col = 'step_pred_' + str(step)
        mse_test_all = mean_squared_error(df[col], df['ln_ged_sb_dep'])
        colname = history_colname + '_s' + str(step)
#        print(df[colname].value_counts())
        grouped_dfs = df.groupby(colname)
        percentiles = (0.25,0.5,0.75,0.90,0.95,0.98,0.99,0.995)
        for name, group in grouped_dfs:
            if name == 1:
                mse_test_lowconflict = mean_squared_error(group[col], group['ln_ged_sb_dep'])
    #            print('0', name, mse_lowconflict)
            if name == 0:
                mse_test_highconflict = mean_squared_error(group[col], group['ln_ged_sb_dep'])
    #            print(name)
    #    print(col, mse_test_all, mse_lowconflict, mse_highconflict)
        Results = {
            'MSE_all':  mse_test_all,
            'RMSE_all': np.sqrt(mse_test_all),
            'MSE_lowconflict':  mse_test_lowconflict,
            'RMSE_lowconflict': np.sqrt(mse_test_lowconflict),
            'MSE_highconflict':  mse_test_highconflict,
            'RMSE_highconflict': np.sqrt(mse_test_highconflict),
        }
        Evaluation_results.append(Results)

    Evaluation_results_df = pd.DataFrame(Evaluation_results)
    return Evaluation_results_df

colname = history_colname + '_s3'
df = ensemble_test_df
Evaluation_ensemble_df = EvaluateModel(df,colname)


In [None]:
colname = history_colname + '_s3'
print(ensemble_test_df[colname].value_counts())
grouped = ensemble_test_df.groupby(colname)
percentiles = (0.25,0.5,0.75,0.90,0.95,0.98,0.99,0.995)
for name, group in grouped:
    print(name)
    print(group['ln_ged_sb_dep'].describe(percentiles = percentiles))

In [26]:
Evaluation_ensemble_df

Unnamed: 0,MSE_all,RMSE_all,MSE_lowconflict,RMSE_lowconflict,MSE_highconflict,RMSE_highconflict
0,0.256294,0.506255,0.044005,0.209774,1.288491,1.135117
1,0.309799,0.556596,0.050024,0.223662,1.517458,1.231852
2,0.376539,0.613627,0.056287,0.23725,1.83121,1.353222
3,0.407686,0.638503,0.050333,0.224349,2.003283,1.415374
4,0.420071,0.648129,0.05411,0.232616,2.022993,1.42232
5,0.51835,0.719966,0.058426,0.241716,2.626057,1.620511
6,0.481055,0.693581,0.06222,0.24944,2.4337,1.560032
7,0.498532,0.706068,0.06618,0.257255,2.603833,1.61364
8,0.551926,0.742917,0.072319,0.268922,2.929368,1.71154
9,0.504876,0.710546,0.068079,0.260919,2.698632,1.642751


In [30]:
from ModelDefinitions import DefineEnsembleModels

ModelList = DefineEnsembleModels(level)
evaluation_allmodels = Evaluation_ensemble_df[['MSE_all','MSE_lowconflict','MSE_highconflict']]
    
i = 0
for model in ModelList:
    print(i, model['modelname'], model['data_train'])
    stored_modelname_test = model['predstore_test']
    model['test_df'],model['history_colname'] = CreateEvaluationDf(stored_modelname_test,Fatality_cutoff=10,Time_cutoff=12,NumberOfMonths=48)
    colname = model['history_colname'] + '_s3'
    model['Evaluation_results_df'] = EvaluateModel(model['test_df'],colname)
    cn_all = 'MSE_all_' + model['modelname']
    evaluation_allmodels[cn_all] = model['Evaluation_results_df']['MSE_all']
    cn_lc = 'MSE_lowconflict_' + model['modelname']
    evaluation_allmodels[cn_lc] = model['Evaluation_results_df']['MSE_lowconflict']
    cn_hc = 'MSE_highconflict_' + model['modelname']
    evaluation_allmodels[cn_hc] = model['Evaluation_results_df']['MSE_highconflict']
    i = i + 1

0 fatalities002_baseline_rf baseline002
pr_46_cm_fatalities002_baseline_rf_test.parquet
 .    

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  evaluation_allmodels[cn_all] = model['Evaluation_results_df']['MSE_all']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  evaluation_allmodels[cn_lc] = model['Evaluation_results_df']['MSE_lowconflict']


1 fat_conflicthistory_rf conflict_ln
pr_46_cm_fat_conflicthistory_rf_test.parquet
 .    2 fat_conflicthistory_gbm conflict_ln
pr_46_cm_fat_conflicthistory_gbm_test.parquet
 .    3 fat_conflicthistory_hurdle_lgb conflict_ln
pr_46_cm_fat_conflicthistory_hurdle_lgb_test.parquet
 .    4 fat_conflicthistory_long_xgb conflictlong_ln
pr_46_cm_fat_conflicthistory_long_xgb_test.parquet
 .    5 fat_vdem_hurdle_xgb vdem_short
pr_46_cm_fat_vdem_hurdle_xgb_test.parquet
 .    6 fat_wdi_rf wdi_short
pr_46_cm_fat_wdi_rf_test.parquet
 .    7 fatalities002_topics_rf topics_002
pr_46_cm_fatalities002_topics_rf_test.parquet
 .    8 fat_topics_rf topics_short
pr_46_cm_fat_topics_rf_test.parquet
 .    9 fatalities002_topics_hurdle_lgb topics_002
pr_46_cm_fatalities002_topics_hurdle_lgb_test.parquet
 .    10 fat_topics_histgbm topics_short
pr_46_cm_fat_topics_histgbm_test.parquet
 .    11 fat_broad_xgb broad
pr_46_cm_fat_broad_xgb_test.parquet
 .    12 fatalities002_greatest_hits_hurdle_rf gh
pr_46_cm_fatali

In [39]:
ColsToShow = ['MSE_all','MSE_lowconflict',
              'MSE_all_fatalities002_topics_rf','MSE_lowconflict_fatalities002_topics_rf',
              'MSE_all_fatalities002_topics_hurdle_lgb','MSE_lowconflict_fatalities002_topics_hurdle_lgb',
             'MSE_all_fat_topics_rf','MSE_lowconflict_fat_topics_rf',]

evaluation_allmodels[ColsToShow]

Unnamed: 0,MSE_all,MSE_lowconflict,MSE_all_fatalities002_topics_rf,MSE_lowconflict_fatalities002_topics_rf,MSE_all_fatalities002_topics_hurdle_lgb,MSE_lowconflict_fatalities002_topics_hurdle_lgb,MSE_all_fat_topics_rf,MSE_lowconflict_fat_topics_rf
0,0.256294,0.044005,0.285011,0.051479,0.360462,0.048687,0.291571,0.049157
1,0.309799,0.050024,0.343488,0.054439,0.401959,0.048899,0.339523,0.057925
2,0.376539,0.056287,0.387679,0.056147,0.427478,0.05277,0.372521,0.059076
3,0.407686,0.050333,0.388408,0.043474,0.441918,0.046552,0.365187,0.045221
4,0.420071,0.05411,0.382578,0.04964,0.454029,0.050953,0.389349,0.054372
5,0.51835,0.058426,0.399485,0.045,0.466228,0.052068,0.387965,0.048372
6,0.481055,0.06222,0.39507,0.050853,0.457609,0.05334,0.379794,0.052967
7,0.498532,0.06618,0.399515,0.050744,0.464286,0.048761,0.37894,0.054054
8,0.551926,0.072319,0.400018,0.058774,0.471808,0.057267,0.38723,0.058698
9,0.504876,0.068079,0.400829,0.047947,0.470871,0.054555,0.397444,0.055146
