
# ViEWS 3 ensembles
## Fatalities project, cm level
This notebook evaluates the broad list of constituent models for the FCDO fatalities project. The final, stripped-down ensemble is computed in the cm_compute_ensemble notebook.

## Importing modules

In [None]:
# Basics
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cbook as cbook
# sklearn
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn import linear_model
from sklearn.metrics import mean_squared_error
# Views 3
from viewser.operations import fetch
from viewser import Queryset, Column
import views_runs
from views_partitioning import data_partitioner, legacy
from stepshift import views
import views_dataviz
from views_runs import storage, ModelMetadata
from views_runs.storage import store, retrieve, fetch_metadata
from views_forecasts.extensions import *


In [None]:
# Packages from the Predicting Fatalities repository

from HurdleRegression import *
from Ensembling import CalibratePredictions, RetrieveStoredPredictions, mean_sd_calibrated, gam_calibrated

from FetchData import FetchData, RetrieveFromList

In [None]:
# Run-wide configuration used by the cells below

run_id = 'Fatalities001'   # run under which predictions are stored and read
dev_id = 'Fatalities001'   # suffix of the model-list / metadata csv file names
EndOfHistory = 507
RunGeneticAlgo = False
level = 'cm'

# Steps to train and predict for (1 through 36)
steps = list(range(1, 37))
# Steps to present feature importances for
fi_steps = [1, 3, 6, 12, 36]

# Partitions: training and prediction periods, each as a (first, last) pair
calib_partitioner_dict = {"train": (121, 396), "predict": (397, 444)}
test_partitioner_dict = {"train": (121, 444), "predict": (445, 492)}
future_partitioner_dict = {"train": (121, 492), "predict": (493, 504)}
calib_partitioner = views_runs.DataPartitioner({"calib": calib_partitioner_dict})
test_partitioner = views_runs.DataPartitioner({"test": test_partitioner_dict})
future_partitioner = views_runs.DataPartitioner({"future": future_partitioner_dict})

# Local output locations
Mydropbox = '/Users/havardhegre/Dropbox (ViEWS)/ViEWS'
overleafpath = '/Users/havardhegre/Dropbox (ViEWS)/Apps/Overleaf/ViEWS predicting fatalities/Tables/'


In [None]:
# Output display: 3 decimal places, and never wrap repr(DataFrame) across additional lines
for option_name, option_value in (
    ("display.precision", 3),
    ("display.expand_frame_repr", False),
):
    pd.set_option(option_name, option_value)

## Retrieve models and predictions

In [None]:
# Defining the ensemble
# ModelList ends up with one dictionary per row of the model-development csv
ShortList = True
# Models kept when ShortList is True
ModelsToRead = [
    'fat_baseline_rf',
    'fat_conflicthistory_hurdle_lgb',
    'fat_topics_rf',
    'fat_hh20_Markov_glm',
]

ModelList = []

# Read in list with models from model development notebook
gitname = f'ModelList_cm_wide_{dev_id}.csv'
ModelList_df = pd.read_csv(gitname)

if ShortList:
    is_shortlisted = ModelList_df['modelname'].isin(ModelsToRead)
    ModelList_df = ModelList_df[is_shortlisted].copy()

# One dictionary per remaining row
ModelList = ModelList_df.to_dict('records')

In [None]:
# Print each model name together with its position in ModelList
for position, model in enumerate(ModelList):
    print(position, model['modelname'])

In [None]:
# Retrieve the stored predictions for the calibration and test partitions, then calibrate them.
# ModelList keeps the predictions organized by model.
models_with_predictions = RetrieveStoredPredictions(ModelList, steps, EndOfHistory, run_id)
ModelList = CalibratePredictions(models_with_predictions, EndOfHistory, steps)

In [None]:
# Display the complete model list (one dictionary per model)
ModelList

In [None]:
# Prediction target
# In this particular ensemble it is read from the stored predictions of model 0
first_modelname = ModelList[0]['modelname']
stored_modelname_calib = f'{level}_{first_modelname}_calib'
stored_modelname_test = f'{level}_{first_modelname}_test'
stored_calib = pd.DataFrame.forecasts.read_store(stored_modelname_calib, run=run_id)
stored_test = pd.DataFrame.forecasts.read_store(stored_modelname_test, run=run_id)
target = {
    'y_calib': stored_calib['ln_ged_sb_dep'],
    'y_test': stored_test['ln_ged_sb_dep'],
}

In [None]:
# Illustrating calibration: stored versus calibrated predictions for one model, column and partition
model = ModelList[0]
print(model['modelname'])
col = 'step_pred_1'
period = 'test'

calibrated = model[f'{period}_df_calibrated'][col]
uncalibrated = model[f'predictions_{period}_df'][col]
print(calibrated.describe())
print(uncalibrated.describe())

# x axis: stored predictions, y axis: calibrated predictions
plt.scatter(uncalibrated, calibrated)

overleafpath = '/Users/havardhegre/Dropbox (ViEWS)/Apps/Overleaf/ViEWS predicting fatalities/Figures/PredictionPlots/'
filename = f"{overleafpath}Calibration_example_{model['modelname']}.png"
plt.savefig(filename, dpi=300)

In [None]:
# Write the model list data frame to a csv file named after the development run
gitname = f'EnsembleMetaData_broad_cm_{dev_id}.csv'
ModelList_df.to_csv(gitname)

In [None]:
# Summary statistics of the calibration-partition predictions of model 2 at selected steps
cols_to_inspect = [f'step_pred_{s}' for s in (1, 6, 12, 36)]
ModelList[2]['predictions_calib_df'].loc[:, cols_to_inspect].describe()

In [None]:
# Create unweighted average ensemble
# The calibrated predictions of the constituent models are averaged with equal weight.

ensemble = {
    'modelname': 'ensemble_unweighted',
    'depvar': "ln_ged_sb_dep",
    'loggeddepvar': True,
    'predictions_file_calib': "",
    'predictions_file_test': "",
    'calibration_gam': []
}

# Constituent models only: if this cell is re-run, the ensemble appended by a previous run
# must neither be averaged into itself nor be appended a second time.
constituents = [model for model in ModelList if model['modelname'] != ensemble['modelname']]
n_models = len(constituents)

# Frames to average. Each sum starts from a copy of the first model and adds every OTHER
# model exactly once, so that no model is counted twice.
# NOTE(review): the frames also carry the 'ln_ged_sb_dep' column, which is averaged as well;
# presumably it is identical across models and therefore unchanged -- confirm.
averaged_keys = ['calib_df_calibrated', 'test_df_calibrated',
                 'calib_df_cal_expand', 'test_df_cal_expand']
for key in averaged_keys:
    total = constituents[0][key].copy()
    for model in constituents[1:]:
        total = total.add(model[key])
    ensemble[key] = total.divide(n_models)

# The ensemble is kept as the last entry of ModelList; later cells rely on that position
ModelList = constituents + [ensemble]

# Save ensemble predictions

predstore_calib = level +  '_' + ensemble['modelname'] + '_calib'
ensemble['calib_df_calibrated'].forecasts.set_run(run_id)
ensemble['calib_df_calibrated'].forecasts.to_store(name=predstore_calib, overwrite = True)
predstore_test = level +  '_' + ensemble['modelname'] + '_test'
ensemble['test_df_calibrated'].forecasts.set_run(run_id)
ensemble['test_df_calibrated'].forecasts.to_store(name=predstore_test, overwrite = True)


# Estimating ensembles

In [None]:
# First rows of the calibration-partition target series
target['y_calib'].head()

In [None]:
# Checking missingness: number of rows, missing values and infinite values per column
N = 51  # only the first N columns are inspected
df = ModelList[0]['predictions_test_df']
for col in df.columns[:N]:
    n_missing = df[col].isnull().sum()
    # Count infinities in THIS column only (not across the whole data frame)
    n_infinite = np.isinf(df[col]).sum()
    print(col, len(df[col]), 'missing:', n_missing, 'infinity:', n_infinite)



In [None]:
# Compute ablation MSE, calibration partition

from numpy import array
from numpy.linalg import norm

def ensemble_predictions(yhats, weights):
    """Return the weighted sum of the ensemble members' predictions.

    yhats: sequence holding one prediction vector per ensemble member.
    weights: one weight per ensemble member.
    """
    member_predictions = np.array(yhats)
    return np.dot(weights, member_predictions)

def evaluate_ensemble(yhats, weights, test_y):
    """Return the mean squared error between the weighted ensemble prediction and test_y."""
    return mean_squared_error(ensemble_predictions(yhats, weights), test_y)

# scale a weight vector so that its l1 norm equals one
def normalize(weights):
    """Return weights divided by their l1 norm; an all-zero vector is returned unchanged."""
    l1 = norm(weights, 1)
    return weights if l1 == 0.0 else weights / l1


# Columns to evaluate: the outcome column first, then one prediction column per step
stepcols = ['ln_ged_sb_dep'] + [f'step_pred_{step}' for step in steps]
ensemble_mses = [] # List to hold unweighted ensemble mses

# Count models, set up lists (the last entry of ModelList is left out)
constituent_models = ModelList[0:-1]
number_of_models = len(constituent_models)
mlist = [model['modelname'] for model in constituent_models]
for model in constituent_models:
    model['Ablation_MSE'] = [0] * (len(steps)+1)
print('Models:',number_of_models)

# Compute unweighted ensemble mses, one per column of stepcols
for col in stepcols:
    # Per model: rows with infinite values in this column dropped, missing values set to 0
    filtered_frames = [
        m['calib_df_calibrated'][~np.isinf(m['calib_df_calibrated'][col])].fillna(0)
        for m in constituent_models
    ]
    yhats = [frame[col] for frame in filtered_frames]
    weights = [1/number_of_models] * len(filtered_frames)
    # The outcome series is taken from the last model's filtered frame
    df_calib = filtered_frames[-1]
    emse = evaluate_ensemble(yhats, weights, df_calib['ln_ged_sb_dep'])
    ensemble_mses.append(emse)

#print('Unweighted ensemble MSEs:',ensemble_mses)

# Compute ablation scores
# Ablation MSE of a model = MSE of the full unweighted ensemble minus the MSE of the unweighted
# ensemble of all OTHER models, for each column of stepcols. A negative value means the full
# ensemble has the lower MSE, i.e. the model improves the ensemble.

# Replace missing values by 0 once, up front (not sure what is best to do with NAs).
# As before, the filled frames are stored back on the models.
for model in ModelList[0:-1]: # Assuming the ablated ensemble exists!
    model['calib_df_calibrated'] = model['calib_df_calibrated'].fillna(0)

for colno, col in enumerate(stepcols):
    print('Step',col)
    for model in ModelList[0:-1]:
        # Outcome series of the left-out model, without rows that are infinite in this column
        calib = model['calib_df_calibrated']
        y = calib['ln_ged_sb_dep'][~np.isinf(calib[col])]

        # Predictions of every other model, filtered the same way on their own column
        yhats = []
        for abl_model in ModelList[0:-1]:
            if abl_model['modelname'] == model['modelname']:
                continue
            abl_calib = abl_model['calib_df_calibrated']
            yhats.append(abl_calib[col][~np.isinf(abl_calib[col])])
        weights = [1/(number_of_models-1)] * len(yhats)

        ablated_mse = evaluate_ensemble(yhats, weights, y)
        Ablation_MSE = ensemble_mses[colno] - ablated_mse
        model['Ablation_MSE'][colno] = Ablation_MSE
    

In [None]:
from statistics import mean
# Mean ablation MSE of the first model over all steps
# (entry 0 belongs to the 'ln_ged_sb_dep' column and is skipped)
mean(ModelList[0]['Ablation_MSE'][1:])

In [None]:
# Go through the ablation MSEs for pre-screening of ensemble inclusion.
# model['Ablation_MSE'][k] is the score of the k-th column of stepcols; with steps = 1..36
# entry k belongs to step k (entry 0 is the 'ln_ged_sb_dep' column).
# model['Include'] is set to True if the mean ablation MSE is negative (the full ensemble has
# lower MSE than the ensemble without the model) over all steps or in at least one of the four
# step segments 1-6, 7-12, 13-24, 25-36; otherwise it is False.

from statistics import mean, stdev


def _mean_ablation(scores, first_step, last_step):
    """Mean of scores[first_step..last_step], both ends inclusive."""
    return mean(scores[first_step:last_step + 1])


for m, model in enumerate(ModelList[:-1]):
    scores = model['Ablation_MSE']
    m_all = _mean_ablation(scores, 1, 36)
    m_1 = _mean_ablation(scores, 1, 6)
    m_2 = _mean_ablation(scores, 7, 12)
    m_3 = _mean_ablation(scores, 13, 24)
    m_4 = _mean_ablation(scores, 25, 36)
    model['Include'] = any(pmean < 0 for pmean in [m_all, m_1, m_2, m_3, m_4])

    print(m, model['Include'], model['modelname'], ', aMSE steps all, 1-6, 7-12, 13-24, 25-36',
          f'{m_all:.4f}',f'{m_1:.4f}',f'{m_2:.4f}',f'{m_3:.4f}',f'{m_4:.4f}',)

In [None]:
# Constructing dfs to hold the predictions
# StepEnsembles gets one dictionary per step. 'df_calib' / 'df_test' hold the outcome plus one
# column per model in ModelList; 'ensembles_calib' / 'ensembles_test' hold the outcome plus the
# ensemble columns computed from them.
StepEnsembles = []
for col in stepcols[1:]:
    calib_by_model = pd.DataFrame(target['y_calib'])
    test_by_model = pd.DataFrame(target['y_test'])
    for model in ModelList:
        calib_by_model[model['modelname']] = model['calib_df_calibrated'][col]
        test_by_model[model['modelname']] = model['test_df_calibrated'][col]
    StepEnsembles.append({
        'step_pred': col,
        'df_calib': calib_by_model,
        'df_test': test_by_model,
        'ensembles_calib': pd.DataFrame(target['y_calib']),
        'ensembles_test': pd.DataFrame(target['y_test']),
    })

# Calculating unweighted average ensembles: row mean over all model columns
for step_entry in StepEnsembles:
    for part in ('test', 'calib'):
        model_columns = step_entry[f'df_{part}'].drop('ln_ged_sb_dep', axis=1)
        step_entry[f'ensembles_{part}']['unweighted_average'] = model_columns.mean(axis=1)

    

# Correlation of predictions

In [None]:
# Mean correlation of each column of df_calib with all columns, for selected steps.
# The first column comes from StepEnsembles[0]; the others are named after step index + 1.
meancorr_df = pd.DataFrame(StepEnsembles[0]['df_calib'].corr().mean(axis=1))
for step_index in (2, 5, 11, 23, 35):
    mean_correlation = StepEnsembles[step_index]['df_calib'].corr().mean(axis=1)
    meancorr_df[f'step_{step_index + 1}'] = mean_correlation
meancorr_df['average'] = meancorr_df.mean(axis=1)

# Save the corr dfs
dflist = [
    (meancorr_df,'meancorr_df'),
]

path = '/Users/havardhegre/Dropbox (ViEWS)/ViEWS/Projects/PredictingFatalities/MSEs/'
for frame, stem in dflist:
    filename = f'{path}{stem}.csv'
    frame.to_csv(filename)
    

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
level = 'cm'

cols_to_see = ['lndepvar', 'fat_hh20_xgb', 'fat_hh20_hurdle_xgb','fat_hh20_rf','fat_hh20_xgbrf']

sns.set_context("notebook")
sns.set() # Setting seaborn as default style even if use only matplotlib
path = '/Users/havardhegre/Dropbox (ViEWS)/ViEWS/Projects/PredictingFatalities/PredictionPlots/'
overleafpath = '/Users/havardhegre/Dropbox (ViEWS)/Apps/Overleaf/ViEWS predicting fatalities/Figures/'
cm = "YlGnBu"   # colormap name passed to the heatmaps

# One heatmap of the correlations between the calibration-partition prediction columns per
# panel; each panel is paired with its index into StepEnsembles (step = index + 1).
fig, ((ax1, ax3), (ax6,ax12), (ax24,ax36)) = plt.subplots(3, 2, figsize=(30,30))
for panel_ax, step_index in [(ax1,0),(ax3,2),(ax6,5),(ax12,11),(ax24,23),(ax36,35)]:
    panel_ax.set_box_aspect(1)
    sns.heatmap(StepEnsembles[step_index]['df_calib'].corr(), ax=panel_ax, cmap=cm)
    # set_title must be CALLED; assigning to it only overwrites the method and draws no title
    panel_ax.set_title('step ' + str(step_index + 1))

plt.tight_layout()
# Save before showing, so the file does not depend on how the backend treats shown figures
figname = overleafpath + 'Correlations/PredictionCorrelations_calib_' + level + '.png'
fig.savefig(figname, dpi=300)
plt.show()

In [None]:
# Correlations between the columns of the test-partition frame of the first step entry
corrdf = StepEnsembles[0]['df_test'].corr()
corrdf


In [None]:
# First rows of the mean-correlation table
meancorr_df.head()

# Create sc predictions and country prediction tables

In [None]:
# Summary statistics of the calibrated test-partition predictions of the first model
ModelList[0]['test_df_calibrated'].describe()

In [None]:
# Construct a set of step-combined series starting from a series of start months
# -- to see how predictions react to events at different calendar times
last_in_training = 444
last_in_test = 492
# t_range specifies the duration of the step-combined series to construct
t_range = range(0, 48)
# NOTE(review): range(1,36) stops at step 35, so step_pred_36 is never used -- confirm intended.
step_range = range(1,36)

for model in ModelList:
    print(model['modelname'])
    df = model['test_df_calibrated']
    # Month of every row (first index level); used to select the rows of one target month
    month_of_row = df.index.get_level_values(0)

    sc_df = pd.DataFrame(df['ln_ged_sb_dep'])
    for month in t_range:
        # Create a column to hold predictions starting from a given "last observed" month
        col = 'sc_' + str(last_in_training + month)
        sc_df[col] = np.nan
        for step in step_range:
            target_month = last_in_training + month + step
            if target_month <= last_in_test: # To avoid generating series beyond last month in partition
                predcol = 'step_pred_' + str(step) # The column in the in-df that contains predictions for this step
                # Write directly into the frame with a single .loc call: assigning through
                # frame[col].loc[...] is chained assignment and may update a temporary copy only
                rows = month_of_row == target_month
                sc_df.loc[rows, col] = df.loc[rows, predcol].values
    model['sc_df'] = sc_df
    # NOTE(review): this stores a GroupBy object rather than a smoothed data frame, and the
    # rolling mean runs over the whole frame rather than per country -- confirm intended.
    model['sc_df_smooth']=model['sc_df'].rolling(3,center=True).mean().groupby(level=1)

    # Sub-list of predictions by country: one entry per id 0..249, so that the position in
    # the list equals the country id; only ids present in the data get data frames
    model['CountryList'] = []
    countries = model['test_df_calibrated'].index.unique(level='country_id').tolist()
    for cnt in range(250):
        cntdict = {
            'country_id': cnt,
            'country_name': ''
        }
        if cnt in countries:
            cntdict['test_df_calibrated'] = model['test_df_calibrated'].xs(cnt, level='country_id').copy()
            country_sc = model['sc_df'].xs(cnt, level='country_id')
            # Smooth the prediction columns (all but the first) with a centered 3-row rolling
            # mean; where that yields missing values, fall back to the unsmoothed values
            smoothed = country_sc.copy()
            predcols = smoothed.columns[1:]
            smoothed[predcols] = country_sc[predcols].rolling(3,center=True).mean()
            cntdict['sc_df'] = country_sc
            cntdict['sc_df_smooth'] = smoothed.fillna(country_sc)
        model['CountryList'].append(cntdict)
    

## Evaluation of constituent models

In [None]:
# Summary statistics of the calibrated calibration-partition predictions.
# NOTE(review): `model` is whatever the last loop executed before this cell left behind.
model['calib_df_calibrated'].describe()

In [None]:
# Evaluation of constituent models: containers that collect one row of MSEs per model
calculate_grpMSEs = False

MSE_calib_all, MSE_calib_zeros, MSE_calib_nonzeros = [], [], []
if calculate_grpMSEs:
    MSE_calib_grp0, MSE_calib_grp1, MSE_calib_grp2, MSE_calib_grp3 = [], [], [], []
MSE_test_all, MSE_test_zeros, MSE_test_nonzeros = [], [], []
MSE_test_exp_all, MSE_test_exp_zeros, MSE_test_exp_nonzeros = [], [], []

def _drop_infinite(frame, col):
    """Return the rows of frame in which column col is not infinite."""
    return frame[~np.isinf(frame[col])]


def _mse_vs_observed(frame, col, rows=None):
    """MSE between prediction column col and the 'ln_ged_sb_dep' column of the same frame.

    rows: optional boolean mask; when given, only these rows of frame are evaluated.
    """
    if rows is not None:
        frame = frame[rows]
    return mean_squared_error(frame[col], frame['ln_ged_sb_dep'])


for model in ModelList:
    modelname = model['modelname']
    print(modelname)

    # Suffix of the model['mse_<suffix>'] list -> table that receives this model's row
    tables = {
        'calib': MSE_calib_all,
        'calib_zeros': MSE_calib_zeros,
        'calib_nonzeros': MSE_calib_nonzeros,
        'test': MSE_test_all,
        'test_zeros': MSE_test_zeros,
        'test_nonzeros': MSE_test_nonzeros,
        'test_exp': MSE_test_exp_all,
        'test_exp_zeros': MSE_test_exp_zeros,
        'test_exp_nonzeros': MSE_test_exp_nonzeros,
    }
    if calculate_grpMSEs:
        tables.update({
            'calib_grp0': MSE_calib_grp0,
            'calib_grp1': MSE_calib_grp1,
            'calib_grp2': MSE_calib_grp2,
            'calib_grp3': MSE_calib_grp3,
        })
    for suffix in tables:
        model['mse_' + suffix] = []

    # Countries present for THIS model (set here rather than inherited from an earlier cell)
    countries = model['test_df_calibrated'].index.unique(level='country_id').tolist()
    for cnt in model['CountryList']:
        if cnt['country_id'] in countries:
            cnt['mse'] = []

    for col in stepcols[1:]:
        # Remove from evaluation rows where [col] has infinite values (due to the 2011 split of Sudan)
        df_calib = _drop_infinite(model['calib_df_calibrated'], col)
        df_test = _drop_infinite(model['test_df_calibrated'], col)
        df_test_exp = _drop_infinite(model['test_df_cal_expand'], col)

        # All rows, rows with outcome 0, and rows with outcome > 0, for each of the three
        # frames. Predictions are always compared with the outcome column of the SAME
        # filtered frame, so both series have the same rows even when infinite rows were
        # dropped (the unfiltered target series would no longer match in length).
        scores = {}
        for prefix, frame in (('calib', df_calib), ('test', df_test), ('test_exp', df_test_exp)):
            observed = frame['ln_ged_sb_dep']
            scores[prefix] = _mse_vs_observed(frame, col)
            scores[prefix + '_zeros'] = _mse_vs_observed(frame, col, observed == 0)
            scores[prefix + '_nonzeros'] = _mse_vs_observed(frame, col, observed > 0)

        if calculate_grpMSEs:
            # MSE for groups of cases based on baseline model predictions
            for grp in ('grp0', 'grp1', 'grp2', 'grp3'):
                scores['calib_' + grp] = _mse_vs_observed(df_calib, col, onset_mask_calib[col] == grp)

        for suffix, value in scores.items():
            model['mse_' + suffix].append(value)

        # Country-specific test MSEs for this step
        for cnt in model['CountryList']:
            if cnt['country_id'] in countries:
                cnt_test = _drop_infinite(cnt['test_df_calibrated'], col)
                cnt['mse'].append(_mse_vs_observed(cnt_test, col))

    # One row per table: the model name followed by the MSE for every step
    for suffix, table in tables.items():
        table.append([modelname] + model['mse_' + suffix])
    
# One data frame per MSE table. The first column, labelled 'ln_ged_sb_dep', carries the model
# name; for the calibration tables it is used as the index.
MSE_calib_all_df = pd.DataFrame(MSE_calib_all, columns=stepcols).set_index('ln_ged_sb_dep')
MSE_calib_zeros_df = pd.DataFrame(MSE_calib_zeros, columns=stepcols).set_index('ln_ged_sb_dep')
MSE_calib_nonzeros_df = pd.DataFrame(MSE_calib_nonzeros, columns=stepcols).set_index('ln_ged_sb_dep')
if calculate_grpMSEs:
    MSE_calib_grp0_df = pd.DataFrame(MSE_calib_grp0, columns=stepcols)
    MSE_calib_grp1_df = pd.DataFrame(MSE_calib_grp1, columns=stepcols)
    MSE_calib_grp2_df = pd.DataFrame(MSE_calib_grp2, columns=stepcols)
    MSE_calib_grp3_df = pd.DataFrame(MSE_calib_grp3, columns=stepcols)
MSE_test_all_df = pd.DataFrame(MSE_test_all, columns=stepcols)
MSE_test_zeros_df = pd.DataFrame(MSE_test_zeros, columns=stepcols)
MSE_test_nonzeros_df = pd.DataFrame(MSE_test_nonzeros, columns=stepcols)
MSE_test_exp_all_df = pd.DataFrame(MSE_test_exp_all, columns=stepcols)
MSE_test_exp_zeros_df = pd.DataFrame(MSE_test_exp_zeros, columns=stepcols)
MSE_test_exp_nonzeros_df = pd.DataFrame(MSE_test_exp_nonzeros, columns=stepcols)

print('All models done')

In [None]:
# Save the MSE dfs, one csv file per table, named after the table
dflist = [
    (MSE_calib_all_df,'MSE_calib_all_df'),
    (MSE_calib_zeros_df,'MSE_calib_zeros_df'),
    (MSE_calib_nonzeros_df,'MSE_calib_nonzeros_df'),
    (MSE_test_all_df,'MSE_test_all_df'),
    (MSE_test_zeros_df,'MSE_test_zeros_df'),
    (MSE_test_nonzeros_df,'MSE_test_nonzeros_df'),
    (MSE_test_exp_all_df,'MSE_test_exp_all_df'),
    (MSE_test_exp_zeros_df,'MSE_test_exp_zeros_df'),
    (MSE_test_exp_nonzeros_df,'MSE_test_exp_nonzeros_df'),
]

path = '/Users/havardhegre/Dropbox (ViEWS)/ViEWS/Projects/PredictingFatalities/MSEs/'
for frame, stem in dflist:
    filename = f'{path}{stem}.csv'
    frame.to_csv(filename)
    

## List global MSEs

In [None]:
# Label printed in front of each list of per-step MSEs, and the model key holding the list
report_items = [
    ('MSE calibration partition:', 'mse_calib'),
    ('MSE test partition:', 'mse_test'),
    ('MSE test partition, zeros:', 'mse_test_zeros'),
    ('MSE test partition, non-zeros:', 'mse_test_nonzeros'),
]
for model in ModelList:
    print(model['modelname'])
    for label, key in report_items:
        print(label, model[key])

# Plotting performance as heatmaps

## Ablation MSE

In [None]:
# Heatmap of ablation MSEs
import seaborn as sns

# df to hold ablation MSEs: one column per model that has ablation scores. Selecting on the
# 'Ablation_MSE' key (instead of a fixed slice of ModelList) makes sure no constituent model
# is left out, however many ensembles have been appended at the end of the list.
abl_df = pd.DataFrame({
    model['modelname']: model['Ablation_MSE']
    for model in ModelList if 'Ablation_MSE' in model
})
# Drop entry 0 (the 'ln_ged_sb_dep' column); then models in rows, steps in columns
abl_df = abl_df.iloc[1:].T
abl_df.columns=stepcols[1:]

palette = 'vlag'
fig, ax =plt.subplots(1,figsize=(16,11))
ax = sns.heatmap(abl_df, center=0, xticklabels=2, linewidths=.5, cmap=palette, square=True, ax=ax)

overleafpath = '/Users/havardhegre/Dropbox (ViEWS)/Apps/Overleaf/ViEWS predicting fatalities/Figures/Pred_Eval/'
filename = overleafpath + 'Ablation_MSEs.png'
plt.savefig(filename, dpi=300)


## Dataframes with country-specific MSEs

In [None]:
# Making a dataframe for each model, plotting with Matshow
# Rows: the countries in Countries_to_plot; columns: the per-step test MSEs of that country
Countries_to_plot = [41,42,43,47,48,49,50,52,53,54,55,56,60,62,67,69,70,124,155,156,157,162]
# Row labels, in the same order as Countries_to_plot (ids without a name are shown as ids)
Namelist = ['Cote dIvoire','Ghana','Liberia','47','48','49','50',
            '52','53','54','55','56','60','62','67',
            '69','70','124','155','156','157','162']


for model in ModelList:
    # model['CountryList'] is indexed by country id
    listdata = [[country_id] + model['CountryList'][country_id]['mse']
                for country_id in Countries_to_plot]
    colnames = ['Country'] + stepcols[1:]
    model['CC_MSEs'] = pd.DataFrame(listdata,columns=colnames)

    plt.matshow(model['CC_MSEs'][stepcols[1:]])
    cb = plt.colorbar()
    cb.ax.tick_params(labelsize=14)
    # matshow places column j at position j, so the label of step s goes at position s-1
    plt.xticks(range(len(steps)), steps)
    plt.yticks(range(len(Countries_to_plot)), Namelist)

    plt.show()
    
    


In [None]:
# Plotting prediction vs actuals: a plx-by-ply grid of scatter plots, one panel per model

plx = 2
ply = 2
fig, axs = plt.subplots(plx, ply, sharey=True,sharex=True,figsize=(13,12))
# Tick positions on the log1p scale, labelled with the corresponding raw values
log_scale_naming = ['0','1', '10', '100', '1000','10000']
log_scale_value = np.array([np.log1p(int(label)) for label in log_scale_naming])

month = [487,488,489,490,491,492]
step = 13
predvar = f'step_pred_{step}'
size = 20

model_data = []
# Panels are filled down the first column of the grid, then down the next one
panel_models = [ModelList[-1]] * 4
for panel, model in enumerate(panel_models):
    fpy, fpx = divmod(panel, plx)
    panel_ax = axs[fpx,fpy]
    predictions = model['test_df_calibrated']
    print(model['modelname'], fpx, fpy)
    print('Prediction mean: ', predictions[predvar].mean())
    panel_ax.scatter(predictions['ln_ged_sb_dep'].loc[month], predictions[predvar].loc[month], s=size, alpha=0.5)
    panel_ax.set_ylabel(model['modelname'], fontsize=10)
    panel_ax.set_xlabel('Actually observed', fontsize=10)
    plt.xticks(log_scale_value, log_scale_naming, rotation=30)
    plt.yticks(log_scale_value, log_scale_naming, rotation=30)
    panel_ax.grid(True)

fig.tight_layout()

overleafpath = '/Users/havardhegre/Dropbox (ViEWS)/Apps/Overleaf/ViEWS predicting fatalities/Figures/PredictionPlots/'
filename = overleafpath + 'PredictionVsActuals_cm_s' + str(step) + '.png'
plt.savefig(filename, dpi=300)

In [None]:
# Identify the outliers: rows in the selected months whose outcome exceeds log1p(1000)
df = model['test_df_calibrated']
cols = ['ln_ged_sb_dep','step_pred_2']
above_1000 = df['ln_ged_sb_dep'] > np.log1p(1000)
print(df.loc[above_1000, cols].loc[month].head(20))
# Country ids:
# 57: Ethiopia
# 120: Somalia
# 126: Azerbaijan (Nagorno-Karabakh)
# 133: Afghanistan
# 167: DRC
# 220: Mali
# NOTE(review): the country list of the over-time plots pairs 220 with Syria and 50 with
# Mali -- confirm which is right.

# Raw values and their positions on the log1p scale
for raw_value in (10, 25, 100, 1000):
    logged = np.log1p(raw_value)
    print('exp(', str(logged),')', np.rint(np.expm1(logged)))

In [None]:
# Not displayed: only the last expression of a cell is shown
model['sc_df'].head()
# NOTE(review): the groups are formed on index level 1, from which country_id is read, although
# the group key is named month_id. The loop body uses neither the key nor cnt_df: every
# iteration recomputes the same rolling mean over the whole frame -- confirm what was intended.
for month_id, cnt_df in model['sc_df'].groupby(level=1):
    country_id = cnt_df.index.values[0][1]
#    print(country_id)
    model['sc_df_smooth']=model['sc_df'].rolling(3,center=True).mean()

In [None]:
    # Make a smoothed version
    # NOTE(review): because of the trailing .groupby(level=1) this stores a GroupBy object
    # built on the rolling mean of the whole frame, not a data frame -- confirm intended.
    model['sc_df_smooth']=model['sc_df'].rolling(3,center=True).mean().groupby(level=1)
#    model['sc_df_smooth'] = model['sc_df'].copy()
#    for col in model['sc_df_smooth'].columns[1:]:
#        model['sc_df_smooth'].col

In [None]:
# Plot figures with predictions as they evolve over time
# New version

from matplotlib import cm

#ModelSelection = [1,3,5,9,11]

# Settings for the over-time prediction plots
plt.rcParams["figure.figsize"] = (6, 6)
overleafpath = '/Users/havardhegre/Dropbox (ViEWS)/ViEWS/Projects/PredictingFatalities/PredictionPlots/'
path = '/Users/havardhegre/Dropbox (ViEWS)/ViEWS/Projects/PredictingFatalities/PredictionPlots/'

# Tick positions on the log1p scale and the raw values shown as their labels
log_scale_naming = ['0','1','3','10','30','100','300','1000','3000']
log_scale_value = np.log1p(np.array([0, 1, 3, 10, 30, 100, 300, 1000, 3000]))
# Month ids used as x tick positions, with their calendar labels
month_value = np.array([445,451,457,463,469,475,481,487,492])
month_name = ['Jan-17','Jul-17','Jan-18','Jul-18','Jan-19','Jul-19','Jan-20','Jul-20','Dec-20']
first_month = 444

# (name, country id, number) per country to plot
# NOTE(review): the third element is not used in the visible code; presumably an axis limit.
CountryList = [
    ('Angola',165,500),
    ('Botswana',154,500),
    ('BurkinaFaso',47,2000),
    ('Burundi',155,2000),
#    ('Cameroon',211,2000),
    ('Chad',214,2000),
    ('Congo',166,2000),
    ('DR Congo',167,20000),
    ('Egypt',222,5000),
    ('Ethiopia',57,2000),
    ('Gabon',169,500),
    ('Iran',128,2000),
    ('Israel',218,2000),
    ('Jordan',62,2000),
    ('Kenya',237,2000),
    ('Lebanon',94,2000),
    ('Libya',213,5000),
    ('Madagascar',172,2000),
    ('Mali',50,20000),
    ('Mauritania',244,500),
    ('Morocco',243,500),
    ('Mozambique',162,2000),
    ('Namibia',170,500),
    ('Niger',78,2000),
    ('Nigeria',79,20000),
    ('Oman',119,2000),
    ('Rwanda',156,2000),
    ('Saudi Arabia',131,500),
    ('South Africa',163,2000),
    ('South Sudan',246,5000),
    ('Sudan',245,2000),
    ('Syria',220,50000),
    ('Tanzania',242,500),
    ('Uganda',235,2000),
    ('Yemen',124,20000),
    ('Zimbabwe',158,5000),
]
# Offsets from month 444 of the series to draw: every second one from 0 through 36
t_range = list(range(0, 37, 2))
    
for model in ModelList:
    print(model['modelname'])
    # Calculate non-logged and cumulative series
    for cnt in CountryList:
#        print(cnt)
        sc_df = model['CountryList'][cnt[1]]['sc_df_smooth']
        months = sc_df.index.to_series()
        sc_df_exp = sc_df.copy()
        sc_df_cum = sc_df.copy()
        # Loop over all steps for each country
        for column in sc_df.columns:
            sc_df_exp[column]=np.rint(np.expm1(sc_df[column]))
            sc_df_cum[column]=sc_df_exp[column].cumsum(axis=0, skipna = True)
        sc_df_cumtemp = sc_df_cum.copy()
        # Set first value of an sc series to the cumulated count up to t-1 (cct1 below)
        cumdepvar = sc_df_cum['ln_ged_sb_dep']#.shift(1) # A  cumulative dependent variable series
        i = first_month + 1
        for column in sc_df.columns[2:]:
            cct1 = cumdepvar[i]
            sc_df_cum[column]=sc_df_cum[column]+cct1
            i = i + 1
            
        plt.clf()
#        print('Country',cnt[0])
        plt.bar(months, 'ln_ged_sb_dep', data=sc_df, color='.8')
        for m in t_range:
            series = 'sc_' + str(444+m)
            plt.plot(months, series, data=sc_df, c=cm.hot(np.abs((m/60)+.2)))
        plt.ylabel('Number of fatalities')
        plt.yticks(log_scale_value, log_scale_naming, rotation=30)
        plt.xticks(month_value, month_name, rotation=30)
        plt.grid(axis='y')
        plt.ylim([0,np.log1p(3000)])
        plt.title = cnt[0]
        filename = path + 'OverTime/' + model['modelname'] + '_' + cnt[0] + '.png'
        plt.savefig(filename, dpi=200)

        plt.clf()
        plt.title = cnt[0]
        plt.bar(months, 'ln_ged_sb_dep', data=sc_df_cum, color='.8')
        for m in t_range:
            series = 'sc_' + str(444+m)
            plt.plot(months, series, data=sc_df_cum, c=cm.hot(np.abs((m/60)+.2)))
        plt.ylabel('Cumulative (non-logged) fatalities')
        plt.xticks(month_value, month_name, rotation=30)
        plt.grid(axis='y')
        plt.ylim([0,cnt[2]])
    #    plt.show()
        filename = path + 'Cumulative/' + model['modelname'] + '_' + cnt[0] + '.png'
        plt.savefig(filename, dpi=200)

print('All done')

In [None]:
# Figures showing development over time for Burkina Faso, Mali, Ethiopia and Nigeria.
# One figure is written per (model, country, offset m): the first m observations of
# 'ln_ged_sb_dep' are drawn as bars together with the single series sc_<444+m>.

# Each entry: (name used in file names and titles, key into model['CountryList'], unused here)
CountryList = [
    ('BurkinaFaso',47,2000),
    ('Mali',50,20000),
    ('Ethiopia',57,2000),
    ('Nigeria',79,20000),
]
t_range = [0] + steps


for model in ModelList:
    print(model['modelname'])
    for cnt in CountryList:
        sc_df = model['CountryList'][cnt[1]]['sc_df_smooth']
        months = sc_df.index.to_series()

        for m in t_range:
            plt.clf()
            plt.ylabel('Number of fatalities')
            plt.yticks(log_scale_value, log_scale_naming, rotation=30)
            plt.xticks(month_value, month_name, rotation=30)
            plt.grid(axis='y')
            plt.ylim([0,np.log1p(3000)])

            # Build the truncated series on a derived frame. Writing the column into
            # sc_df itself would modify the frame stored inside ModelList, so that a
            # second run of this (or the previous) cell would see an extra column.
            plot_df = sc_df.assign(truncated_ged_sb=sc_df['ln_ged_sb_dep'][0:m])
            predseries = 'sc_' + str(444+m)
            plt.bar(months, 'truncated_ged_sb', data=plot_df, color='.8')
            plt.plot(months, predseries, data=plot_df, c=cm.hot(np.abs((m/60)+.2)))
            # `plt.title = ...` would overwrite the pyplot function instead of drawing a
            # title; set it on the current axes instead.
            plt.gca().set_title(cnt[0])

            filename = path + 'OverTime/Rolling/' + model['modelname'] + '_' + cnt[0] + '_' + str(444+m) + '.png'
            plt.savefig(filename, dpi=200)
print('All done')

# Uncertainty of predictions

In [None]:
# Summary statistics of the 'test_df_calibrated' frame held by the last entry of ModelList
ModelList[-1]['test_df_calibrated'].describe()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
# Rows of the calibrated test frame to analyse, selected by these index labels
months = [487, 488,489,490,491,492]
plt.rcParams["figure.figsize"] = (6, 6)
overleafpath = '/Users/havardhegre/Dropbox (ViEWS)/ViEWS/Projects/PredictingFatalities/PredictionPlots/'
path = '/Users/havardhegre/Dropbox (ViEWS)/ViEWS/Projects/PredictingFatalities/PredictionPlots/'

df = ModelList[-1]['test_df_calibrated'].loc[months]

# Bin the step-1 prediction (on the logged scale).
# NOTE(review): the edges look like approximate midpoints between log1p of the counts
# printed below (0, 1, 3, 10, ...) -- confirm.
bins = pd.IntervalIndex.from_tuples([(-1, 0.3), (0.3, 1.05), (1.05, 1.89), (1.89, 2.92), (2.92, 4.02), (4.02, 5.15), (5.15, 6.3), (6.3, 7.45), (7.45, 10)])
df['fatalitybins_1'] = pd.cut(df['step_pred_1'],bins)
# Printed explicitly: a bare expression in the middle of a cell is evaluated but never shown
print(df['fatalitybins_1'].describe())

# Reference table: raw counts and their log1p values, for reading the bin edges
for value in [1,3,10,30,100,300,1000,3000]:
    print('Count:', value, 'logged:', np.log1p(value))

In [None]:
percentiles = (0.05, 0.10, 0.25,0.5,0.75, 0.9,0.95, 0.99)

# Back-transform the observed outcome and the step-2 / step-13 predictions from log1p
df['ged_sb_dep'] = np.expm1(df['ln_ged_sb_dep'])
# Intermediate summaries are printed; as bare mid-cell expressions they would be discarded
print(df['ged_sb_dep'].describe(percentiles = percentiles))
df['exp_pred_2'] = np.expm1(df['step_pred_2'])
df['exp_pred_13'] = np.expm1(df['step_pred_13'])

# Bins on the count scale. The intervals are deliberately non-contiguous, so predictions
# falling between them (e.g. 10-30) get no bin (NaN) from pd.cut.
bins2 = pd.IntervalIndex.from_tuples([(3, 10), (30, 100),  (300, 1000)])

df['fatalitybins2_2'] = pd.cut(df['exp_pred_2'],bins2)
print(df['fatalitybins2_2'].describe())

df['fatalitybins2_13'] = pd.cut(df['exp_pred_13'],bins2)
df['fatalitybins2_13'].describe()


In [None]:
# Distribution of 'ln_ged_sb_dep' within each bin of the step-1 prediction.
# The loop variable is not called `bin`: at notebook top level that would replace the
# builtin bin() for the rest of the kernel session.
for interval in bins:
    print(interval)
    print(df.loc[df['fatalitybins_1'] == interval, 'ln_ged_sb_dep'].describe(percentiles = percentiles))

df.head()

In [None]:
sns.set_theme(style="ticks")
# Step whose binned predictions are plotted. The same value selects the column and names
# the output file, so the two cannot drift apart. A 'fatalitybins2_<step>' column must
# already exist in df for the chosen step.
step=13
pred_bin_col = f"fatalitybins2_{step}"

# Initialize the figure with a logarithmic x axis
f, ax = plt.subplots(figsize=(7, 6))
ax.set_xscale("log")

# Observed fatalities per predicted-fatalities bin, as horizontal boxes
sns.boxplot(x="ged_sb_dep", y=pred_bin_col, data=df,
            whis=[5, 95], width=.9, palette="vlag")

# Add in points to show each observation
sns.stripplot(x="ged_sb_dep", y=pred_bin_col, data=df,
              size=4, color=".3", linewidth=0)

# Tweak the visual presentation
ax.xaxis.grid(True)
ax.set(ylabel="Predicted number of fatalities")
ax.set(xlabel="Observed number of fatalities")
sns.despine(trim=True, left=True)

overleafpath = '/Users/havardhegre/Dropbox (ViEWS)/Apps/Overleaf/ViEWS predicting fatalities/Figures/PredictionPlots/'
filename = overleafpath + 'PredictionUncertainty_cm_s' + str(step) + '.png'
plt.savefig(filename, dpi=300)
                      

# Mapping

In [None]:
from matplotlib import pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
import contextily as ctx

from views_dataviz import color
from views_dataviz.map import utils
from views_dataviz.map.presets import ViewsMap

import sqlalchemy as sa
from ingester3.config import source_db_path
from ingester3.Country import Country
from ingester3.extensions import *
from ingester3.ViewsMonth import ViewsMonth

import geopandas as gpd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import geopandas as gpd
import pandas as pd
import numpy as np

class Mapper2:
    """
    `Map` takes basic properties and allows the user to consecutively add
    layers to the Map object. This makes it possible to prepare mapping
    "presets" at any level of layeredness that can be built on further.

    Mapper2 allows for the customizable addition of scaling to the map.
    -re-add the code for labels later when i can test it

    Attributes
    ----------
    width: Integer value for width in inches.
    height: Integer value for height in inches.
    bbox: List for the bbox per [xmin, xmax, ymin, ymax].
    cmap: Default colormap for layers that do not pass their own `cmap`.
    frame_on: Bool for whether to draw a frame around the map.
    title: Optional default title at matplotlib's default size.
    figure: Optional tuple of (fig, ax) to use if you want to plot into an
        already existing fig and ax, rather than making a new one.
    """

    def __init__(
        self,
        width,
        height,
        bbox=None,
        cmap=None,
        frame_on=True,
        title="",  # Default title without customization. (?)
        figure=None,
    ):
        self.width = width
        self.height = height
        self.bbox = bbox  # xmin, xmax, ymin, ymax
        self.cmap = cmap
        if figure is None:
            self.fig, self.ax = plt.subplots(figsize=(self.width, self.height))
        else:
            self.fig, self.ax = figure
        self.texts = []
        self.ax.set_title(title)

        if frame_on:  # Remove axis ticks only.
            self.ax.tick_params(
                top=False,
                bottom=False,
                left=False,
                right=False,
                labelleft=False,
                labelbottom=False,
            )
        else:
            self.ax.axis("off")

        if bbox is not None:
            self.ax.set_xlim((self.bbox[0], self.bbox[1]))
            self.ax.set_ylim((self.bbox[2], self.bbox[3]))

    @staticmethod
    def _scale_limits(map_scale):
        """Return (min, max) of a manual scale, or None when no usable scale was given."""
        if map_scale is None or map_scale is False:
            return None
        try:
            return min(map_scale), max(map_scale)
        except (TypeError, ValueError):
            # Not iterable / not comparable / empty: treat as "no manual scale".
            return None

    def add_layer(self, gdf, map_scale=False, map_dictionary=False, cmap=None, inform_colorbar=False, **kwargs):
        """Add a geopandas plot to a new layer.

        A colorbar is drawn for the layer when a usable `map_scale` is given
        (colorbar spans min..max of the scale) or when `inform_colorbar` is set
        and a `column` is plotted (colorbar spans `self.vmin`..`self.vmax`).
        No colorbar is drawn when `color` is supplied in the kwargs. An already
        existing colorbar axis is removed before a new one is added.

        Parameters
        ----------
        gdf: Geopandas GeoDataFrame to plot.
        cmap: Optional matplotlib colormap object or string reference
            (e.g. "viridis").
        inform_colorbar: Set or overwrite colorbar with the current layer.
            Not applicable when `color` is supplied in the kwargs.
        map_scale: set a manual scale for the map; its min and max become the
            plot's vmin/vmax unless those are passed explicitly in the kwargs.
            If missing defaults to the Remco procedure.
        map_dictionary: set manual labels for the map. Currently unused by
            this method; tick labels have to be set on `self.cbar` by the caller.
        **kwargs: Geopandas `.plot` keyword arguments.

        Returns
        -------
        self, so calls can be chained.
        """
        scale_limits = Mapper2._scale_limits(map_scale)

        # Explicit vmin/vmax from the caller take precedence over the manual scale.
        plot_kwargs = dict(kwargs)
        if scale_limits is not None:
            plot_kwargs.setdefault("vmin", scale_limits[0])
            plot_kwargs.setdefault("vmax", scale_limits[1])

        if "color" in kwargs:
            # Single-colour layer: no colormap and therefore no colorbar.
            colormap = None
        else:
            colormap = self.cmap if cmap is None else cmap
            track_column = inform_colorbar and "column" in kwargs
            if track_column:
                if "vmin" not in kwargs:
                    self.vmin = gdf[kwargs["column"]].min()
                else:
                    self.vmin = kwargs["vmin"]
                if "vmax" not in kwargs:
                    self.vmax = gdf[kwargs["column"]].max()
                else:
                    self.vmax = kwargs["vmax"]

            if scale_limits is not None or track_column:
                if hasattr(self, "cax"):
                    self.cax.remove()
                if scale_limits is not None:
                    cbar_min, cbar_max = scale_limits
                else:
                    cbar_min, cbar_max = self.vmin, self.vmax
                Mapper2.add_colorbar(self, colormap, cbar_min, cbar_max)

        self.ax = gdf.plot(ax=self.ax, cmap=colormap, **plot_kwargs)
        return self

    def add_colorbar(
        self,
        cmap,
        vmin,
        vmax,
        location="right",
        size="5%",
        pad=0.1,
        alpha=1,
        labelsize=16,
        tickparams=None,
    ):
        """Add custom colorbar to Map.

        Needed since GeoPandas legend and plot axes do not align, see:
        https://geopandas.readthedocs.io/en/latest/docs/user_guide/mapping.html

        The new axis is stored as `self.cax` and the colorbar as `self.cbar`.

        Parameters
        ----------
        cmap: Matplotlib colormap object or string reference (e.g. "viridis").
        vmin: Minimum value of range colorbar.
        vmax: Maximum value of range colorbar.
        location: String for location of colorbar: "top", "bottom", "left"
            or "right".
        size: Size in either string percentage or number of pixels.
        pad: Float for padding between the plot's frame and colorbar.
        alpha: Float for alpha to apply to colorbar.
        labelsize: Integer value for the text size of the ticklabels.
        tickparams: Dictionary containing value-label pairs. For example:
            {0.05: "5%", 0.1: "10%"}

        Returns
        -------
        self, so calls can be chained.
        """
        norm = plt.Normalize(vmin, vmax)
        if isinstance(cmap, str):
            cmap = plt.get_cmap(cmap)
        cmap = color.force_alpha_colormap(cmap=cmap, alpha=alpha)
        scalar_to_rgba = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
        divider = make_axes_locatable(self.ax)
        self.cax = divider.append_axes(location, size, pad)
        self.cax.tick_params(labelsize=labelsize)
        tickvalues = (
            list(tickparams.keys()) if tickparams is not None else None
        )
        self.cbar = plt.colorbar(
            scalar_to_rgba, cax=self.cax, ticks=tickvalues
        )
        if tickparams is not None:
            self.cbar.set_ticklabels(list(tickparams.values()))
        return self

    def save(
        self, path, dpi=200, **kwargs
    ):  # Just some defaults to reduce work.
        """Save Map figure to file and close the figure.

        Parameters
        ----------
        path: String path, e.g. "./example.png".
        dpi: Integer dots per inch. Increase for higher resolution figures.
        **kwargs: Matplotlib `savefig` keyword arguments.
        """
        self.fig.savefig(path, dpi=dpi, bbox_inches="tight", **kwargs)
        plt.close(self.fig)

In [None]:
def vid2date(i):
    """Return the ViewsMonth for id `i` formatted as 'year/month' (no padding applied)."""
    views_month = ViewsMonth(i)
    return f"{views_month.year}/{views_month.month}"

In [None]:
times = [445, 447, 450, 468]
allsteps = [1, 3, 6, 24]
titles = list(map(vid2date, times))
#note the zip function occurred earlier

# Colorbar scales: tick positions are log1p of the counts, labels are the counts themselves
standard_scale = [np.log1p(count) for count in (0, 10, 50, 100, 1000, 10000)]
standard_scale_labels = [str(count) for count in (0, 10, 50, 100, 1000, 10000)]

small_scale = [np.log1p(count) for count in (0, 10, 50, 100, 500)]
small_scale_labels = [str(count) for count in (0, 10, 50, 100, 500)]

In [None]:
# Prepare the gdf: read the country geometries from the source database
engine = sa.create_engine(source_db_path)
gdf_base = gpd.GeoDataFrame.from_postgis(
    sql="SELECT id as country_id, in_africa, in_me, geom FROM prod.country",
    con=engine,
    geom_col='geom',
)
# Working copy; gdf_base is kept untouched for the map cells below
gdf = gdf_base.copy()

In [None]:
# Quick look at the first rows of the country geometry frame
gdf.head()

In [None]:
# Test partition maps, predictions, rolling
# One map per (step, tshift): the step_pred_<step> column of the last model's calibrated
# test frame at month = lastmonthwithdata + tshift + step.
times_steps = [1, 3]
lastmonthwithdata = 444
path = '/Users/havardhegre/Dropbox (ViEWS)/ViEWS/Projects/PredictingFatalities/maps/cm_rolling/'

model = ModelList[-1]
modelname = model['modelname']

# Attach the country geometries to the prediction frame
gdf2 = gdf_base.copy()
df = model['test_df_calibrated'].copy()
df = df.join(gdf2.set_index("country_id"))
gdf = gpd.GeoDataFrame(df, geometry="geom")

for step in times_steps:
    for tshift in range(0, 46, 3):
        month = step + tshift + lastmonthwithdata
        m = Mapper2(
            width=10,
            height=10,
            frame_on=True,
            title=f"Model: {modelname}, predictions as of {vid2date(lastmonthwithdata + tshift)}, {step} months ahead",
            bbox=[-18.5, 64.0, -35.5, 43.0],
        )
        # add_layer returns the Mapper2 itself, so `m` is the same object either way
        m.add_layer(
            gdf=gdf.loc[month],
            map_scale=standard_scale,
            cmap="rainbow",
            edgecolor="black",
            linewidth=0.5,
            column=f"step_pred_{step}",
            inform_colorbar=True,
        )
        m.cbar.set_ticks(standard_scale)
        m.cbar.set_ticklabels(standard_scale_labels)

        m.save(f'{path}cm_{modelname}_standard_scale_s{step}_t{tshift}_m{month}.png')

In [None]:
# Test partition maps, actuals
# One map per step in `steps`: the 'ln_ged_sb_dep' column at month = lastmonthwithdata + step.
lastmonthwithdata = 444
path = '/Users/havardhegre/Dropbox (ViEWS)/ViEWS/Projects/PredictingFatalities/maps/cm_actuals/'

model = ModelList[-1]
modelname = model['modelname']

# Attach the country geometries to the frame holding the outcome column
gdf2 = gdf_base.copy()
df = model['test_df_calibrated'].copy()
df = df.join(gdf2.set_index("country_id"))
gdf = gpd.GeoDataFrame(df, geometry="geom")

for step in steps:
    month = step + lastmonthwithdata
    m = Mapper2(
        width=10,
        height=10,
        frame_on=True,
        title=f"Actually recorded fatalities, month {vid2date(month)}",
        bbox=[-18.5, 64.0, -35.5, 43.0],
    )
    # add_layer returns the Mapper2 itself, so `m` is the same object either way
    m.add_layer(
        gdf=gdf.loc[month],
        map_scale=standard_scale,
        cmap="rainbow",
        edgecolor="black",
        linewidth=0.5,
        column="ln_ged_sb_dep",
        inform_colorbar=True,
    )
    m.cbar.set_ticks(standard_scale)
    m.cbar.set_ticklabels(standard_scale_labels)

    m.save(f'{path}cm_actuals_standard_scale_s{step}_m{month}.png')