
# ViEWS 3 ensembles: future predictions
ViEWS monthly updates, cm level
Fatalities002 version

This notebook produces future predictions for the set of models defined in the list of dictionaries `ModelList`, using the ensemble weights stored in `iweights_df.csv`. Both are produced by the notebook `fatal_cm_compute_ensemble` in this repository.

The notebook draws on the following .py script files in this repository:

Ensembling.py

FetchData.py

ViewsEstimators.py

It also requires the list of models included in the ensemble, in the following file:

ModelDefinitions.py

In [1]:
# Enable IPython's autoreload extension in mode 2, so edits to imported
# modules (e.g. the repository's .py script files) are picked up live
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# Basics
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cbook as cbook
# sklearn
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn import linear_model
from sklearn.metrics import mean_squared_error

import warnings
warnings.filterwarnings('ignore')

# Views 3
from viewser.operations import fetch
from viewser import Queryset, Column
import views_runs
from views_partitioning import data_partitioner, legacy
from stepshift import views
import views_dataviz
from views_runs import storage, ModelMetadata
from views_runs.storage import store, retrieve, fetch_metadata
from views_forecasts.extensions import *

# Mapper
import geopandas as gpd

from views_dataviz.map import mapper, utils
from views_dataviz import color
from views_dataviz.map.presets import ViewsMap

import sqlalchemy as sa
#from ingester3.config import source_db_path

# Other packages
import pickle as pkl

#Parallelization
from joblib import Parallel, delayed, cpu_count
from functools import partial

from pathlib import Path

# Packages from this repository, Tools folder
import sys
sys.path.append('../')
sys.path.append('../Tools')
sys.path.append('../Intermediates')
sys.path.append('../SystemUpdates')
import os
from pathlib import Path

from Ensembling import CalibratePredictions, RetrieveStoredPredictions, mean_sd_calibrated, gam_calibrated

from FetchData import FetchData, RetrieveFromList, ReturnQsList
from ViewsEstimators import *

In [3]:
# Common parameters:
# Run-wide settings defined in this cell: identifiers, forecast steps,
# data partitions and local paths.

dev_id = 'Fatalities002'
run_id = dev_id
EndOfHistory = 509
prod_id = '2022_04_t01'
RunGeneticAlgo = False
level = 'cm'
WriteToOverleaf = False
get_future = False

username = os.getlogin()

# Which steps to train and predict for (1..36 inclusive)
steps = list(range(1, 36 + 1))
# Which steps to present feature importances for
fi_steps = [1, 3, 6, 12, 36]
# For quicker experiments a reduced set can be used instead,
# e.g. steps = [1, 6, 36] together with fi_steps = [1, 6, 36].

# Specifying partitions
# Each partition maps "train" and "predict" to a pair of integers
# (presumably first and last month_id of the period -- TODO confirm).

calib_partitioner_dict = {"train": (121, 396), "predict": (397, 444)}
test_partitioner_dict = {"train": (121, 444), "predict": (445, 492)}
future_partitioner_dict = {"train": (121, 492), "predict": (493, 504)}
calib_partitioner = views_runs.DataPartitioner({"calib": calib_partitioner_dict})
test_partitioner = views_runs.DataPartitioner({"test": test_partitioner_dict})
future_partitioner = views_runs.DataPartitioner({"future": future_partitioner_dict})

# Specifying paths - note these have to be set to conform to individual setups!

Mydropbox = f'/Users/{username}/Dropbox (ViEWS)/ViEWS/'
localgitpath = f'/Users/{username}/views3/'
notebookpath = os.getcwd()
# The markov tools are located relative to the parent of the notebook's working directory.
markovpath = str(Path(notebookpath).parent.absolute())+'/Tools/markov/'

if WriteToOverleaf:
    # Forecast figure folder for each supported EndOfHistory value.
    # NOTE(review): 508 and 509 both point to 'Apr2022', exactly as in the
    # original code -- confirm whether 509 should have its own folder.
    overleaf_folders = {508: 'Apr2022', 509: 'Apr2022'}
    if EndOfHistory not in overleaf_folders:
        # Fail with an explicit message instead of a NameError on the print below.
        raise ValueError(
            f'No Overleaf figure folder is configured for EndOfHistory={EndOfHistory}; '
            f'supported values are {sorted(overleaf_folders)}'
        )
    overleafpath = f'/Users/{username}/Dropbox (ViEWS)/Apps/Overleaf/ViEWS_Presentations_2021/Figures/Forecasts/{overleaf_folders[EndOfHistory]}/'

    print('Overleaf path set to',overleafpath)

print('Dropbox path set to',Mydropbox)
print('Markov code path set to',markovpath)


Dropbox path set to /Users/havardhegre1/Dropbox (ViEWS)/ViEWS/
Markov code path set to /Users/havardhegre1/views3/viewsforecasting/Tools/markov/


# Retrieve models and predictions

In [4]:
# Build the list of model dictionaries that make up the ensemble for the
# chosen level, then print one line per model: its position in ModelList,
# its 'modelname' and its 'data_train' entry.
from ModelDefinitions import DefineEnsembleModels

ModelList = DefineEnsembleModels(level)

# enumerate supplies the running index, so no hand-maintained counter is needed.
for i, model in enumerate(ModelList):
    print(i, model['modelname'], model['data_train'])

0 fatalities002_baseline_rf baseline002
1 fatalities002_conflicthistory_rf conflict_ln
2 fatalities002_conflicthistory_gbm conflict_ln
3 fatalities002_conflicthistory_hurdle_lgb conflict_ln
4 fatalities002_conflicthistory_long_xgb conflictlong_ln
5 fatalities002_vdem_hurdle_xgb vdem_short
6 fatalities002_wdi_rf wdi_short
7 fatalities002_topics_rf topics_002
8 fatalities002_topics_xgb topics_002
9 fatalities002_topics_hurdle_lgb topics_002
10 fatalities002_joint_broad_rf joint_broad
11 fatalities002_joint_broad_hurdle_rf joint_broad
12 fatalities002_joint_narrow_xgb joint_narrow
13 fatalities002_joint_narrow_hurdle_xgb joint_narrow
14 fatalities002_joint_narrow_hurdle_lgb joint_narrow
15 fatalities002_all_pca3_xgb all_features
16 fatalities002_aquastat_rf aquastat
17 fatalities002_faostat_rf faostat
18 fatalities002_faoprices_rf faoprices
19 fatalities002_imfweo_rf imfweo
20 fat_hh20_Markov_glm joint_narrow
21 fat_hh20_Markov_rf joint_narrow


# Retrieve and calibrate predictions and data

In [None]:
## Running and saving David's models
# Import subprocess to run Rscript
import subprocess

# Fetch the 'hh_20_features' country-month queryset and save it as a parquet
# file; the same path is handed to the R script as an argument below.
data_path = markovpath + 'tmp.parquet'
qs = Queryset('hh_20_features','country_month')
qs.fetch().to_parquet(data_path)

# Set commands and arguments. R-scripts located in 'Markov'-folder
command = 'Rscript'
path2script = markovpath + 'omm_ranger_hh20_fcdo_py.R'

cmd = [command, path2script]
save_path = Mydropbox + 'Projects/PredictingFatalities/Predictions/cm/preds/'
# Script arguments, in order: end of history, input data file, output folder.
args = [str(EndOfHistory), data_path, save_path]

# Run subprocess. Saves the predictions as csv-files to the save_path location with prefix vmm_[estimator]_hh20_[EndOfHistory]
# check=True raises subprocess.CalledProcessError when Rscript exits with a
# non-zero status, so a failed run stops the notebook here instead of letting
# later cells continue without fresh predictions.
completed = subprocess.run(cmd + args, check=True)
# Display the exit status as the cell output, as the original call did.
completed.returncode


 .    Installing ranger... 


also installing the dependencies ‘lattice’, ‘Rcpp’, ‘Matrix’, ‘RcppEigen’

trying URL 'https://cloud.r-project.org/src/contrib/lattice_0.20-45.tar.gz'
Content type 'application/x-gzip' length 399470 bytes (390 KB)
downloaded 390 KB

trying URL 'https://cloud.r-project.org/src/contrib/Rcpp_1.0.9.tar.gz'
Content type 'application/x-gzip' length 2957812 bytes (2.8 MB)
downloaded 2.8 MB

trying URL 'https://cloud.r-project.org/src/contrib/Matrix_1.4-1.tar.gz'
Content type 'application/x-gzip' length 2862737 bytes (2.7 MB)
downloaded 2.7 MB

trying URL 'https://cloud.r-project.org/src/contrib/RcppEigen_0.3.3.9.2.tar.gz'
Content type 'application/x-gzip' length 1476004 bytes (1.4 MB)
downloaded 1.4 MB

trying URL 'https://cloud.r-project.org/src/contrib/ranger_0.14.1.tar.gz'
Content type 'application/x-gzip' length 197762 bytes (193 KB)
downloaded 193 KB

* installing *source* package ‘lattice’ ...
** package ‘lattice’ successfully unpacked and MD5 sums checked
** using staged installation
*

arm64-apple-darwin20.0.0-clang -I"/Users/havardhegre1/mambaforge/envs/viewser/lib/R/include" -DNDEBUG   -D_FORTIFY_SOURCE=2 -isystem /Users/havardhegre1/mambaforge/envs/viewser/include -mmacosx-version-min=11.0 -I/Users/havardhegre1/mambaforge/envs/viewser/include   -fPIC  -ftree-vectorize -fPIC -fPIE -fstack-protector-strong -O2 -pipe -isystem /Users/havardhegre1/mambaforge/envs/viewser/include -fdebug-prefix-map=/Users/runner/miniforge3/conda-bld/r-base-split_1659993955429/work=/usr/local/src/conda/r-base-4.2.1 -fdebug-prefix-map=/Users/havardhegre1/mambaforge/envs/viewser=/usr/local/src/conda-prefix  -c init.c -o init.o
arm64-apple-darwin20.0.0-clang -I"/Users/havardhegre1/mambaforge/envs/viewser/lib/R/include" -DNDEBUG   -D_FORTIFY_SOURCE=2 -isystem /Users/havardhegre1/mambaforge/envs/viewser/include -mmacosx-version-min=11.0 -I/Users/havardhegre1/mambaforge/envs/viewser/include   -fPIC  -ftree-vectorize -fPIC -fPIE -fstack-protector-strong -O2 -pipe -isystem /Users/havardhegre1/ma

installing to /Users/havardhegre1/mambaforge/envs/viewser/lib/R/library/00LOCK-lattice/00new/lattice/libs
** R
** data
*** moving datasets to lazyload DB
** demo
** inst
** byte-compile and prepare package for lazy loading
** help
*** installing help indices
** building package indices
** testing if installed package can be loaded from temporary location
** checking absolute paths in shared objects and dynamic libraries
** testing if installed package can be loaded from final location
** testing if installed package keeps a record of temporary installation path
* DONE (lattice)
* installing *source* package ‘Rcpp’ ...
** package ‘Rcpp’ successfully unpacked and MD5 sums checked
** using staged installation
** libs


arm64-apple-darwin20.0.0-clang++ -std=gnu++14 -I"/Users/havardhegre1/mambaforge/envs/viewser/lib/R/include" -DNDEBUG -I../inst/include/  -D_FORTIFY_SOURCE=2 -isystem /Users/havardhegre1/mambaforge/envs/viewser/include -mmacosx-version-min=11.0 -I/Users/havardhegre1/mambaforge/envs/viewser/include   -fPIC  -ftree-vectorize -fPIC -fPIE -fstack-protector-strong -O2 -pipe -stdlib=libc++ -fvisibility-inlines-hidden -fmessage-length=0 -isystem /Users/havardhegre1/mambaforge/envs/viewser/include -fdebug-prefix-map=/Users/runner/miniforge3/conda-bld/r-base-split_1659993955429/work=/usr/local/src/conda/r-base-4.2.1 -fdebug-prefix-map=/Users/havardhegre1/mambaforge/envs/viewser=/usr/local/src/conda-prefix  -c api.cpp -o api.o
arm64-apple-darwin20.0.0-clang++ -std=gnu++14 -I"/Users/havardhegre1/mambaforge/envs/viewser/lib/R/include" -DNDEBUG -I../inst/include/  -D_FORTIFY_SOURCE=2 -isystem /Users/havardhegre1/mambaforge/envs/viewser/include -mmacosx-version-min=11.0 -I/Users/havardhegre1/mambafor

installing to /Users/havardhegre1/mambaforge/envs/viewser/lib/R/library/00LOCK-Rcpp/00new/Rcpp/libs
** R
** inst
** byte-compile and prepare package for lazy loading


code for methods in class “C++OverloadedMethods” was not checked for suspicious field assignments (recommended package ‘codetools’ not available?)
code for methods in class “RcppClass” was not checked for suspicious field assignments (recommended package ‘codetools’ not available?)
code for methods in class “RcppClass” was not checked for suspicious field assignments (recommended package ‘codetools’ not available?)


** help
*** installing help indices
** building package indices
** installing vignettes
** testing if installed package can be loaded from temporary location
** checking absolute paths in shared objects and dynamic libraries
** testing if installed package can be loaded from final location
** testing if installed package keeps a record of temporary installation path
* DONE (Rcpp)
* installing *source* package ‘Matrix’ ...
** package ‘Matrix’ successfully unpacked and MD5 sums checked
** using staged installation
** libs


arm64-apple-darwin20.0.0-clang -I"/Users/havardhegre1/mambaforge/envs/viewser/lib/R/include" -DNDEBUG -DNTIMER -I./SuiteSparse_config -DUSE_FC_LEN_T  -D_FORTIFY_SOURCE=2 -isystem /Users/havardhegre1/mambaforge/envs/viewser/include -mmacosx-version-min=11.0 -I/Users/havardhegre1/mambaforge/envs/viewser/include   -fPIC  -ftree-vectorize -fPIC -fPIE -fstack-protector-strong -O2 -pipe -isystem /Users/havardhegre1/mambaforge/envs/viewser/include -fdebug-prefix-map=/Users/runner/miniforge3/conda-bld/r-base-split_1659993955429/work=/usr/local/src/conda/r-base-4.2.1 -fdebug-prefix-map=/Users/havardhegre1/mambaforge/envs/viewser=/usr/local/src/conda-prefix  -c CHMfactor.c -o CHMfactor.o
arm64-apple-darwin20.0.0-clang -I"/Users/havardhegre1/mambaforge/envs/viewser/lib/R/include" -DNDEBUG -DNTIMER -I./SuiteSparse_config -DUSE_FC_LEN_T  -D_FORTIFY_SOURCE=2 -isystem /Users/havardhegre1/mambaforge/envs/viewser/include -mmacosx-version-min=11.0 -I/Users/havardhegre1/mambaforge/envs/viewser/include   

arm64-apple-darwin20.0.0-clang -I"/Users/havardhegre1/mambaforge/envs/viewser/lib/R/include" -DNDEBUG -DNTIMER -I./SuiteSparse_config -DUSE_FC_LEN_T  -D_FORTIFY_SOURCE=2 -isystem /Users/havardhegre1/mambaforge/envs/viewser/include -mmacosx-version-min=11.0 -I/Users/havardhegre1/mambaforge/envs/viewser/include   -fPIC  -ftree-vectorize -fPIC -fPIE -fstack-protector-strong -O2 -pipe -isystem /Users/havardhegre1/mambaforge/envs/viewser/include -fdebug-prefix-map=/Users/runner/miniforge3/conda-bld/r-base-split_1659993955429/work=/usr/local/src/conda/r-base-4.2.1 -fdebug-prefix-map=/Users/havardhegre1/mambaforge/envs/viewser=/usr/local/src/conda-prefix  -c dpoMatrix.c -o dpoMatrix.o
arm64-apple-darwin20.0.0-clang -I"/Users/havardhegre1/mambaforge/envs/viewser/lib/R/include" -DNDEBUG -DNTIMER -I./SuiteSparse_config -DUSE_FC_LEN_T  -D_FORTIFY_SOURCE=2 -isystem /Users/havardhegre1/mambaforge/envs/viewser/include -mmacosx-version-min=11.0 -I/Users/havardhegre1/mambaforge/envs/viewser/include   

arm64-apple-darwin20.0.0-clang -I"/Users/havardhegre1/mambaforge/envs/viewser/lib/R/include" -DNDEBUG -DNTIMER -I./SuiteSparse_config -DUSE_FC_LEN_T  -D_FORTIFY_SOURCE=2 -isystem /Users/havardhegre1/mambaforge/envs/viewser/include -mmacosx-version-min=11.0 -I/Users/havardhegre1/mambaforge/envs/viewser/include   -fPIC  -ftree-vectorize -fPIC -fPIE -fstack-protector-strong -O2 -pipe -isystem /Users/havardhegre1/mambaforge/envs/viewser/include -fdebug-prefix-map=/Users/runner/miniforge3/conda-bld/r-base-split_1659993955429/work=/usr/local/src/conda/r-base-4.2.1 -fdebug-prefix-map=/Users/havardhegre1/mambaforge/envs/viewser=/usr/local/src/conda-prefix  -c sparseQR.c -o sparseQR.o
arm64-apple-darwin20.0.0-clang -I"/Users/havardhegre1/mambaforge/envs/viewser/lib/R/include" -DNDEBUG -DNTIMER -I./SuiteSparse_config -DUSE_FC_LEN_T  -D_FORTIFY_SOURCE=2 -isystem /Users/havardhegre1/mambaforge/envs/viewser/include -mmacosx-version-min=11.0 -I/Users/havardhegre1/mambaforge/envs/viewser/include   -f

                xlnz = MIN (xlnz, Size_max) ;
                       ~~~~~~~~~~~^~~~~~~~~
../Include/cholmod_internal.h:207:19: note: expanded from macro 'Size_max'
#define Size_max ((size_t) (-1))
                  ^~~~~~~~~~~~~
../Include/cholmod_internal.h:98:27: note: expanded from macro 'MIN'
#define MIN(a,b) (((a) < (b)) ? (a) : (b))
                       ~  ^
                xlnz = MIN (xlnz, Size_max) ;
                     ~            ^~~~~~~~
../Include/cholmod_internal.h:207:19: note: expanded from macro 'Size_max'
#define Size_max ((size_t) (-1))
                  ^~~~~~~~~~~~~
../Include/cholmod_internal.h:98:40: note: expanded from macro 'MIN'
#define MIN(a,b) (((a) < (b)) ? (a) : (b))
                                       ^
            xlnz = MIN (xlnz, Size_max) ;
                   ~~~~~~~~~~~^~~~~~~~~
../Include/cholmod_internal.h:207:19: note: expanded from macro 'Size_max'
#define Size_max ((size_t) (-1))
                  ^~~~~~~~~~~~~
../Include/cholmod_interna

arm64-apple-darwin20.0.0-clang -I"/Users/havardhegre1/mambaforge/envs/viewser/lib/R/include" -DNDEBUG -I../../AMD/Include -I../../AMD/Source -I../../COLAMD/Include -I../Include -I../../SuiteSparse_config -DNPARTITION -DNPRINT  -D_FORTIFY_SOURCE=2 -isystem /Users/havardhegre1/mambaforge/envs/viewser/include -mmacosx-version-min=11.0 -I/Users/havardhegre1/mambaforge/envs/viewser/include   -fPIC  -ftree-vectorize -fPIC -fPIE -fstack-protector-strong -O2 -pipe -isystem /Users/havardhegre1/mambaforge/envs/viewser/include -fdebug-prefix-map=/Users/runner/miniforge3/conda-bld/r-base-split_1659993955429/work=/usr/local/src/conda/r-base-4.2.1 -fdebug-prefix-map=/Users/havardhegre1/mambaforge/envs/viewser=/usr/local/src/conda-prefix  -c ../Core/cholmod_common.c -o cholmod_common.o
arm64-apple-darwin20.0.0-clang -I"/Users/havardhegre1/mambaforge/envs/viewser/lib/R/include" -DNDEBUG -I../../AMD/Include -I../../AMD/Source -I../../COLAMD/Include -I../Include -I../../SuiteSparse_config -DNPARTITION -

        if (xneed > Size_max ||
                  ~ ^~~~~~~~
../Include/cholmod_internal.h:207:19: note: expanded from macro 'Size_max'
#define Size_max ((size_t) (-1))
                  ^~~~~~~~~~~~~


arm64-apple-darwin20.0.0-clang -I"/Users/havardhegre1/mambaforge/envs/viewser/lib/R/include" -DNDEBUG -I../../AMD/Include -I../../AMD/Source -I../../COLAMD/Include -I../Include -I../../SuiteSparse_config -DNPARTITION -DNPRINT  -D_FORTIFY_SOURCE=2 -isystem /Users/havardhegre1/mambaforge/envs/viewser/include -mmacosx-version-min=11.0 -I/Users/havardhegre1/mambaforge/envs/viewser/include   -fPIC  -ftree-vectorize -fPIC -fPIE -fstack-protector-strong -O2 -pipe -isystem /Users/havardhegre1/mambaforge/envs/viewser/include -fdebug-prefix-map=/Users/runner/miniforge3/conda-bld/r-base-split_1659993955429/work=/usr/local/src/conda/r-base-4.2.1 -fdebug-prefix-map=/Users/havardhegre1/mambaforge/envs/viewser=/usr/local/src/conda-prefix  -c ../Core/cholmod_sparse.c -o cholmod_sparse.o
arm64-apple-darwin20.0.0-clang -I"/Users/havardhegre1/mambaforge/envs/viewser/lib/R/include" -DNDEBUG -I../../AMD/Include -I../../AMD/Source -I../../COLAMD/Include -I../Include -I../../SuiteSparse_config -DNPARTITION -

arm64-apple-darwin20.0.0-clang -I"/Users/havardhegre1/mambaforge/envs/viewser/lib/R/include" -DNDEBUG -I../../AMD/Include -I../../AMD/Source -I../../COLAMD/Include -I../Include -I../../SuiteSparse_config -DNPARTITION -DNPRINT  -D_FORTIFY_SOURCE=2 -isystem /Users/havardhegre1/mambaforge/envs/viewser/include -mmacosx-version-min=11.0 -I/Users/havardhegre1/mambaforge/envs/viewser/include   -fPIC  -ftree-vectorize -fPIC -fPIE -fstack-protector-strong -O2 -pipe -isystem /Users/havardhegre1/mambaforge/envs/viewser/include -fdebug-prefix-map=/Users/runner/miniforge3/conda-bld/r-base-split_1659993955429/work=/usr/local/src/conda/r-base-4.2.1 -fdebug-prefix-map=/Users/havardhegre1/mambaforge/envs/viewser=/usr/local/src/conda-prefix  -c ../Cholesky/cholmod_postorder.c -o cholmod_postorder.o
arm64-apple-darwin20.0.0-clang -I"/Users/havardhegre1/mambaforge/envs/viewser/lib/R/include" -DNDEBUG -I../../AMD/Include -I../../AMD/Source -I../../COLAMD/Include -I../Include -I../../SuiteSparse_config -DNP

arm64-apple-darwin20.0.0-clang -I"/Users/havardhegre1/mambaforge/envs/viewser/lib/R/include" -DNDEBUG -I../../AMD/Include -I../../AMD/Source -I../../COLAMD/Include -I../Include -I../../SuiteSparse_config -DNPARTITION -DNPRINT  -D_FORTIFY_SOURCE=2 -isystem /Users/havardhegre1/mambaforge/envs/viewser/include -mmacosx-version-min=11.0 -I/Users/havardhegre1/mambaforge/envs/viewser/include   -fPIC  -ftree-vectorize -fPIC -fPIE -fstack-protector-strong -O2 -pipe -isystem /Users/havardhegre1/mambaforge/envs/viewser/include -fdebug-prefix-map=/Users/runner/miniforge3/conda-bld/r-base-split_1659993955429/work=/usr/local/src/conda/r-base-4.2.1 -fdebug-prefix-map=/Users/havardhegre1/mambaforge/envs/viewser=/usr/local/src/conda-prefix  -c ../MatrixOps/cholmod_ssmult.c -o cholmod_ssmult.o
arm64-apple-darwin20.0.0-clang -I"/Users/havardhegre1/mambaforge/envs/viewser/lib/R/include" -DNDEBUG -I../../AMD/Include -I../../AMD/Source -I../../COLAMD/Include -I../Include -I../../SuiteSparse_config -DNPARTIT

arm64-apple-darwin20.0.0-clang -I"/Users/havardhegre1/mambaforge/envs/viewser/lib/R/include" -DNDEBUG -I../../AMD/Include -I../../AMD/Source -I../../COLAMD/Include -I../Include -I../../SuiteSparse_config -DNPARTITION -DNPRINT  -D_FORTIFY_SOURCE=2 -isystem /Users/havardhegre1/mambaforge/envs/viewser/include -mmacosx-version-min=11.0 -I/Users/havardhegre1/mambaforge/envs/viewser/include   -fPIC  -ftree-vectorize -fPIC -fPIE -fstack-protector-strong -O2 -pipe -isystem /Users/havardhegre1/mambaforge/envs/viewser/include -fdebug-prefix-map=/Users/runner/miniforge3/conda-bld/r-base-split_1659993955429/work=/usr/local/src/conda/r-base-4.2.1 -fdebug-prefix-map=/Users/havardhegre1/mambaforge/envs/viewser=/usr/local/src/conda-prefix  -DDLONG -c  ../Core/cholmod_change_factor.c -o cholmod_l_change_factor.o


                xlnz = MIN (xlnz, Size_max) ;
                       ~~~~~~~~~~~^~~~~~~~~
../Include/cholmod_internal.h:207:19: note: expanded from macro 'Size_max'
#define Size_max ((size_t) (-1))
                  ^~~~~~~~~~~~~
../Include/cholmod_internal.h:98:27: note: expanded from macro 'MIN'
#define MIN(a,b) (((a) < (b)) ? (a) : (b))
                       ~  ^
                xlnz = MIN (xlnz, Size_max) ;
                     ~            ^~~~~~~~
../Include/cholmod_internal.h:207:19: note: expanded from macro 'Size_max'
#define Size_max ((size_t) (-1))
                  ^~~~~~~~~~~~~
../Include/cholmod_internal.h:98:40: note: expanded from macro 'MIN'
#define MIN(a,b) (((a) < (b)) ? (a) : (b))
                                       ^
            xlnz = MIN (xlnz, Size_max) ;
                   ~~~~~~~~~~~^~~~~~~~~
../Include/cholmod_internal.h:207:19: note: expanded from macro 'Size_max'
#define Size_max ((size_t) (-1))
                  ^~~~~~~~~~~~~
../Include/cholmod_interna

arm64-apple-darwin20.0.0-clang -I"/Users/havardhegre1/mambaforge/envs/viewser/lib/R/include" -DNDEBUG -I../../AMD/Include -I../../AMD/Source -I../../COLAMD/Include -I../Include -I../../SuiteSparse_config -DNPARTITION -DNPRINT  -D_FORTIFY_SOURCE=2 -isystem /Users/havardhegre1/mambaforge/envs/viewser/include -mmacosx-version-min=11.0 -I/Users/havardhegre1/mambaforge/envs/viewser/include   -fPIC  -ftree-vectorize -fPIC -fPIE -fstack-protector-strong -O2 -pipe -isystem /Users/havardhegre1/mambaforge/envs/viewser/include -fdebug-prefix-map=/Users/runner/miniforge3/conda-bld/r-base-split_1659993955429/work=/usr/local/src/conda/r-base-4.2.1 -fdebug-prefix-map=/Users/havardhegre1/mambaforge/envs/viewser=/usr/local/src/conda-prefix  -DDLONG -c  ../Core/cholmod_common.c -o cholmod_l_common.o
arm64-apple-darwin20.0.0-clang -I"/Users/havardhegre1/mambaforge/envs/viewser/lib/R/include" -DNDEBUG -I../../AMD/Include -I../../AMD/Source -I../../COLAMD/Include -I../Include -I../../SuiteSparse_config -DN

        if (xneed > Size_max ||
                  ~ ^~~~~~~~
../Include/cholmod_internal.h:207:19: note: expanded from macro 'Size_max'
#define Size_max ((size_t) (-1))
                  ^~~~~~~~~~~~~


arm64-apple-darwin20.0.0-clang -I"/Users/havardhegre1/mambaforge/envs/viewser/lib/R/include" -DNDEBUG -I../../AMD/Include -I../../AMD/Source -I../../COLAMD/Include -I../Include -I../../SuiteSparse_config -DNPARTITION -DNPRINT  -D_FORTIFY_SOURCE=2 -isystem /Users/havardhegre1/mambaforge/envs/viewser/include -mmacosx-version-min=11.0 -I/Users/havardhegre1/mambaforge/envs/viewser/include   -fPIC  -ftree-vectorize -fPIC -fPIE -fstack-protector-strong -O2 -pipe -isystem /Users/havardhegre1/mambaforge/envs/viewser/include -fdebug-prefix-map=/Users/runner/miniforge3/conda-bld/r-base-split_1659993955429/work=/usr/local/src/conda/r-base-4.2.1 -fdebug-prefix-map=/Users/havardhegre1/mambaforge/envs/viewser=/usr/local/src/conda-prefix  -DDLONG -c  ../Core/cholmod_transpose.c -o cholmod_l_transpose.o
arm64-apple-darwin20.0.0-clang -I"/Users/havardhegre1/mambaforge/envs/viewser/lib/R/include" -DNDEBUG -I../../AMD/Include -I../../AMD/Source -I../../COLAMD/Include -I../Include -I../../SuiteSparse_conf

            if (nitems < 2 || nitems > 4 || l1 > Int_max || l2 > Int_max)
                                                               ~ ^~~~~~~
../Include/cholmod_internal.h:222:17: note: expanded from macro 'Int_max'
#define Int_max SuiteSparse_long_max
                ^~~~~~~~~~~~~~~~~~~~
../../SuiteSparse_config/SuiteSparse_config.h:66:30: note: expanded from macro 'SuiteSparse_long_max'
#define SuiteSparse_long_max LONG_MAX
                             ^~~~~~~~
/Users/havardhegre1/mambaforge/envs/viewser/lib/clang/14.0.4/include/limits.h:47:19: note: expanded from macro 'LONG_MAX'
#define LONG_MAX  __LONG_MAX__
                  ^~~~~~~~~~~~
<built-in>:48:22: note: expanded from here
#define __LONG_MAX__ 9223372036854775807L
                     ^~~~~~~~~~~~~~~~~~~~
            if (nitems < 2 || nitems > 4 || l1 > Int_max || l2 > Int_max)
                                               ~ ^~~~~~~
../Include/cholmod_internal.h:222:17: note: expanded from macro 'Int_max'
#define Int

arm64-apple-darwin20.0.0-clang -I"/Users/havardhegre1/mambaforge/envs/viewser/lib/R/include" -DNDEBUG -I../../AMD/Include -I../../AMD/Source -I../../COLAMD/Include -I../Include -I../../SuiteSparse_config -DNPARTITION -DNPRINT  -D_FORTIFY_SOURCE=2 -isystem /Users/havardhegre1/mambaforge/envs/viewser/include -mmacosx-version-min=11.0 -I/Users/havardhegre1/mambaforge/envs/viewser/include   -fPIC  -ftree-vectorize -fPIC -fPIE -fstack-protector-strong -O2 -pipe -isystem /Users/havardhegre1/mambaforge/envs/viewser/include -fdebug-prefix-map=/Users/runner/miniforge3/conda-bld/r-base-split_1659993955429/work=/usr/local/src/conda/r-base-4.2.1 -fdebug-prefix-map=/Users/havardhegre1/mambaforge/envs/viewser=/usr/local/src/conda-prefix  -DDLONG -c  ../Cholesky/cholmod_analyze.c -o cholmod_l_analyze.o
arm64-apple-darwin20.0.0-clang -I"/Users/havardhegre1/mambaforge/envs/viewser/lib/R/include" -DNDEBUG -I../../AMD/Include -I../../AMD/Source -I../../COLAMD/Include -I../Include -I../../SuiteSparse_conf

arm64-apple-darwin20.0.0-clang -I"/Users/havardhegre1/mambaforge/envs/viewser/lib/R/include" -DNDEBUG -I../../AMD/Include -I../../AMD/Source -I../../COLAMD/Include -I../Include -I../../SuiteSparse_config -DNPARTITION -DNPRINT  -D_FORTIFY_SOURCE=2 -isystem /Users/havardhegre1/mambaforge/envs/viewser/include -mmacosx-version-min=11.0 -I/Users/havardhegre1/mambaforge/envs/viewser/include   -fPIC  -ftree-vectorize -fPIC -fPIE -fstack-protector-strong -O2 -pipe -isystem /Users/havardhegre1/mambaforge/envs/viewser/include -fdebug-prefix-map=/Users/runner/miniforge3/conda-bld/r-base-split_1659993955429/work=/usr/local/src/conda/r-base-4.2.1 -fdebug-prefix-map=/Users/havardhegre1/mambaforge/envs/viewser=/usr/local/src/conda-prefix  -DDLONG -c  ../MatrixOps/cholmod_scale.c -o cholmod_l_scale.o
arm64-apple-darwin20.0.0-clang -I"/Users/havardhegre1/mambaforge/envs/viewser/lib/R/include" -DNDEBUG -I../../AMD/Include -I../../AMD/Source -I../../COLAMD/Include -I../Include -I../../SuiteSparse_config 

arm64-apple-darwin20.0.0-clang -I"/Users/havardhegre1/mambaforge/envs/viewser/lib/R/include" -DNDEBUG -I../../AMD/Include -I../../AMD/Source -I../../COLAMD/Include -I../Include -I../../SuiteSparse_config -DNPARTITION -DNPRINT  -D_FORTIFY_SOURCE=2 -isystem /Users/havardhegre1/mambaforge/envs/viewser/include -mmacosx-version-min=11.0 -I/Users/havardhegre1/mambaforge/envs/viewser/include   -fPIC  -ftree-vectorize -fPIC -fPIE -fstack-protector-strong -O2 -pipe -isystem /Users/havardhegre1/mambaforge/envs/viewser/include -fdebug-prefix-map=/Users/runner/miniforge3/conda-bld/r-base-split_1659993955429/work=/usr/local/src/conda/r-base-4.2.1 -fdebug-prefix-map=/Users/havardhegre1/mambaforge/envs/viewser=/usr/local/src/conda-prefix  -DDLONG -c  ../Supernodal/cholmod_super_symbolic.c -o cholmod_l_super_symbolic.o
arm64-apple-darwin20.0.0-ar -rucs ../../CHOLMOD.a cholmod_aat.o cholmod_add.o cholmod_band.o cholmod_change_factor.o cholmod_common.o cholmod_complex.o cholmod_copy.o cholmod_dense.o ch

                        (xtotsize < Int_max / sizeof (double)) ;
                                  ~ ~~~~~~~~^~~~~~~~~~~~~~~~~
        if (ssize < 0 ||(find_xsize && xxsize > Int_max))
                                              ~ ^~~~~~~
../Include/cholmod_internal.h:222:17: note: expanded from macro 'Int_max'
#define Int_max SuiteSparse_long_max
                ^~~~~~~~~~~~~~~~~~~~
../../SuiteSparse_config/SuiteSparse_config.h:66:30: note: expanded from macro 'SuiteSparse_long_max'
#define SuiteSparse_long_max LONG_MAX
                             ^~~~~~~~
/Users/havardhegre1/mambaforge/envs/viewser/lib/clang/14.0.4/include/limits.h:47:19: note: expanded from macro 'LONG_MAX'
#define LONG_MAX  __LONG_MAX__
                  ^~~~~~~~~~~~
<built-in>:48:22: note: expanded from here
#define __LONG_MAX__ 9223372036854775807L
                     ^~~~~~~~~~~~~~~~~~~~


make[2]: Leaving directory '/private/var/folders/bz/1cx7kmbj2919xmdqxy83mbs80000gp/T/Rtmp6Y8YVd/R.INSTALL14c9c97fd9ee/Matrix/src/CHOLMOD/Lib'
make[1]: Leaving directory '/private/var/folders/bz/1cx7kmbj2919xmdqxy83mbs80000gp/T/Rtmp6Y8YVd/R.INSTALL14c9c97fd9ee/Matrix/src/CHOLMOD'
make[1]: Entering directory '/private/var/folders/bz/1cx7kmbj2919xmdqxy83mbs80000gp/T/Rtmp6Y8YVd/R.INSTALL14c9c97fd9ee/Matrix/src/COLAMD'
( cd Source ; make -f "/Users/havardhegre1/mambaforge/envs/viewser/lib/R/etc/Makeconf" -f Makefile lib )
make[2]: Entering directory '/private/var/folders/bz/1cx7kmbj2919xmdqxy83mbs80000gp/T/Rtmp6Y8YVd/R.INSTALL14c9c97fd9ee/Matrix/src/COLAMD/Source'
arm64-apple-darwin20.0.0-clang -I"/Users/havardhegre1/mambaforge/envs/viewser/lib/R/include" -DNDEBUG -I../Include -I../../SuiteSparse_config  -D_FORTIFY_SOURCE=2 -isystem /Users/havardhegre1/mambaforge/envs/viewser/include -mmacosx-version-min=11.0 -I/Users/havardhegre1/mambaforge/envs/viewser/include   -fPIC  -ftree-vectorize -f

arm64-apple-darwin20.0.0-clang -I"/Users/havardhegre1/mambaforge/envs/viewser/lib/R/include" -DNDEBUG -I../Include -I../../SuiteSparse_config  -D_FORTIFY_SOURCE=2 -isystem /Users/havardhegre1/mambaforge/envs/viewser/include -mmacosx-version-min=11.0 -I/Users/havardhegre1/mambaforge/envs/viewser/include   -fPIC  -ftree-vectorize -fPIC -fPIE -fstack-protector-strong -O2 -pipe -isystem /Users/havardhegre1/mambaforge/envs/viewser/include -fdebug-prefix-map=/Users/runner/miniforge3/conda-bld/r-base-split_1659993955429/work=/usr/local/src/conda/r-base-4.2.1 -fdebug-prefix-map=/Users/havardhegre1/mambaforge/envs/viewser=/usr/local/src/conda-prefix  -I../Include -DDINT -c amd_defaults.c -o amd_i_defaults.o
arm64-apple-darwin20.0.0-clang -I"/Users/havardhegre1/mambaforge/envs/viewser/lib/R/include" -DNDEBUG -I../Include -I../../SuiteSparse_config  -D_FORTIFY_SOURCE=2 -isystem /Users/havardhegre1/mambaforge/envs/viewser/include -mmacosx-version-min=11.0 -I/Users/havardhegre1/mambaforge/envs/view

arm64-apple-darwin20.0.0-clang -I"/Users/havardhegre1/mambaforge/envs/viewser/lib/R/include" -DNDEBUG -I../Include -I../../SuiteSparse_config  -D_FORTIFY_SOURCE=2 -isystem /Users/havardhegre1/mambaforge/envs/viewser/include -mmacosx-version-min=11.0 -I/Users/havardhegre1/mambaforge/envs/viewser/include   -fPIC  -ftree-vectorize -fPIC -fPIE -fstack-protector-strong -O2 -pipe -isystem /Users/havardhegre1/mambaforge/envs/viewser/include -fdebug-prefix-map=/Users/runner/miniforge3/conda-bld/r-base-split_1659993955429/work=/usr/local/src/conda/r-base-4.2.1 -fdebug-prefix-map=/Users/havardhegre1/mambaforge/envs/viewser=/usr/local/src/conda-prefix  -I../Include -DDLONG -c amd_dump.c -o amd_l_dump.o
arm64-apple-darwin20.0.0-ar -rucs ../../AMD.a amd_i_aat.o amd_l_aat.o amd_i_1.o amd_l_1.o amd_i_2.o amd_l_2.o amd_i_postorder.o amd_l_postorder.o amd_i_post_tree.o amd_l_post_tree.o amd_i_defaults.o amd_l_defaults.o amd_i_order.o amd_l_order.o amd_i_control.o amd_l_control.o amd_i_info.o amd_l_info

installing to /Users/havardhegre1/mambaforge/envs/viewser/lib/R/library/00LOCK-Matrix/00new/Matrix/libs
** R
** data
** inst
** byte-compile and prepare package for lazy loading
** help
*** installing help indices
** building package indices
** installing vignettes
** testing if installed package can be loaded from temporary location
** checking absolute paths in shared objects and dynamic libraries
** testing if installed package can be loaded from final location
** testing if installed package keeps a record of temporary installation path
* DONE (Matrix)
* installing *source* package ‘RcppEigen’ ...
** package ‘RcppEigen’ successfully unpacked and MD5 sums checked
** using staged installation
** libs
In file included from RcppEigen.cpp:22:
In file included from ../inst/include/RcppEigen.h:25:
In file included from ../inst/include/RcppEigenForward.h:30:
In file included from ../inst/include/Eigen/Dense:1:
In file included from ../inst/include/Eigen/Core:374:
../inst/include/Eigen/src/

make: *** [/Users/havardhegre1/mambaforge/envs/viewser/lib/R/etc/Makeconf:177: RcppEigen.o] Error 1
ERROR: compilation failed for package ‘RcppEigen’
* removing ‘/Users/havardhegre1/mambaforge/envs/viewser/lib/R/library/RcppEigen’
ERROR: dependency ‘RcppEigen’ is not available for package ‘ranger’
* removing ‘/Users/havardhegre1/mambaforge/envs/viewser/lib/R/library/ranger’

The downloaded source packages are in
	‘/private/var/folders/bz/1cx7kmbj2919xmdqxy83mbs80000gp/T/Rtmp5oCJob/downloaded_packages’
Updating HTML index of packages in '.Library'
Making 'packages.html' ... done
Loading required package: ranger
also installing the dependencies ‘colorspace’, ‘sys’, ‘bit’, ‘ps’, ‘sass’, ‘cachem’, ‘memoise’, ‘base64enc’, ‘fastmap’, ‘nlme’, ‘farver’, ‘munsell’, ‘rappdirs’, ‘askpass’, ‘bit64’, ‘processx’, ‘highr’, ‘yaml’, ‘xfun’, ‘bslib’, ‘htmltools’, ‘jquerylib’, ‘tinytex’, ‘backports’, ‘ellipsis’, ‘glue’, ‘blob’, ‘lifecycle’, ‘tidyselect’, ‘vctrs’, ‘data.table’, ‘digest’, ‘isoband’, ‘MASS’

downloaded 1.6 MB

trying URL 'https://cloud.r-project.org/src/contrib/MASS_7.3-58.1.tar.gz'
Content type 'application/x-gzip' length 514882 bytes (502 KB)
downloaded 502 KB

trying URL 'https://cloud.r-project.org/src/contrib/mgcv_1.8-40.tar.gz'
Content type 'application/x-gzip' length 1175920 bytes (1.1 MB)
downloaded 1.1 MB

trying URL 'https://cloud.r-project.org/src/contrib/scales_1.2.1.tar.gz'
Content type 'application/x-gzip' length 270609 bytes (264 KB)
downloaded 264 KB

trying URL 'https://cloud.r-project.org/src/contrib/gargle_1.2.1.tar.gz'
Content type 'application/x-gzip' length 312943 bytes (305 KB)
downloaded 305 KB

trying URL 'https://cloud.r-project.org/src/contrib/uuid_1.1-0.tar.gz'
Content type 'application/x-gzip' length 77959 bytes (76 KB)
downloaded 76 KB

trying URL 'https://cloud.r-project.org/src/contrib/cellranger_1.1.0.tar.gz'
Content type 'application/x-gzip' length 63857 bytes (62 KB)
downloaded 62 KB

trying URL 'https://cloud.r-project.org/src/contrib/cu

downloaded 2.0 MB

trying URL 'https://cloud.r-project.org/src/contrib/reprex_2.0.2.tar.gz'
Content type 'application/x-gzip' length 1088076 bytes (1.0 MB)
downloaded 1.0 MB

trying URL 'https://cloud.r-project.org/src/contrib/rlang_1.0.5.tar.gz'
Content type 'application/x-gzip' length 741683 bytes (724 KB)
downloaded 724 KB

trying URL 'https://cloud.r-project.org/src/contrib/rvest_1.0.3.tar.gz'
Content type 'application/x-gzip' length 94659 bytes (92 KB)
downloaded 92 KB

trying URL 'https://cloud.r-project.org/src/contrib/stringr_1.4.1.tar.gz'
Content type 'application/x-gzip' length 136607 bytes (133 KB)
downloaded 133 KB

trying URL 'https://cloud.r-project.org/src/contrib/tibble_3.1.8.tar.gz'
Content type 'application/x-gzip' length 672336 bytes (656 KB)
downloaded 656 KB

trying URL 'https://cloud.r-project.org/src/contrib/tidyr_1.2.1.tar.gz'
Content type 'application/x-gzip' length 746056 bytes (728 KB)
downloaded 728 KB

trying URL 'https://cloud.r-project.org/src/contrib/xml

* installing *source* package ‘rappdirs’ ...
** package ‘rappdirs’ successfully unpacked and MD5 sums checked
** using staged installation
** libs
installing to /Users/havardhegre1/mambaforge/envs/viewser/lib/R/library/00LOCK-rappdirs/00new/rappdirs/libs
** R
** byte-compile and prepare package for lazy loading
** help
*** installing help indices
** building package indices
** testing if installed package can be loaded from temporary location
** checking absolute paths in shared objects and dynamic libraries
** testing if installed package can be loaded from final location
** testing if installed package keeps a record of temporary installation path
* DONE (rappdirs)
* installing *source* package ‘yaml’ ...
** package ‘yaml’ successfully unpacked and MD5 sums checked
** using staged installation
** libs
            result = emit_factor(emitter, event, s_obj, tag, implicit_tag);
                                                        ^~~
            result = emit_string(emitter, event, 

In [None]:
# Copy the externally produced Markov model forecasts (CSV files on Dropbox)
# into prediction storage under the development run id.
path = Mydropbox + 'Projects/PredictingFatalities/Predictions/cm/preds/'

# One entry per external model: the name used in prediction storage and the
# CSV file that holds its forecasts for the current EndOfHistory.
DRList = [
    {
        'modelname': 'fat_hh20_Markov_' + variant,
        'filename': path + 'vmm_' + variant + '_hh20_' + str(EndOfHistory) + '.csv',
    }
    for variant in ('glm', 'rf')
]

for model in DRList:
    df_future = pd.read_csv(
        model['filename'],
        index_col=['month_id', 'country_id'],
    )
    # Empty dependent variable column for consistency/required by prediction storage function
    df_future['ln_ged_sb_dep'] = np.nan
    stored_modelname = '_'.join([level, model['modelname']]) + '_f' + str(EndOfHistory)
    df_future.forecasts.set_run(dev_id)
    df_future.forecasts.to_store(name=stored_modelname, overwrite=True)

In [None]:
# Retrieving the predictions for calibration and test partitions
# The ModelList contains the predictions organized by model
# NOTE(review): ModelList, steps, EndOfHistory, dev_id, level and get_future are not
# defined in this part of the notebook; they must exist before this cell is run.

ModelList = RetrieveStoredPredictions(ModelList, steps, EndOfHistory, dev_id, level, get_future)

# NOTE(review): presumably this attaches the per-step 'calibration_gams' entries that the
# future-prediction loop further down reads from each model dictionary - confirm in the helper.
ModelList = CalibratePredictions(ModelList, EndOfHistory, steps)

In [None]:
# Run querysets and postprocessing (e.g. PCA) to obtain data for future prediction
# Later cells treat Datasets as a list of dictionaries with 'Name' and 'df' entries
# and address individual datasets by position (e.g. Datasets[8]['df']).
qslist = ReturnQsList(level)
from FetchData import fetch_cm_data_from_model_def

Datasets=fetch_cm_data_from_model_def(qslist)

In [None]:
# EndOfHistory can be reset here to facilitate rerunning several months without rereading input data
# Remove '#' and reset
#EndOfHistory = 506

In [None]:


from views_runs import Storage, StepshiftedModels
from views_partitioning.data_partitioner import DataPartitioner
from viewser import Queryset, Column
from views_runs import operations
from views_runs.run_result import RunResult

from pygam import LogisticGAM, LinearGAM, s, te

RewritePredictions = False # Set this to True to rewrite predictions even if they exist

def RetrainAndPredict(modelname):
    """Retrieve (or retrain) the stepshifted model for the current model and predict the future.

    NOTE: the function operates on the module-level variable ``model`` (the
    dictionary currently bound by the enclosing ``for model in ModelList`` loop),
    not on its argument. It also reads the globals ``future_partitioner_dict``,
    ``steps``, ``Datasets`` and ``EndOfHistory``.

    Parameters
    ----------
    modelname: Accepted for backward compatibility with existing callers; it is
        not used to select the model.

    Returns
    -------
    The object returned by ``future_point_predict`` for ``EndOfHistory``.

    Side effects
    ------------
    Stores the run result in ``model['RunResult_future']``.
    """
    # Never force retraining here; an already stored trained model is reused.
    force_retrain = False
    # Predictions for true future
    ct = datetime.now()
    print('Future', ct)
    # A single storage handle is enough (it used to be created twice).
    modelstore = storage.Storage()
    model['RunResult_future']  = RunResult.retrain_or_retrieve(
            retrain            = force_retrain,
            store              = modelstore,
            partitioner        = DataPartitioner({"test":future_partitioner_dict}),
            stepshifted_models = StepshiftedModels(model['algorithm'], steps, model['depvar']),
            dataset            = RetrieveFromList(Datasets,model['data_train']),
            queryset_name      = model['queryset'],
            partition_name     = "test",
            timespan_name      = "train",
            storage_name       = model['modelname'] + '_future',
            author_name        = "HH",
    )
    predictions_future = model['RunResult_future'].run.future_point_predict(EndOfHistory,model['RunResult_future'].data)
    return predictions_future



# Produce, calibrate and store future predictions for every constituent model.
# `i` is only a running counter used in the progress printout.
i = 0
print('Computing predictions, production run ' + prod_id + ', development run ' + dev_id)
for model in ModelList:

    # For each model the loop
    # (1) checks whether this is a model trained outside the main system (Markov models),
    # (2) retrieves the prediction if it exists in prediction storage,
    # (3) if not, calls RetrainAndPredict, which retrieves the trained model or retrains it,
    # then calibrates the predictions and stores them for this run.
    # To do: set the data_preprocessing to the function in the model dictionary
    
    # Names under which the non-calibrated and calibrated predictions are kept in prediction storage
    model['predstorename_ncal'] = level +  '_' + model['modelname'] + '_noncalibrated' + '_f' + str(EndOfHistory)
    model['predstorename_cal'] = level +  '_' + model['modelname'] + '_calibrated' + '_f' + str(EndOfHistory)

    
    if 'Markov' not in model['modelname']: # Only Markov models are currently exceptions
        print(i, model['modelname'])

        ct = datetime.now()
        print('Trying to retrieve non-calibrated predictions', ct)
        if RewritePredictions:
            # NOTE(review): RetrainAndPredict ignores its argument and works on the
            # loop variable `model` through the global namespace.
            model['future_df_noncalibrated'] = RetrainAndPredict(model['predstorename_ncal'])
        else:
            try:
                model['future_df_noncalibrated'] = pd.DataFrame.forecasts.read_store(run=run_id, name=model['predstorename_ncal'])
                print('Predictions for ', model['predstorename_ncal'], ', run', run_id, 'exist, retrieving from prediction storage')

            # NOTE(review): a KeyError from read_store is treated as "not stored yet" - confirm
            # that this is the exception the storage layer raises for a missing name.
            except KeyError:
                print(model['predstorename_ncal'], ', run', run_id, 'does not exist, predicting')
                model['future_df_noncalibrated'] = RetrainAndPredict(model['predstorename_ncal'])

        # Calibrating and storing   
        # Storing non-calibrated
        # (this also re-stores predictions that were just read back from storage)
        
        model['future_df_noncalibrated'].forecasts.set_run(run_id)
        model['future_df_noncalibrated'].forecasts.to_store(name=model['predstorename_ncal'], overwrite=True)   
        print('Calibrating')
        model['future_df_calibrated'] = model['future_df_noncalibrated'].copy()
        for step in steps:
            thismonth = EndOfHistory + step
            
            # Replace 'step_combined' for this month by the output of the step-specific calibration GAM.
            # NOTE(review): indexing with step-1 assumes 'calibration_gams' is ordered by step and
            # that steps start at 1 without gaps - confirm.
            model['future_df_calibrated'].loc[thismonth,'step_combined'] = pd.DataFrame(model['calibration_gams'][step-1]['calibration_GAM'].predict(model['future_df_noncalibrated'].loc[thismonth])).values
        # Storing calibrated
        model['future_df_calibrated'].forecasts.set_run(run_id)
        model['future_df_calibrated'].forecasts.to_store(name=model['predstorename_cal'], overwrite=True)   
            
    else: # If one of David's Markov models
        print(i, model['modelname'])
            
        # This key is only used for the message below; the non-Markov branch uses 'predstorename_ncal'.
        model['predstorename_noncalibrated'] = level +  '_' + model['modelname'] + '_noncalibrated' + '_f' + str(EndOfHistory)
        print(model['predstorename_noncalibrated'], ', run', run_id, 'is being retrieved from dropbox')
        path = Mydropbox + 'Projects/PredictingFatalities/Predictions/cm/preds/'

        # The CSV content is used directly as the calibrated prediction; no GAM calibration is applied here.
        # NOTE(review): a Markov model name other than the two below leaves
        # 'future_df_calibrated' untouched by this branch.
        if model['modelname'] == 'fat_hh20_Markov_glm':
            DR_filename = path + 'vmm_glm_hh20_' + str(EndOfHistory) + '.csv'
            model['future_df_calibrated'] = pd.read_csv(DR_filename,index_col=['month_id','country_id'])
        if model['modelname'] == 'fat_hh20_Markov_rf':
            DR_filename = path + 'vmm_rf_hh20_' + str(EndOfHistory) + '.csv'
            model['future_df_calibrated'] = pd.read_csv(DR_filename,index_col=['month_id','country_id'])
            
        # Same value as assigned at the top of the loop body.
        model['predstorename_cal'] = level +  '_' + model['modelname'] + '_calibrated' + '_f' + str(EndOfHistory)

        model['future_df_calibrated'].forecasts.set_run(run_id)
        model['future_df_calibrated'].forecasts.to_store(name=model['predstorename_cal'], overwrite=True)   


    i = i + 1

print('All done')
        
        

In [None]:
# Print each fetched dataset's name next to its position in Datasets;
# later cells address datasets by this position.
for i, ds in enumerate(Datasets):
    print(i, ds['Name'])

In [None]:
df = Datasets[8]['df']
df.loc[508]['general_efficiency_t48'].describe()

In [None]:
# The ensemble is described by a dictionary with the same kind of entries as the
# constituent models, but it is kept in its own list, separate from ModelList.
Ensemble = dict(
    modelname='genetic_ensemble',
    algorithm=[],
    depvar='ln_ged_sb_dep',
    data_train=[],
    Algorithm_text='',
    calibration_gams=[],
    future_df_calibrated=[],
)

EnsembleList = [Ensemble]  # Separate list of dictionaries for ensembles!


In [None]:
# Collecting in one df, one column per model
# The first model's calibrated 'step_combined' column seeds the frame (and thereby its index);
# every other model is then added as a further column named after the model.
# NOTE(review): presumably pandas aligns the added columns on the first model's index,
# so rows missing from a later model would become NaN - verify that all models share the index.
ConstituentModels_df = pd.DataFrame(ModelList[0]['future_df_calibrated']['step_combined'])
ConstituentModels_df.columns = [ModelList[0]['modelname']]
for model in ModelList[1:]:
    ConstituentModels_df[model['modelname']] = pd.DataFrame(model['future_df_calibrated']['step_combined'])


In [None]:
# Retrieve genetic algorithm results
i_weights_df = pd.read_csv('../Intermediates/GeneticWeights.csv')

# Retrieve ensemble predictions for test partition to create categorical predictions

In [None]:
# Read the stored test-partition predictions of the ensemble and neutralise
# infinite values by setting them to 0.
stored_modelname_test = '_'.join([level, 'ensemble_genetic', 'test'])

ensemble_test_df = pd.DataFrame.forecasts.read_store(stored_modelname_test, run=run_id)
ensemble_test_df = ensemble_test_df.replace([np.inf, -np.inf], 0)

ensemble_test_df.head()

In [None]:
# Generate dichotomous version of dependent variable
ensemble_test_df['ged_gte_25'] = ensemble_test_df['ln_ged_sb_dep'].apply(lambda x: 1 if x >= np.log1p(25) else 0)
# Generate multiclass version for uncertainty estimation
def ged_categorical(x):
    """Map a log1p-transformed fatality count to an ordinal class 0-4.

    The class is the position of the first upper bound (0.5, 10, 100, 1000
    fatalities, compared on the log1p scale) that ``x`` lies strictly below;
    values below none of the bounds get class 4.
    """
    upper_bounds = (0.5, 10, 100, 1000)
    for category, bound in enumerate(upper_bounds):
        if x < np.log1p(bound):
            return category
    return len(upper_bounds)

ensemble_test_df['ged_multi'] = ensemble_test_df['ln_ged_sb_dep'].apply(ged_categorical)

ensemble_test_df.describe()

In [None]:
plt.scatter(ensemble_test_df['ln_ged_sb_dep'],ensemble_test_df['ged_multi'])

In [None]:
# Train model to transform predictions from  fatalities to (1) dichotomous and (2) multiclass
# One set of classifiers is fitted per step, using that step's ensemble prediction
# ('step_pred_<step>') as the only feature.
from sklearn.linear_model import LogisticRegression
from sklearn.calibration import CalibratedClassifierCV
dichotomous_classifiers = []
multi_classifiers = []

# The targets do not depend on the step, so they are built once.
# scikit-learn expects 1-D targets; a column vector triggers a conversion warning.
y_dich = np.array(ensemble_test_df['ged_gte_25']).ravel()
y_multi = np.array(ensemble_test_df['ged_multi']).ravel()

for step in steps:
    X = np.array(ensemble_test_df[f'step_pred_{step}'])
    X = X.reshape(-1,1)
    # Dichotomous
    dich_clf = LogisticRegression(random_state=0).fit(X, y_dich)
    p_dich = dich_clf.predict_proba(X)
    ensemble_test_df[f'dich_step_{step}_logit'] = p_dich[:,1].ravel()
    # Calibrated
    # The estimator is passed positionally because its keyword was renamed from
    # `base_estimator` to `estimator` between scikit-learn releases.
    calibrated_dich_clf = CalibratedClassifierCV(dich_clf, cv=3)
    calibrated_dich_clf.fit(X, y_dich)
    p_dich_cal = calibrated_dich_clf.predict_proba(X)
    dichotomous_classifiers.append(calibrated_dich_clf)
    ensemble_test_df[f'dich_cal_step_{step}_logit'] = p_dich_cal[:,1].ravel()
    # Multiclass
    multi_clf = LogisticRegression(random_state=0).fit(X, y_multi)
    multi_classifiers.append(multi_clf)
    p_multi = multi_clf.predict_proba(X)
    # predict_proba columns follow multi_clf.classes_; label them from there so a class
    # that is absent from the data cannot shift or break the column assignment.
    for position, cls in enumerate(multi_clf.classes_):
        ensemble_test_df[f'multi_{int(cls)}_step_{step}_logit'] = p_multi[:,position].ravel()

ensemble_test_df[['dich_step_3_logit','dich_cal_step_3_logit']].describe()

In [None]:
plt.scatter(ensemble_test_df['dich_step_3_logit'],ensemble_test_df['dich_cal_step_3_logit'])

# Calculating and storing ensemble future predictions

In [None]:
# Setting up a placeholder df for ensemble predictions
# Both frames start as copies of the first model's calibrated predictions and are
# overwritten month by month below.
EnsembleList[0]['future_df_calibrated'] = ModelList[0]['future_df_calibrated'].copy() # Copy from baseline
EnsembleList[0]['future_df_dichotomous'] = ModelList[0]['future_df_calibrated'].copy() # Copy from baseline

ConstituentModels_df_w = ConstituentModels_df.copy()

for step in steps:
    month = EndOfHistory + step
    weightcol = 'step_pred_' + str(step)
    weights = np.array(pd.DataFrame(i_weights_df[weightcol]))
    # Weighted sum of the constituent model predictions for this month.
    # NOTE(review): the dot product is positional - it assumes the rows of i_weights_df are
    # in the same order as the columns of ConstituentModels_df; confirm.
    EnsembleList[0]['future_df_calibrated'].loc[month] = ConstituentModels_df_w.loc[month].dot(weights).values
    x_d = np.array(EnsembleList[0]['future_df_calibrated'].loc[month]).reshape(-1,1)
    # NOTE(review): step-1 assumes the classifiers were trained for steps 1, 2, ... in order.
    pred_step = dichotomous_classifiers[step-1].predict_proba(x_d)
    # Single .loc assignment instead of chained indexing, which may write to a temporary copy.
    EnsembleList[0]['future_df_dichotomous'].loc[month, 'step_combined'] = pred_step[:,1]

In [None]:
# Write the ensemble's point predictions and its dichotomous (probability)
# predictions to prediction storage for this run, overwriting earlier versions.
predstore_future = f"{level}_{EnsembleList[0]['modelname']}_f{EndOfHistory}"
predstore_future_dich = f"{level}_{EnsembleList[0]['modelname']}_dich_f{EndOfHistory}"

for frame_key, store_name in (
    ('future_df_calibrated', predstore_future),
    ('future_df_dichotomous', predstore_future_dich),
):
    EnsembleList[0][frame_key].forecasts.set_run(run_id)
    EnsembleList[0][frame_key].forecasts.to_store(name=store_name, overwrite=True)


In [None]:
ViewsMetadata().with_name('genetic').fetch()

# Mapping future predictions [cells under this heading will be deleted when present_results.ipynb is ready]

In [1]:
from matplotlib import pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
import contextily as ctx

from views_dataviz import color
from views_dataviz.map import utils
from views_dataviz.map.presets import ViewsMap

import sqlalchemy as sa
#from ingester3.config import source_db_path
#from ingester3.Country import Country
#from ingester3.extensions import *
#from ingester3.ViewsMonth import ViewsMonth

import geopandas as gpd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import geopandas as gpd
import pandas as pd
import numpy as np

class Mapper2:
    """
    `Map` takes basic properties and allows the user to consecutively add
    layers to the Map object. This makes it possible to prepare mapping
    "presets" at any level of layeredness that can be built on further.

    Mapper2 additionally lets the caller pass a manual colour scale
    (`map_scale`) when adding a layer.
    TODO: re-add the code for labels once it can be tested.

    Attributes
    ----------
    width: Integer value for width in inches.
    height: Integer value for height in inches.
    bbox: List for the bbox per [xmin, xmax, ymin, ymax].
    cmap: Optional default colormap used by layers that do not pass their own.
    frame_on: Bool for whether to draw a frame around the map.
    title: Optional default title at matplotlib's default size.
    figure: Optional tuple of (fig, ax) to use if you want to plot into an
        already existing fig and ax, rather than making a new one.
    """

    def __init__(
        self,
        width,
        height,
        bbox=None,
        cmap=None,
        frame_on=True,
        title="",  # Default title without customization. (?)
        figure=None,
    ):
        self.width = width
        self.height = height
        self.bbox = bbox  # xmin, xmax, ymin, ymax
        self.cmap = cmap
        if figure is None:
            self.fig, self.ax = plt.subplots(figsize=(self.width, self.height))
        else:
            self.fig, self.ax = figure
        self.texts = []
        self.ax.set_title(title)

        if frame_on:  # Remove axis ticks only.
            self.ax.tick_params(
                top=False,
                bottom=False,
                left=False,
                right=False,
                labelleft=False,
                labelbottom=False,
            )
        else:
            self.ax.axis("off")

        if bbox is not None:
            self.ax.set_xlim((self.bbox[0], self.bbox[1]))
            self.ax.set_ylim((self.bbox[2], self.bbox[3]))

    def add_layer(self, gdf, map_scale=False, map_dictionary=False, cmap=None, inform_colorbar=False, **kwargs):
        """Add a geopandas plot to a new layer.

        Parameters
        ----------
        gdf: Geopandas GeoDataFrame to plot.
        map_scale: Optional sequence of values defining a manual scale. Its
            minimum and maximum set the range of the colorbar and, unless
            `vmin`/`vmax` are given in the kwargs, of the plotted colours.
            When omitted, the colorbar range comes from `self.vmin` /
            `self.vmax` (set when `inform_colorbar` is used).
        map_dictionary: Accepted for manual labels; currently not used.
        cmap: Optional matplotlib colormap object or string reference
            (e.g. "viridis").
        inform_colorbar: Set or overwrite colorbar with the current layer.
            Not applicable when `color` is supplied in the kwargs.
        **kwargs: Geopandas `.plot` keyword arguments.

        Returns
        -------
        self, so calls can be chained.
        """
        if "color" in kwargs:
            colormap = None
        else:
            colormap = self.cmap if cmap is None else cmap
            if inform_colorbar and "column" in kwargs:
                if hasattr(self, "cax"):
                    self.cax.remove()
                if "vmin" not in kwargs:
                    self.vmin = gdf[kwargs["column"]].min()
                else:
                    self.vmin = kwargs["vmin"]
                if "vmax" not in kwargs:
                    self.vmax = gdf[kwargs["column"]].max()
                else:
                    self.vmax = kwargs["vmax"]

        # A usable manual scale is any non-empty sized object; the default
        # `False` (and None) mean "no manual scale".
        scale_given = (
            map_scale is not None
            and not isinstance(map_scale, bool)
            and hasattr(map_scale, "__len__")
            and len(map_scale) > 0
        )

        # Decide the colorbar range explicitly instead of relying on a bare
        # `except:` to fall back, which hid real errors.
        if scale_given:
            scale_min, scale_max = min(map_scale), max(map_scale)
            colorbar_range = (scale_min, scale_max)
        elif hasattr(self, "vmin") and hasattr(self, "vmax"):
            colorbar_range = (self.vmin, self.vmax)
        else:
            colorbar_range = None

        # A colorbar needs both a colormap and a range; layers drawn in a
        # single `color` (or without any known range) simply get none.
        if colormap is not None and colorbar_range is not None:
            self.add_colorbar(colormap, colorbar_range[0], colorbar_range[1])

        # Explicit vmin/vmax in the kwargs take precedence over the manual scale.
        plot_kwargs = dict(kwargs)
        if scale_given and "vmin" not in plot_kwargs and "vmax" not in plot_kwargs:
            plot_kwargs["vmin"] = scale_min
            plot_kwargs["vmax"] = scale_max
        self.ax = gdf.plot(ax=self.ax, cmap=colormap, **plot_kwargs)

        return self

    def add_colorbar(
        self,
        cmap,
        vmin,
        vmax,
        location="right",
        size="5%",
        pad=0.1,
        alpha=1,
        labelsize=16,
        tickparams=None,
    ):
        """Add custom colorbar to Map.

        Needed since GeoPandas legend and plot axes do not align, see:
        https://geopandas.readthedocs.io/en/latest/docs/user_guide/mapping.html

        Parameters
        ----------
        cmap: Matplotlib colormap object or string reference (e.g. "viridis").
        vmin: Minimum value of range colorbar.
        vmax: Maximum value of range colorbar.
        location: String for location of colorbar: "top", "bottom", "left"
            or "right".
        size: Size in either string percentage or number of pixels.
        pad: Float for padding between the plot's frame and colorbar.
        alpha: Float for alpha to apply to colorbar.
        labelsize: Integer value for the text size of the ticklabels.
        tickparams: Dictionary containing value-label pairs. For example:
            {0.05: "5%", 0.1: "10%"}

        Returns
        -------
        self; the new axes and colorbar are kept as `self.cax` and `self.cbar`.
        """
        norm = plt.Normalize(vmin, vmax)
        if isinstance(cmap, str):
            cmap = plt.get_cmap(cmap)
        cmap = color.force_alpha_colormap(cmap=cmap, alpha=alpha)
        scalar_to_rgba = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
        divider = make_axes_locatable(self.ax)
        self.cax = divider.append_axes(location, size, pad)
        self.cax.tick_params(labelsize=labelsize)
        tickvalues = (
            list(tickparams.keys()) if tickparams is not None else None
        )
        self.cbar = plt.colorbar(
            scalar_to_rgba, cax=self.cax, ticks=tickvalues
        )
        if tickparams is not None:
            self.cbar.set_ticklabels(list(tickparams.values()))
        return self

    def save(
        self, path, dpi=200, **kwargs
    ):  # Just some defaults to reduce work.
        """Save Map figure to file and close the figure.

        Parameters
        ----------
        path: String path, e.g. "./example.png".
        dpi: Integer dots per inch. Increase for higher resolution figures.
        **kwargs: Matplotlib `savefig` keyword arguments.
        """
        self.fig.savefig(path, dpi=dpi, bbox_inches="tight", **kwargs)
        plt.close(self.fig)
        
def vid2date(i):
    """Convert a ViEWS month id to a 'year/month' string.

    Month ids count months starting at 1 for January 1980, so id 12 is
    '1980/12' and id 13 is '1981/1'. The month is not zero-padded.

    Parameters
    ----------
    i: Integer ViEWS month id.

    Returns
    -------
    String of the form '<year>/<month>'.
    """
    # Shift to a zero-based month count so that December does not spill over
    # into month 0 of the following year.
    months_since_start = i - 1
    year = str(1980 + months_since_start // 12)
    month = str(months_since_start % 12 + 1)
    return year+'/'+month
        
#def vid2date(i):
#    year=str(ViewsMonth(i).year)
#    month=str(ViewsMonth(i).month)
#    return year+'/'+month

# Colour-scale tick positions for the maps: log1p of round fatality counts,
# each with a matching list of tick labels.
standard_scale = [np.log1p(count) for count in (0, 3, 10, 30, 100, 300, 1000, 3000, 10000)]
standard_scale_labels = '0 3 10 30 100 300 1000 3000 10000'.split()

# Shorter variant of the scale that stops at 1000 fatalities.
small_scale = [np.log1p(count) for count in (0, 3, 10, 30, 100, 300, 1000)]
small_scale_labels = '0 3 10 30 100 300 1000'.split()

# Blank labels for the small scale (ticks without text).
small_scale_nolabels = [''] * 7

In [None]:
# Prepare the gdf
gdf_base = gpd.read_parquet('../Tools/geometry/cm_geometry.parquet')
gdf = gdf_base.copy()

In [None]:
# Future prediction maps, predictions, rolling
# Output folder for the map images and the steps (months ahead) that get a map.
path = Mydropbox + 'Projects/PredictingFatalities/maps/cm_future/'
stepstoplot=[3,5,8,12,24,36]
#titles = [vid2date(i) for i in stepstoplot + EndOfHistory]


# Attach the country geometries to the calibrated ensemble predictions.
# NOTE(review): the join assumes the prediction frame is indexed by (month_id, country_id)
# so that it can be matched on country_id - confirm.
df = EnsembleList[0]['future_df_calibrated'].copy()
gdf2 = gdf_base.copy()
df = df.join(gdf2.set_index("country_id"))
# 'geom' is the geometry column of the joined frame.
gdf3 = gpd.GeoDataFrame(df, geometry="geom")

In [None]:
# Draw one map of the ensemble prediction per selected step and save it to the
# Dropbox map folder (and to the Overleaf folder when WriteToOverleaf is set).
for step in stepstoplot:
    month = step + EndOfHistory
    gdf = gdf3.loc[month]
    m = Mapper2(
        width=10,
        height=10,
        frame_on=True,
        title=(
            'Ensemble predictions as of ' + vid2date(month)
            + ', ' + str(step) + ' months after last month with data'
        ),
        bbox=[-18.5, 64.0, -35.5, 43.0],
    ).add_layer(
        gdf=gdf,
        map_scale=standard_scale,
        cmap="rainbow",
        edgecolor="black",
        linewidth=0.5,
        column='step_combined',
        inform_colorbar=True,
    )
    # Ticks at the scale values, labelled with the untransformed fatality counts.
    m.cbar.set_ticks(standard_scale)
    m.cbar.set_ticklabels(standard_scale_labels)
    # The Overleaf copy is written first, then the Dropbox copy.
    if WriteToOverleaf:
        m.save(f'{overleafpath}PredictionMap_cm_ensemble_standard_scale_r{EndOfHistory}_m{month}.png')
    m.save(f'{path}PredictionMap_cm_ensemble_standard_scale_r{EndOfHistory}_m{month}.png')

## Retrain the surrogate models

In [None]:
Datasets[8]['df'].loc[544]

In [None]:
from cm_surrogatemodels import TrainSurrogateModels
# The short list below is immediately replaced by the full list of steps;
# NOTE(review): presumably kept as a quick toggle for faster test runs.
SurrogateModelSteps = [1,3,6,36]
SurrogateModelSteps = steps
# Last month of the training period of the test partition.
EndOfHistory_test = test_partitioner_dict['train'][1] 
Plotpath = Mydropbox + 'Projects/PredictingFatalities/SurrogateModels/'


       
# Later cells read the keys 'Step', 'Modelname', 'Shortname', 'Columns', 'Predcolname'
# and 'GAM' from the items of the returned list.
# NOTE(review): Datasets[8] is selected by position; confirm it is the intended dataset
# with the dataset listing cell.
SurrogateModelList = TrainSurrogateModels(data_df = Datasets[8]['df'], 
                                          Ensemble_df = ensemble_test_df, 
                                          EndOfHistory = EndOfHistory_test, 
                                          SurrogateModelSteps = SurrogateModelSteps, 
                                          NumberOfMonths = 48,
                                          Plotpath = Plotpath)

In [None]:
SurrogateModelList

In [None]:
# Predictor values observed in the last month with data; every step is predicted from them.
predictors_df = Datasets[8]['df'].loc[EndOfHistory]

EnsembleList[0]['future_df_surrogates'] = EnsembleList[0]['future_df_calibrated'].copy()
# Initialize dataframe to hold surrogate model predictions:
# one empty column per surrogate model, taken from the step-1 entries.
for item in SurrogateModelList:
    if item['Step'] == 1:
        colname = item['Modelname'][item['Modelname'].index(' ') + 1:] # Remove first word (which is a step number)
        EnsembleList[0]['future_df_surrogates'][colname] = np.nan
# Compute predictions for each step
for step in steps:
    month = EndOfHistory + step
    for item in SurrogateModelList:
        colname = item['Modelname'][item['Modelname'].index(' ') + 1:] # Remove first word (which is a step number)
        if item['Step']==step:
            # Single .loc assignment instead of chained indexing, which may write to a temporary copy.
            # NOTE(review): the values are assigned positionally - this assumes the rows of
            # predictors_df and the rows of the target month are in the same country order.
            EnsembleList[0]['future_df_surrogates'].loc[month, colname] = item['GAM'].predict(predictors_df[item['Columns']])

# Storing the surrogate model future predictions
# Each surrogate model is stored once; the step-36 entries are used to enumerate the models.
api_definition = []
for item in SurrogateModelList:
    if item['Step'] == 36:
        colname = item['Modelname'][item['Modelname'].index(' ') + 1:] # Remove first word (which is a step number)
        predstore_future = level +  '_surrogate_' + item['Shortname'] + '_f' + str(EndOfHistory)
        print('Storing surrogate model predictions for model',colname, 'as:',predstore_future)
        predictions_to_store = pd.DataFrame(EnsembleList[0]['future_df_surrogates'][colname])
        predictions_to_store.forecasts.set_run(run_id)
        predictions_to_store.forecasts.to_store(name=predstore_future, overwrite = True)
        # Record where the predictions were stored, for the API definition file.
        api_item = {
            'Dev_id': dev_id,
            'EndOfHistory': EndOfHistory,
            'Model': colname,
            'Prediction storage colname': predstore_future
        }
        api_definition.append(api_item)

api_definition

In [None]:
import json

# Write the api_definition records to a JSON file in the Intermediates folder.
# NOTE(review): the file name is spelled 'api_defintion' (sic); it is kept unchanged
# because anything reading this file depends on the exact name.
with open('../Intermediates/api_defintion.json', 'w') as api_file:
    api_file.write(json.dumps(api_definition))

In [None]:
# Print the name of every surrogate model defined for step 1.
for model in SurrogateModelList:
    if model['Step'] != 1:
        continue
    print(model['Modelname'])

In [None]:
# Mapping
# Draws and saves one map of surrogate model predictions for every surrogate model
# whose step is listed in MapSteps.

# Predictor values for the last month with data, and for three months earlier.
# predictors_df_3m is not used in this cell; the change-in-prediction cell reads it.
predictors_df = Datasets[10]['df'].loc[EndOfHistory]
predictors_df_3m = Datasets[10]['df'].loc[EndOfHistory-3]

path = Mydropbox + 'Projects/PredictingFatalities/maps/cm_future/Surrogate/'
# Break points of the colour scale, given as log1p of 0, 3, 10, 30, 100 and 300
surrogate_scale=[np.log1p(0),np.log1p(3),np.log1p(10), np.log1p(30), np.log1p(100), np.log1p(300)]

# All colour-bar tick labels are blank.
# NOTE(review): confirm the empty labels are intended.
surrogate_scale_labels = ['', '','', '', '', '']

MapSteps = [1,3,6,12,36]

# Country geometries keyed by country_id; identical for every model, so prepared once
gdf2 = gdf_base.copy()
country_geometries = gdf2.set_index("country_id")

for model in SurrogateModelList:
    if model['Step'] not in MapSteps:
        continue
    print(model['Modelname'], model['Columns'])

    Predcolname = model['Predcolname']
    step = model['Step']
    TargetMonth = EndOfHistory+step

    # Predictors of this model plus its prediction. The explicit copy makes df an
    # independent frame, so adding the prediction column never writes to a slice of
    # predictors_df.
    features = predictors_df[model['Columns']]
    df = features.copy()
    df[Predcolname] = model['GAM'].predict(features)
    df = df.join(country_geometries)
    gdf3 = gpd.GeoDataFrame(df, geometry="geom")

    m=Mapper2(
        width=10,
        height=10,
        frame_on=True,
        title='Surrogate model ' + model['Modelname'] + ' predictions as of ' + vid2date(TargetMonth) + ', ' + str(step) + ' months after last month with data',
        bbox=[-18.5, 64.0, -35.5, 43.0],
    ).add_layer(
        gdf=gdf3,
        map_scale=surrogate_scale,
        cmap="rainbow",
        edgecolor="black",
        linewidth=0.5,
        column=Predcolname,
        inform_colorbar=True
    )
    m.cbar.set_ticks(surrogate_scale)
    m.cbar.set_ticklabels(surrogate_scale_labels)

    m.save(f'{path}cm_surrogate_{Predcolname}_small_scale_{EndOfHistory}_{TargetMonth}.png')
    if WriteToOverleaf:
        m.save(f'{overleafpath}cm_surrogate_{Predcolname}_small_scale_{EndOfHistory}_{TargetMonth}.png')


# Changes in the forecasts at steps 3, 6, 12 and 36 since the forecasts issued three months ago, and relative to observed fatalities over the past six months

In [None]:
# Reading in data for mapping
# Predictions now and then
# Store name of the ensemble forecasts issued with data up to three months before EndOfHistory
predstore_then = level +  '_' + EnsembleList[0]['modelname'] + '_f' + str(EndOfHistory-3)

df_now = EnsembleList[0]['future_df_calibrated'].copy()
try:
    df_then = pd.DataFrame.forecasts.read_store(run=run_id, name=predstore_then)
except Exception as read_error:
    # Best effort: keep going so that the actuals below are still fetched, but reset
    # df_then so a value left over from an earlier run of this cell cannot be mistaken
    # for the forecasts requested here. (Exception rather than a bare except, so that
    # KeyboardInterrupt/SystemExit are not swallowed.)
    df_then = None
    print('Trouble reading forecasts issued three months ago')
    print(f'  store name: {predstore_then}; error: {read_error!r}')

# Actuals
qs = Queryset("hh_fatalities_ged_ln_ultrashort", "country_month")
df_lastobserved = qs.fetch().astype(float)

In [None]:
# Fetch the metadata stored under the name 'ensemble_f506'.
# NOTE(review): the name (including the month id 506) is hard-coded rather than derived
# from EndOfHistory - confirm this is intended.
ViewsMetadata().with_name('ensemble_f506').fetch()

In [None]:
# Compute log of mean non-logged fatalities, past six months
# (month EndOfHistory and the five months before it).
# The explicit copy makes df_observed an independent frame, so the columns added below
# are never written to a slice of df_lastobserved.
df_observed = df_lastobserved.loc[EndOfHistory].copy()
# Month 0 (EndOfHistory): back-transform the logged count with expm1
df_observed['ged_sb_0'] = np.expm1(df_observed['ln_ged_sb'])
df_observed['ged_sum'] = df_observed['ged_sb_0']
# Months 1-5 before EndOfHistory: back-transform and add to the running sum
for t in [1,2,3,4,5]:
    colname = 'ged_sb_' + str(t)
    df_observed[colname] = np.expm1(df_lastobserved.loc[EndOfHistory-t]['ln_ged_sb'])
    df_observed['ged_sum'] = df_observed['ged_sum'] + df_observed[colname]
# Mean over the six months, returned to the log1p scale
df_observed['ln_ged_sum'] = np.log1p(df_observed['ged_sum']/6)

In [None]:
# Assemble, for each forecast step, the frames used to map changes in predictions.
# Each StepsForward entry ends up with:
#   'Step'    - forecast step (months ahead)
#   'df_now'  - current forecasts for month EndOfHistory + Step
#   'df_then' - forecasts issued three months ago at the same step,
#               i.e. for month (EndOfHistory - 3) + Step
#   'df'      - both of the above plus the observed six-month mean and change columns
#   'sdf', 'sdf_3m' - surrogate model inputs and predictions, now and three months ago
#   'gdf_t', 'gdf'  - 'df' joined with country geometries (DataFrame / GeoDataFrame)
StepsForward = [
    {
        'Step': step_forward,
        'df_now': df_now.loc[EndOfHistory + step_forward],
        'df_then': df_then.loc[EndOfHistory - 3 + step_forward],
    }
    for step_forward in [3, 6, 12, 36]
]

# NOTE(review): the import of source_db_path from ingester3.config is commented out in
# the import cell at the top of the notebook - confirm the name is defined before this
# cell runs.
engine = sa.create_engine(source_db_path)

# Country geometries are the same for every step: query and reproject them once
# instead of once per loop iteration.
country_gdf = gpd.GeoDataFrame.from_postgis(
    "SELECT id as country_id, in_africa, in_me, geom FROM prod.country",
    engine,
    geom_col='geom'
)
country_gdf = country_gdf.to_crs(4326).set_index("country_id")

# predictors_df and predictors_df_3m are the predictor frames created in the surrogate
# mapping cell (values at EndOfHistory and at EndOfHistory - 3).
for s in StepsForward:
    # rename() returns new frames, so the .loc slices taken above are not modified in place
    s['df_now'] = s['df_now'].rename(columns={'step_combined':'Now'})
    s['df_then'] = s['df_then'].rename(columns={'step_combined':'Then'})
    s['df'] = pd.concat([s['df_now'],s['df_then'],df_observed['ln_ged_sum']],axis=1)
    s['df']['Change_in_prediction'] = s['df']['Now']-s['df']['Then']
    s['df']['Change_since_last_observed'] = s['df']['Now']-s['df']['ln_ged_sum']

    # Surrogate model change: prediction from current predictor values minus the
    # prediction from the predictor values three months earlier, one column per model
    for sm in SurrogateModelList:
        if sm['Step'] != s['Step']:
            continue
        features_now = predictors_df[sm['Columns']]
        features_3m = predictors_df_3m[sm['Columns']]
        # Explicit copies: the prediction column is added to independent frames
        s['sdf'] = features_now.copy()
        s['sdf'][sm['Predcolname']] = sm['GAM'].predict(features_now)
        s['sdf_3m'] = features_3m.copy()
        s['sdf_3m'][sm['Predcolname']] = sm['GAM'].predict(features_3m)
        print(sm['Step'],sm['Predcolname'])
        dfcolname = 's_pred_m' + sm['Shortname'] + '_ch3m'
        s['df'][dfcolname] = s['sdf'][sm['Predcolname']] - s['sdf_3m'][sm['Predcolname']]

    s['gdf_t'] = s['df'].join(country_gdf)
    s['gdf'] = gpd.GeoDataFrame(s['gdf_t'], geometry="geom")
    
    

In [None]:
# Summary statistics of the mapping frame for the fourth StepsForward entry (Step 36).
StepsForward[3]['gdf'].describe()

In [None]:
# Display the surrogate model specifications: a list of dicts from which other cells read
# the 'Step', 'Modelname', 'Shortname', 'Columns', 'Predcolname' and 'GAM' entries.
SurrogateModelList

In [None]:
# Maps of changes in predictions: one map per StepsForward entry and per column listed
# in ColumnsToPlot, saved under `path` (and under `overleafpath` when WriteToOverleaf is set).
delta = 3
# Singular/plural wording for the colour-bar label
mnth = 'month' if abs(delta)==1 else 'months'

# Colour-bar ticks: labelled as percent changes p and positioned at log((100 + p) / 100).
# The label of the first (lowest) tick is left blank.
tickvalues = np.array([-83,-80,-50,-20,0,20,50,100,200,500])
ticklabels=[str(tv) for tv in tickvalues]
ticklabels[0] = ""
tickvalues = np.log((100+tickvalues)/100)


# Not used in this cell.
t0s=range(506,508) # From start of month A, to start of (but not including) month B
bbox="africa_middle_east"
cmap='bwr'#'rainbow'
ColumnsToPlot = ['Change_in_prediction',
                 'Change_since_last_observed',
                 's_pred_mCH_ch3m',
                 's_pred_mNCH_ch3m',
                 's_pred_mDem_ch3m',
                 's_pred_mIMR_ch3m',
                # 's_pred_mTopics10_ch3m',
                ]


for s in StepsForward:
    print('Step:',s['Step'])
    for column in ColumnsToPlot:
        plot = ViewsMap(
            width=10,
            label=f"{column}, s= {s['Step']}",
            title="",
            scale=None,
            bbox=bbox
        ).add_layer(
            s['gdf'],
            edgecolor="black",
            linewidth=0.2,
            column=column,
            inform_colorbar=True,
            cmap=cmap,
            vmin=tickvalues[0],vmax=tickvalues[-1]
        )

        ax=plot.ax
        fig=plot.fig
        # Draw the country outlines on top of the coloured layer
        s['gdf'].plot(ax=ax,edgecolor='black',linewidth=0.2,facecolor='None')

        plot.cbar.set_ticks(tickvalues)
        plot.cbar.set_ticklabels(ticklabels)
        plot.cbar.set_label(f'Percent change in {column} over past '+str(delta)+' '+mnth)
        plot.save(path+column+str(s['Step'])+'_r' + str(EndOfHistory) +'.png')
        if WriteToOverleaf:
            plot.save(overleafpath+column+str(s['Step'])+'_r' + str(EndOfHistory) +'.png')

In [None]:
# First rows of a StepsForward mapping frame. `s` is the loop variable left over from
# the preceding `for s in StepsForward` loops, i.e. the last entry they visited.
s['gdf'].head()

# Uncertainty of predictions

In [None]:
# Train model to transform predictions from  fatalities to multiclass probabilities
# One multinomial classifier is fitted per step on ensemble_test_df (predicted value ->
# observed class) and then applied to the future prediction for month EndOfHistory + step.
from sklearn.linear_model import LogisticRegression
# Classes are: 
# 0: Less than 0.5
# 1: 0.5-10
# 2: 10-100
# 3: 100-1000
# 4: 1000 +
MULTI_CLASSES = [0,1,2,3,4]

multi_classifiers = []
df_future = EnsembleList[0]['future_df_calibrated'].copy()
# One probability column per class, filled month by month below
for cls in MULTI_CLASSES:
    df_future[f'multi_{cls}_logit'] = np.nan

# Month of every row; assumes the first index level is the month id (the original
# code selected rows with .loc[Month]) - TODO confirm.
future_months = df_future.index.get_level_values(0)

# Multiclass outcome: the same for every step, and passed as a 1-D array as
# scikit-learn expects for y (a column vector triggers a conversion warning).
y_multi = np.array(ensemble_test_df['ged_multi']).ravel()

for step in steps:
    Month = EndOfHistory + step
    X = np.array(ensemble_test_df[f'step_pred_{step}']).reshape(-1,1)
    # Multiclass
    multi_clf = LogisticRegression(random_state=0).fit(X, y_multi)
    multi_classifiers.append(multi_clf)

    in_month = future_months == Month
    X_future = np.array(df_future.loc[in_month, 'step_combined']).reshape(-1,1)
    p_multi = multi_clf.predict_proba(X_future)

    # predict_proba columns are ordered like multi_clf.classes_, which only contains the
    # classes present in the training data. Look probabilities up by class label instead
    # of by position; a class never seen in training gets probability 0.
    proba_by_class = {int(c): p_multi[:, k] for k, c in enumerate(multi_clf.classes_)}
    for cls in MULTI_CLASSES:
        # Single .loc[rows, column] assignment instead of chained frame[col].loc[...] = ...
        df_future.loc[in_month, f'multi_{cls}_logit'] = proba_by_class.get(cls, 0.0)


df_future.describe()

In [None]:
# Some uncertainty calculations
#October 2022 (514)
# Hard-coded rows: (country name, numeric id, value on the log1p scale, then the five
# class probabilities in the order of CLASS_LABELS).
# NOTE(review): the id looks like a country_id and the third field like a predicted
# ln(fatalities + 1) - confirm.
CL = [
    ('Ethiopia',57,4.114,0.0025217435284640467,0.167910951747582,0.7099704830039664,0.11880956751085855,0.0007872542091291349),
    ('Kenya',237,2.202,0.27979924123523675,0.37093623451842744,0.3384779788333302,0.010780625441912769,5.919971092604168e-06),
    ('Nigeria',79,5.891,1.1834522019553202e-05,0.030050677139008334,0.5285925149395453,0.41358824669460437,0.027756726704822352),
    ('South Africa',163,0.103,0.9794051664145842,0.017598332401191557,0.0029811455728619585,1.5355064175926178e-05,5.471865662986281e-10),
    ('South Sudan',246,1.782,0.5171316993204126,0.2898656997683062,0.18882479925510778,0.004176475005701126,1.3266504722650525e-06),
    ('Sudan',245,1.971,0.40522067161826564,0.3345539054572092,0.2536133366482648,0.006609400618818177,2.6856574423520527e-06),
    ('Syria',220,4.818,0.0003291633453796389,0.09280289507829718,0.6904155754185439,0.21292026113053514,0.0035321050272440497),
    ('Tanzania',242,0.741,0.9214126374988004,0.06115410932346314,0.017278449601019125,0.0001547909157059216,1.2661011272757968e-08),
    ('Yemen',124,6.352,2.556591139139699e-06,0.016708050733656395,0.42553124292260786,0.4969226339388311,0.06083551581376548),
    ('Zimbabwe',158,0.050,0.9816070866339232,0.015813575350686226,0.0025667146500692674,1.2622945665883365e-05,4.196556261097782e-10),
]

# Printed label of each probability field, in tuple order
CLASS_LABELS = ('< 0.5:', '0.5-10:', '10-100:', '100-1000:', '1000+:')

for C in CL:
    ln_value = C[2]
    # Name, the log1p-scale value, and the same value back-transformed with expm1
    print(C[0], ln_value, np.expm1(ln_value))
    for class_label, class_prob in zip(CLASS_LABELS, C[3:]):
        print(class_label, class_prob)
    print('****')
    

In [None]:
# Write df_future (including the multi_*_logit class-probability columns) to a CSV file.
# The path is relative, so the file lands in the notebook's current working directory.
df_future.to_csv('Categorical_probabilities.csv')