# ViEWS 3 ensembles: future predictions

UK FCDO Fatalities project, pgm level

This notebook produces future predictions for the set of models defined in `ModelList`, a list of dictionaries produced by the notebook `fatal_pgm_compute_ensemble` in this repository.

The notebook draws on the following .py script files in this repository:

Ensembling.py

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
# Basics
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cbook as cbook
# sklearn
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn import linear_model
from sklearn.metrics import mean_squared_error
# Views 3
from viewser.operations import fetch
from viewser import Queryset, Column
import views_runs
from views_partitioning import data_partitioner, legacy
from stepshift import views
import views_dataviz
from views_runs import storage, ModelMetadata
from views_runs.storage import store, retrieve, fetch_metadata
from views_forecasts.extensions import *

# Mapper
import geopandas as gpd

from views_dataviz.map import mapper, utils
from views_dataviz import color
from views_dataviz.map.presets import ViewsMap

import sqlalchemy as sa
#from ingester3.config import source_db_path

# Other packages
import pickle as pkl

#Parallelization
from joblib import Parallel, delayed, cpu_count
from functools import partial
from genetic2 import *

from pathlib import Path

# Predicting fatalities scripts
from HurdleRegression import *
import Ensembling
from Ensembling import CalibratePredictions, RetrieveStoredPredictions, mean_sd_calibrated, gam_calibrated
import FetchData

In [None]:
# Common parameters:

# dev_id selects the ensemble metadata file and the datasets to fetch;
# run_id is the run under which predictions are stored and read.
dev_id = 'fatalities001'
run_id = 'fatalities001' 
# Last month with data (month id); forecasts target EndOfHistory + step.
EndOfHistory = 508
# Production-run label, used in output folder names.
prod_id = '2022_04_t01'
get_future = True
# When True, the Overleaf sync folder path is defined at the end of this cell.
WriteToOverleaf = True

# Level of analysis; used as prefix of all prediction-store names.
level = 'pgm'

# Name of the outcome column in the prediction data frames.
depvar = "ln_ged_sb_dep"

# All steps from 1 through 36 (inclusive)
steps = [*range(1, 36+1, 1)] # Which steps to train and predict for

#steps = [1,2,3,4,5,6,7,8,9,10,11,12,15,18,21,24] # Which steps to train and predict for
#fi_steps = [1,3,6,12,36] # Which steps to present feature importances for
#steps = [1,12,24,36]
fi_steps = [1,3,6,12,36]
#steps = [1,6,36]
#fi_steps = [1,6,36]

# Specifying partitions
# Each dictionary gives the (first, last) month id of the training and
# prediction periods of one partition.

calib_partitioner_dict = {"train":(121,396),"predict":(397,444)}
test_partitioner_dict = {"train":(121,444),"predict":(445,492)}
future_partitioner_dict = {"train":(121,492),"predict":(493,504)}
calib_partitioner =  views_runs.DataPartitioner({"calib":calib_partitioner_dict})
test_partitioner =  views_runs.DataPartitioner({"test":test_partitioner_dict})
future_partitioner =  views_runs.DataPartitioner({"future":future_partitioner_dict})

# NOTE(review): user-specific absolute paths; adjust before running elsewhere.
Mydropbox = '/Users/angli742/Dropbox (ViEWS)/ViEWS/'
localgitpath = '/Users/angli742/ViEWS3/'

if WriteToOverleaf:
    overleafpath = f'/Users/angli742/Dropbox (ViEWS)/ViEWS/DataReleases/OverleafSync/MonthlyUpdates/{run_id}_{prod_id}/'


In [None]:
# Read the ensemble metadata file for this development run and turn each
# row into one model dictionary.
gitname = 'EnsembleMetaData_pgm_' + dev_id + '.csv'
EnsembleMetaData = pd.read_csv(gitname)
ModelList = EnsembleMetaData.to_dict('records')
# List the models together with their position in ModelList.
for i, model in enumerate(ModelList):
    print(i, model['modelname'])

# Retrieve and calibrate predictions and data

In [None]:
# Retrieving the predictions for calibration and test partitions
# The ModelList contains the predictions organized by model

ModelList = Ensembling.RetrieveStoredPredictions_pgm(ModelList, steps, EndOfHistory, run_id, level, False)

In [None]:
ModelList[0].keys()

In [None]:
Datasets = FetchData.FetchData_pgm(dev_id)

In [None]:
len(Datasets)

In [None]:
for model in ModelList:
    print(model['modelname'])
#    print(model['predictions_calib_df'])

In [None]:
# When True, the prediction loop in this cell also calibrates each model's
# future predictions and stores the calibrated version.
calibrate_const_models=False

from views_runs import Storage, StepshiftedModels
from views_partitioning.data_partitioner import DataPartitioner
from viewser import Queryset, Column
from views_runs import operations
from views_runs.run_result import RunResult

# When False, the loop first tries to read existing predictions from
# prediction storage and only predicts anew if they are missing.
RewritePredictions = True # Set this to True to rewrite predictions even if they exist

def RetrainAndPredict(modelname):
    """Return point predictions for the true future for one constituent model.

    Obtains the model run for the future partition through
    `RunResult.retrain_or_retrieve` (called with `retrain=False`) and asks
    that run for point predictions from `EndOfHistory`.

    Parameters
    ----------
    modelname: Name of the model. It is looked up in `ModelList` and passed
        on to `FetchData.get_training_data`.

    Returns
    -------
    Whatever `future_point_predict(EndOfHistory, data)` returns for the run.

    Side effects
    ------------
    Stores the run result on the model dictionary under 'RunResult_future'.
    """
    # Local import: `datetime` is not imported explicitly at the top of the notebook.
    from datetime import datetime

    # Resolve the model specification from the argument rather than relying
    # on whatever the module-level loop variable `model` happens to be.
    model_spec = next((m for m in ModelList if m['modelname'] == modelname), None)
    if model_spec is None:
        # Backward compatibility: callers that pass something other than a
        # model name keep the previous behaviour of using the module-level
        # `model` currently being processed.
        model_spec = model

    force_retrain = False
    # Predictions for true future
    print('Future', datetime.now())
    modelstore = storage.Storage()
    model_spec['RunResult_future'] = RunResult.retrain_or_retrieve(
        retrain=force_retrain,
        store=modelstore,
        # The future partition is registered under the partition name "test".
        partitioner=DataPartitioner({"test": future_partitioner_dict}),
        stepshifted_models=StepshiftedModels(model_spec['algorithm'], steps, model_spec['depvar']),
        dataset=FetchData.get_training_data(Datasets, ModelList, modelname),
        queryset_name=model_spec['queryset'],
        partition_name="test",
        timespan_name="train",
        storage_name=model_spec['modelname'] + '_future',
        author_name="JED",
    )
    run_result = model_spec['RunResult_future']
    predictions_future = run_result.run.future_point_predict(EndOfHistory, run_result.data)
    return predictions_future



# `datetime` is not imported explicitly at the top of the notebook.
from datetime import datetime

print('Computing predictions, production run ' + prod_id + ', development run ' + run_id)
for i, model in enumerate(ModelList):

    # For each constituent model:
    # (1) unless RewritePredictions is set, try to read the non-calibrated
    #     future predictions from prediction storage;
    # (2) otherwise (or if they are missing) obtain them via RetrainAndPredict;
    # (3) store the non-calibrated predictions for this run;
    # (4) optionally calibrate and store the calibrated predictions.
    # To do: set the data_preprocessing to the function in the model dictionary

    model['predstorename_ncal'] = level + '_' + model['modelname'] + '_f' + str(EndOfHistory)
    model['predstorename_cal'] = level + '_' + model['modelname'] + '_calibrated' + '_f' + str(EndOfHistory)

    print(i, model['modelname'])
    print('Trying to retrieve non-calibrated predictions', datetime.now())

    # This section previously appeared twice in a row, so every model was
    # predicted twice per run; it now runs once.
    if RewritePredictions:
        print(model['predstorename_ncal'])
        model['future_df_noncalibrated'] = RetrainAndPredict(model['modelname'])
    else:
        try:
            model['future_df_noncalibrated'] = pd.DataFrame.forecasts.read_store(run=run_id, name=model['predstorename_ncal'])
        except KeyError:
            print(model['predstorename_ncal'], ', run', run_id, 'does not exist, predicting')
            # RetrainAndPredict expects the model name, not the prediction-store name.
            model['future_df_noncalibrated'] = RetrainAndPredict(model['modelname'])
        else:
            print('Predictions for ', model['predstorename_ncal'], ', run', run_id, 'exist, retrieving from prediction storage')

    # Storing non-calibrated
    model['future_df_noncalibrated'].forecasts.set_run(run_id)
    model['future_df_noncalibrated'].forecasts.to_store(name=model['predstorename_ncal'], overwrite=True)

    if calibrate_const_models:
        print('Calibrating')
        model['future_df_calibrated'] = model['future_df_noncalibrated'].copy()

        # NOTE(review): `cal_pg_c` is not defined in the visible notebook, and
        # `cm_predictions_future` / `df_pg_id_c_id` are only created in later
        # cells. Confirm before enabling calibrate_const_models.
        model['future_df_calibrated']['step_combined']=cal_pg_c(model['future_df_calibrated'],cm_predictions_future,'step_combined',df_pg_id_c_id=df_pg_id_c_id,log_feature=True,super_calibrate=False)
        # Storing calibrated
        model['future_df_calibrated'].forecasts.set_run(run_id)
        model['future_df_calibrated'].forecasts.to_store(name=model['predstorename_cal'], overwrite=True)

print('All done')
        

In [None]:
# Ensembles live in their own list of dictionaries, separate from ModelList.
Ensemble = dict(
    modelname='ensemble_cm_calib',
    algorithm=[],
    depvar=depvar,
    data_train=[],
    Algorithm_text='',
    calibration_gams=[],
    future_df_calibrated=[],
)
EnsembleList = [Ensemble]



In [None]:
cm_ensemble=ViewsMetadata().with_name('cm_genetic_ensemble_f'+str(EndOfHistory)).fetch()

In [None]:
calib_run_id=int(cm_ensemble['runs_id'].values[0])

In [None]:
# Read the stored cm-level genetic-ensemble predictions (calibration, test and
# future) from the run identified by calib_run_id.
cm_predictions_calib = pd.DataFrame.forecasts.read_store(run=calib_run_id, name='cm_ensemble_genetic_calib')
cm_predictions_test = pd.DataFrame.forecasts.read_store(run=calib_run_id, name='cm_ensemble_genetic_test')
cm_predictions_future = pd.DataFrame.forecasts.read_store(run=calib_run_id, name='cm_genetic_ensemble_f'+str(EndOfHistory))

In [None]:
# Names of the per-step prediction columns, one per entry in steps
stepcols = [f'step_pred_{step}' for step in steps]

In [None]:
# Unweighted mean ensemble of all models in ModelList, followed by
# calibration against the cm-level predictions.
n_models = len(ModelList)
base_model = ModelList[0]

# Outcome columns are taken from the first model's frames.
targetcalib=base_model['predictions_calib_df'][depvar]
targettest=base_model['predictions_test_df'][depvar]

# Running sums of the predictions, started from the first model
valscalib=base_model['predictions_calib_df'][stepcols].values.copy()
valstest=base_model['predictions_test_df'][stepcols].values.copy()
valsfuture=base_model['future_df_noncalibrated'].values.copy()

# Index and columns used to rebuild data frames from the summed arrays
trimmed_calib=base_model['predictions_calib_df'][stepcols].copy()
index_calib=trimmed_calib.index
columns_calib=trimmed_calib.columns

trimmed_test=base_model['predictions_test_df'][stepcols].copy()
index_test=trimmed_test.index
columns_test=trimmed_test.columns

trimmed_future=base_model['future_df_noncalibrated'].copy()
index_future=trimmed_future.index
columns_future=trimmed_future.columns

for model in ModelList[1:]:
    print('adding',model['modelname'])

    valscalib+=model['predictions_calib_df'][stepcols].values
    valstest+=model['predictions_test_df'][stepcols].values
    valsfuture+=model['future_df_noncalibrated'].values

# Divide once, after every model has been added. Doing this inside the loop
# divided the running sum on each iteration (wrong mean for more than two
# models) and left the ensemble frames undefined for a single-model list.
valscalib/=n_models
valstest/=n_models
valsfuture/=n_models

Ensemble['predictions_calib_df']=pd.DataFrame(data=valscalib, index=index_calib, columns=columns_calib)
Ensemble['predictions_test_df']=pd.DataFrame(data=valstest, index=index_test, columns=columns_test)
Ensemble['predictions_future_df']=pd.DataFrame(data=valsfuture, index=index_future, columns=columns_future)

df_pg_id_c_id=Ensembling.fetch_df_pg_id_c_id()

# Calibrate every step column of the calibration and test frames against
# the corresponding cm-level predictions.
for col in stepcols:
    Ensemble['predictions_calib_df'][col]=Ensembling.calibrate_pg_with_c(Ensemble['predictions_calib_df'],cm_predictions_calib,col,df_pg_id_c_id=df_pg_id_c_id,log_feature=True,super_calibrate=False)

    Ensemble['predictions_test_df'][col]=Ensembling.calibrate_pg_with_c(Ensemble['predictions_test_df'],cm_predictions_test,col,df_pg_id_c_id=df_pg_id_c_id,log_feature=True,super_calibrate=False)

# NOTE(review): here the result is indexed by 'step_combined', whereas above
# it is assigned to a column directly; confirm the return type of
# calibrate_pg_with_c.
future_calib=Ensembling.calibrate_pg_with_c(Ensemble['predictions_future_df'],cm_predictions_future,'step_combined',df_pg_id_c_id=df_pg_id_c_id,log_feature=True,super_calibrate=False)

Ensemble['predictions_future_df']['step_combined']=future_calib['step_combined']

In [None]:
ModelList[1].keys()

In [None]:
# Add the outcome column (taken from the first model in ModelList) to the
# ensemble calibration and test frames.
Ensemble['predictions_calib_df'][depvar]=targetcalib
Ensemble['predictions_test_df'][depvar]=targettest

In [None]:
# Store the ensemble predictions for the calibration, test and future
# partitions under this run, overwriting any existing entries.
predstore_calib = f"{level}_{Ensemble['modelname']}_calib"
predstore_test = f"{level}_{Ensemble['modelname']}_test"
predstore_future = f"{level}_{Ensemble['modelname']}_f{EndOfHistory}"

for store_name, frame_key in (
    (predstore_calib, 'predictions_calib_df'),
    (predstore_test, 'predictions_test_df'),
    (predstore_future, 'predictions_future_df'),
):
    Ensemble[frame_key].forecasts.set_run(run_id)
    Ensemble[frame_key].forecasts.to_store(name=store_name, overwrite=True)

# Use ensemble predictions for test partition to create categorical predictions

In [None]:
ensemble_test_df=Ensemble['predictions_test_df'].copy()

In [None]:
Ensemble['predictions_test_df']

In [None]:
# Dichotomous version of the dependent variable: 1 when the outcome is at
# least log1p(25), otherwise 0 (missing values also map to 0).
gte_25_cutoff = np.log1p(25)
ensemble_test_df['ged_gte_25'] = ensemble_test_df['ln_ged_sb_dep'].apply(
    lambda value: int(value >= gte_25_cutoff)
)
# Generate multiclass version for uncertainty estimation
def ged_categorical(x):
    """Return the ordinal class (0-4) of a value on the log1p scale.

    The class is the number of thresholds log1p(0.5), log1p(10), log1p(100)
    and log1p(1000) that `x` has reached: values below the first threshold
    give 0, values at or above the last give 4. A value that compares below
    none of the thresholds (including NaN) gives 4.
    """
    upper_bounds = (0.5, 10, 100, 1000)
    for category, bound in enumerate(upper_bounds):
        if x < np.log1p(bound):
            return category
    return len(upper_bounds)

ensemble_test_df['ged_multi'] = ensemble_test_df['ln_ged_sb_dep'].apply(ged_categorical)

ensemble_test_df.describe()

In [None]:
plt.scatter(ensemble_test_df['ln_ged_sb_dep'],ensemble_test_df['ged_multi'])

In [None]:
# Report and zero-fill missing values in each per-step prediction column.
for step in steps:
    step_column = f'step_pred_{step}'
    n_missing = ensemble_test_df[step_column].isnull().sum().sum()
    if n_missing == 0:
        continue
    print('****WARNING***** - detected',n_missing,'Nan(s) in column step_pred_'+str(step))
    print('Replacing with zeros')
    ensemble_test_df[step_column]=ensemble_test_df[step_column].fillna(0.0)

In [None]:
# Train model to transform predictions from  fatalities to (1) dichotomous and (2) multiclass
# One pair of logistic regressions per step, each fitted on that step's
# ensemble prediction as the single feature.
from sklearn.linear_model import LogisticRegression
dichotomous_classifiers = []
multi_classifiers = []

# The targets do not depend on the step. They are passed as 1-D arrays, the
# shape scikit-learn expects for y.
y_dich = np.array(ensemble_test_df['ged_gte_25']).ravel()
y_multi = np.array(ensemble_test_df['ged_multi']).ravel()

for step in steps:
    X = np.array(ensemble_test_df[f'step_pred_{step}'])
    X = X.reshape(-1,1)
    # Dichotomous
    dich_clf = LogisticRegression(random_state=0).fit(X, y_dich)
    dichotomous_classifiers.append(dich_clf)
    p_dich = dich_clf.predict_proba(X)
    # The f-prefix was missing here, so every step overwrote one column
    # literally named 'dich_step_{step}_logit'.
    ensemble_test_df[f'dich_step_{step}_logit'] = p_dich[:,1].ravel()
    # Multiclass
    multi_clf = LogisticRegression(random_state=0).fit(X, y_multi)
    multi_classifiers.append(multi_clf)
    p_multi = multi_clf.predict_proba(X)
    # Columns of predict_proba follow multi_clf.classes_; iterating over the
    # fitted classes avoids an IndexError when a class is absent from the data.
    for cls, cls_proba in zip(multi_clf.classes_, p_multi.T):
        ensemble_test_df[f'multi_{cls}_step_{step}_logit'] = cls_proba.ravel()

ensemble_test_df.describe()

In [None]:
EnsembleList[0]['future_df_dichotomous'] = EnsembleList[0]['predictions_future_df'].copy() # Copy from baseline

future_df = EnsembleList[0]['predictions_future_df']
dichotomous_df = EnsembleList[0]['future_df_dichotomous']
# Month of every row; assumes the first index level holds the month id,
# as the previous `.loc[month]` lookups did.
future_months = future_df.index.get_level_values(0)

# Pair each step with the classifier trained for it instead of indexing by
# step-1, which is only right when steps is exactly 1..N.
for step, dich_clf in zip(steps, dichotomous_classifiers):
    month = EndOfHistory + step
    in_month = future_months == month
    # Only the combined prediction column is the classifier's feature.
    x_d = np.array(future_df.loc[in_month, 'step_combined']).reshape(-1,1)
    pred_step = dich_clf.predict_proba(x_d)
    # Single .loc assignment on the frame: the chained form
    # df['step_combined'].loc[month] = ... may write to a temporary copy.
    dichotomous_df.loc[in_month, 'step_combined'] = pred_step[:,1]

In [None]:
# Store the dichotomous future predictions of the first ensemble under this run.
predstore_future_dich = f"{level}_{EnsembleList[0]['modelname']}_dich_f{EndOfHistory}"
dichotomous_future_df = EnsembleList[0]['future_df_dichotomous']
dichotomous_future_df.forecasts.set_run(run_id)
dichotomous_future_df.forecasts.to_store(name=predstore_future_dich, overwrite=True)

# Mapping future predictions

In [None]:
from matplotlib import pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
import contextily as ctx

from views_dataviz import color
from views_dataviz.map import utils
from views_dataviz.map.presets import ViewsMap

import sqlalchemy as sa
#from ingester3.config import source_db_path
#from ingester3.Country import Country
#from ingester3.extensions import *
#from ingester3.ViewsMonth import ViewsMonth

import geopandas as gpd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import geopandas as gpd
import pandas as pd
import numpy as np

class Mapper2:
    """
    Layered map drawn on a single matplotlib figure and axes.

    `Mapper2` takes basic properties and allows the user to consecutively add
    layers to the map object. This makes it possible to prepare mapping
    "presets" at any level of layeredness that can be built on further.

    Mapper2 additionally lets the caller pass an explicit colour scale
    (`map_scale`) when adding a layer. Custom labels are not implemented
    yet: `map_dictionary` is accepted by `add_layer` but currently unused.

    Attributes
    ----------
    width: Integer value for width in inches.
    height: Integer value for height in inches.
    bbox: List for the bbox per [xmin, xmax, ymin, ymax].
    cmap: Default colormap for layers that do not pass their own `cmap`.
    frame_on: Bool for whether to draw a frame around the map.
    title: Optional default title at matplotlib's default size.
    figure: Optional tuple of (fig, ax) to use if you want to plot into an
        already existing fig and ax, rather than making a new one.
    """

    def __init__(
        self,
        width,
        height,
        bbox=None,
        cmap=None,
        frame_on=True,
        title="",  # Default title without customization. (?)
        figure=None,
    ):
        self.width = width
        self.height = height
        self.bbox = bbox  # xmin, xmax, ymin, ymax
        self.cmap = cmap
        if figure is None:
            self.fig, self.ax = plt.subplots(figsize=(self.width, self.height))
        else:
            self.fig, self.ax = figure
        self.texts = []
        self.ax.set_title(title)

        if frame_on:  # Remove axis ticks only.
            self.ax.tick_params(
                top=False,
                bottom=False,
                left=False,
                right=False,
                labelleft=False,
                labelbottom=False,
            )
        else:
            self.ax.axis("off")

        if bbox is not None:
            self.ax.set_xlim((self.bbox[0], self.bbox[1]))
            self.ax.set_ylim((self.bbox[2], self.bbox[3]))

    @staticmethod
    def _scale_bounds(map_scale):
        """Return (min, max) of `map_scale`, or None if it gives no usable bounds."""
        if map_scale is None or map_scale is False:
            return None
        try:
            return min(map_scale), max(map_scale)
        except (TypeError, ValueError):
            # Not iterable (e.g. True) or empty: treat as "no manual scale".
            return None

    def add_layer(self, gdf, map_scale=False, map_dictionary=False, cmap=None, inform_colorbar=False, **kwargs):
        """Add a geopandas plot to a new layer.

        Parameters
        ----------
        gdf: Geopandas GeoDataFrame to plot.
        map_scale: Optional sequence of scale values. Its minimum and maximum
            become the colorbar range and, unless `vmin`/`vmax` are given in
            the kwargs, the colour range of the plot. If missing, the range
            recorded through `inform_colorbar` is used for the colorbar.
        map_dictionary: Reserved for manual labels; currently unused.
        cmap: Optional matplotlib colormap object or string reference
            (e.g. "viridis"). Defaults to the map's own `cmap`.
        inform_colorbar: Set or overwrite colorbar with the current layer.
            Not applicable when `color` is supplied in the kwargs.
        **kwargs: Geopandas `.plot` keyword arguments.

        Returns
        -------
        The map itself, so calls can be chained.
        """
        scale_bounds = self._scale_bounds(map_scale)

        if "color" in kwargs:
            # A solid-colour layer has no colormap and therefore no colorbar.
            colormap = None
        else:
            colormap = self.cmap if cmap is None else cmap
            if inform_colorbar and "column" in kwargs:
                if hasattr(self, "cax"):
                    self.cax.remove()
                if "vmin" not in kwargs:
                    self.vmin = gdf[kwargs["column"]].min()
                else:
                    self.vmin = kwargs["vmin"]
                if "vmax" not in kwargs:
                    self.vmax = gdf[kwargs["column"]].max()
                else:
                    self.vmax = kwargs["vmax"]

            # Choose the colorbar range explicitly instead of relying on a
            # bare `except:` around a failing call: manual scale first, then
            # the recorded data range. Without either there is nothing to draw.
            if scale_bounds is not None:
                self.add_colorbar(colormap, scale_bounds[0], scale_bounds[1])
            elif hasattr(self, "vmin") and hasattr(self, "vmax"):
                self.add_colorbar(colormap, self.vmin, self.vmax)

        plot_kwargs = dict(kwargs)
        # Caller-supplied vmin/vmax take precedence over the manual scale.
        if scale_bounds is not None and "vmin" not in kwargs and "vmax" not in kwargs:
            plot_kwargs["vmin"], plot_kwargs["vmax"] = scale_bounds
        self.ax = gdf.plot(ax=self.ax, cmap=colormap, **plot_kwargs)

        return self

    def add_colorbar(
        self,
        cmap,
        vmin,
        vmax,
        location="right",
        size="5%",
        pad=0.1,
        alpha=1,
        labelsize=16,
        tickparams=None,
    ):
        """Add custom colorbar to Map.

        Needed since GeoPandas legend and plot axes do not align, see:
        https://geopandas.readthedocs.io/en/latest/docs/user_guide/mapping.html

        Parameters
        ----------
        cmap: Matplotlib colormap object or string reference (e.g. "viridis").
        vmin: Minimum value of range colorbar.
        vmax: Maximum value of range colorbar.
        location: String for location of colorbar: "top", "bottom", "left"
            or "right".
        size: Size in either string percentage or number of pixels.
        pad: Float for padding between the plot's frame and colorbar.
        alpha: Float for alpha to apply to colorbar.
        labelsize: Integer value for the text size of the ticklabels.
        tickparams: Dictionary containing value-label pairs. For example:
            {0.05: "5%", 0.1: "10%"}

        Returns
        -------
        The map itself. The colorbar axes and colorbar are kept on the map
        as `cax` and `cbar`.
        """
        norm = plt.Normalize(vmin, vmax)
        if isinstance(cmap, str):
            cmap = plt.get_cmap(cmap)
        cmap = color.force_alpha_colormap(cmap=cmap, alpha=alpha)
        scalar_to_rgba = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
        divider = make_axes_locatable(self.ax)
        self.cax = divider.append_axes(location, size, pad)
        self.cax.tick_params(labelsize=labelsize)
        tickvalues = (
            list(tickparams.keys()) if tickparams is not None else None
        )
        self.cbar = plt.colorbar(
            scalar_to_rgba, cax=self.cax, ticks=tickvalues
        )
        if tickparams is not None:
            self.cbar.set_ticklabels(list(tickparams.values()))
        return self

    def save(
        self, path, dpi=200, **kwargs
    ):  # Just some defaults to reduce work.
        """Save Map figure to file and close the figure.

        Parameters
        ----------
        path: String path, e.g. "./example.png".
        dpi: Integer dots per inch. Increase for higher resolution figures.
        **kwargs: Matplotlib `savefig` keyword arguments.
        """
        self.fig.savefig(path, dpi=dpi, bbox_inches="tight", **kwargs)
        plt.close(self.fig)
        

def vid2date(i):
    """Convert a ViEWS month id to a 'YYYY/M' string.

    Month ids count calendar months consecutively, with id 1 being January
    1980 (so id 508 is 2022/4, matching EndOfHistory and prod_id in this
    notebook).

    The previous `i // 12` and `i % 12` arithmetic returned month 0 of the
    following year for every December id (e.g. 516 gave '2023/0' instead
    of '2022/12').
    """
    years_since_1980, month_index = divmod(i - 1, 12)
    year=str(1980 + years_since_1980)
    month=str(month_index + 1)
    return year+'/'+month
        
#def vid2date(i):
#    year=str(ViewsMonth(i).year)
#    month=str(ViewsMonth(i).month)
#    return year+'/'+month

#note the zip function occured earlier
standard_scale = [np.log1p(0),np.log1p(3),np.log1p(10), np.log1p(30), np.log1p(100),  np.log1p(300)]#, np.log1p(1000), np.log1p(3000),  np.log1p(10000)]
standard_scale_labels = ['0', '3','10', '30','100', '300']#, '1000', '3000', '10000']

small_scale=[np.log1p(0),np.log1p(3),np.log1p(10), np.log1p(30), np.log1p(100),  np.log1p(300)]#, np.log1p(1000)]


small_scale_labels = ['0', '3','10', '30','100', '300']#, '1000']

small_scale_nolabels = ['', '','', '','', '', '']

In [None]:
# get pgm geometries
gdf_base = gpd.read_parquet('./geometry/pgm_geometry.parquet')

In [None]:
# get cm geometries
gdf_c = gpd.read_parquet('./geometry/cm_geometry.parquet')
gdf_c = gdf_c.to_crs(4326)

In [None]:
df_with_wanted_index=Datasets[0]['df']

FetchData.index_check(EnsembleList[0],df_with_wanted_index)

In [None]:
# Future prediction maps, predictions, rolling
#path = Mydropbox + 'Projects/PredictingFatalities/maps/cm_future/'
# Steps ahead (months after EndOfHistory) for which maps are drawn
stepstoplot=[3,5,6,8,12,18,24,36]
#titles = [vid2date(i) for i in stepstoplot + EndOfHistory]


# Attach the grid-cell geometries to the ensemble's future predictions.
# The geometry table is indexed by "priogrid_gid" before joining.
# NOTE(review): presumably the prediction frame carries an index level of the
# same name for the join to match on; confirm.
df = Ensemble['predictions_future_df'].copy()
gdf2 = gdf_base.copy()
df = df.join(gdf2.set_index("priogrid_gid"))
# "geom" is the geometry column used for plotting.
gdf3 = gpd.GeoDataFrame(df, geometry="geom")

In [None]:
path= Mydropbox + f'DataReleases/MonthlyUpdates/{run_id}_{prod_id}/Continuous/Ensemble/ForecastMaps/'

In [None]:
# One prediction map per selected step, standard scale, wide bounding box.
for step in stepstoplot:
    month = step + EndOfHistory
    gdf = gdf3.loc[month]
    map_title = 'Ensemble predictions as of ' + vid2date(EndOfHistory+step) + ', ' + str(step) + ' months after last month with data'
    m = Mapper2(
        width=10,
        height=10,
        frame_on=True,
        title=map_title,
        bbox=[-18.5, 64.0, -35.5, 43.0],
    )
    m.add_layer(
        gdf=gdf,
        map_scale=standard_scale,
        cmap="rainbow",
        edgecolor="black",
        linewidth=0.2,
        column='step_combined',
        inform_colorbar=True,
    )
    ax = m.ax
    # Country outlines on top of the grid cells
    fg = gdf_c.plot(ax=ax, edgecolor='gray', linewidth=0.7, facecolor='None')

    # Label the colorbar with counts rather than log1p values
    m.cbar.set_ticks(standard_scale)
    m.cbar.set_ticklabels(standard_scale_labels)

    m.save(f'{path}PredictionMap_pgm_ensemble_standard_scale_r{EndOfHistory}_m{month}.png')

In [None]:
# Prediction maps zoomed in on the region around Ethiopia, with country names.
ethiopia_region_labels = [
    (0.4, 0.45, 'ETHIOPIA'),
    (0.2, 0.7, 'SUDAN'),
    (0.15, 0.35, 'S. SUDAN'),
    (0.65, 0.5, 'SOMALIA'),
    (0.35, 0.25, 'KENYA'),
]
for step in stepstoplot:
    month = step + EndOfHistory
    gdf = gdf3.loc[month]
    map_title = 'Ensemble predictions as of ' + vid2date(EndOfHistory+step) + ', ' + str(step) + ' months after last month with data'
    m = Mapper2(
        width=10,
        height=10,
        frame_on=True,
        title=map_title,
        bbox=[29.446846321370213, 50.987309710685814, 1.1561557161401845, 18.29970129951559],
    )
    m.add_layer(
        gdf=gdf,
        map_scale=standard_scale,
        cmap="rainbow",
        edgecolor="black",
        linewidth=0.2,
        column='step_combined',
        inform_colorbar=True,
    )
    ax = m.ax
    # Country outlines on top of the grid cells
    fg = gdf_c.plot(ax=ax, edgecolor='gray', linewidth=1.0, facecolor='None')
    figure = m.fig
    fontdict = {'fontsize':20}
    # Country names, positioned in figure coordinates
    for label_x, label_y, country_name in ethiopia_region_labels:
        figure.text(label_x, label_y, country_name, fontdict=fontdict, color='black')

    # Label the colorbar with counts rather than log1p values
    m.cbar.set_ticks(standard_scale)
    m.cbar.set_ticklabels(standard_scale_labels)

    m.save(f'{path}PredictionMap_Ethiopia_pgm_ensemble_standard_scale_r{EndOfHistory}_m{month}.png')

In [None]:
# Prediction maps zoomed in on the region around Nigeria, with country names.
nigeria_region_labels = [
    (0.4, 0.45, 'NIGERIA'),
    (0.4, 0.7, 'NIGER'),
    (0.5, 0.35, 'CAMEROON'),
    (0.7, 0.60, 'CHAD'),
    (0.7, 0.4, 'C.A.R.'),
    (0.15, 0.60, 'B. FASO'),
]
for step in stepstoplot:
    month = step + EndOfHistory
    gdf = gdf3.loc[month]
    map_title = 'Ensemble predictions as of ' + vid2date(EndOfHistory+step) + ', ' + str(step) + ' months after last month with data'
    m = Mapper2(
        width=10,
        height=10,
        frame_on=True,
        title=map_title,
        bbox=[-2.3019466946294584, 20.374695512438592, 1.103974761908613, 16.794164972712068],
    )
    m.add_layer(
        gdf=gdf,
        map_scale=standard_scale,
        cmap="rainbow",
        edgecolor="black",
        linewidth=0.2,
        column='step_combined',
        inform_colorbar=True,
    )
    ax = m.ax
    # Country outlines on top of the grid cells
    fg = gdf_c.plot(ax=ax, edgecolor='gray', linewidth=1.0, facecolor='None')
    figure = m.fig
    fontdict = {'fontsize':20}
    # Country names, positioned in figure coordinates
    for label_x, label_y, country_name in nigeria_region_labels:
        figure.text(label_x, label_y, country_name, fontdict=fontdict, color='black')

    # Label the colorbar with counts rather than log1p values
    m.cbar.set_ticks(standard_scale)
    m.cbar.set_ticklabels(standard_scale_labels)

    m.save(f'{path}PredictionMap_Nigeria_pgm_ensemble_standard_scale_r{EndOfHistory}_m{month}.png')

# Changes to 3- and 6-month forecasts, and since last actual observation


In [None]:
# Reading in data for mapping
# Predictions now and then
predstore_then = level +  '_' + EnsembleList[0]['modelname'] + '_f' + str(EndOfHistory-3)

df_now = EnsembleList[0]['predictions_future_df'].copy()

# Metadata of the forecast issued three months ago; df_then keeps this value
# if reading the stored predictions below fails.
df_then=ViewsMetadata().with_name('ensemble_cm_calib_f'+str(EndOfHistory-3)).fetch()

try:
    df_then = pd.DataFrame.forecasts.read_store(run=run_id, name=predstore_then)
except Exception as read_error:
    # Still best-effort, but the cause is now reported and interrupts
    # (KeyboardInterrupt, SystemExit) are no longer swallowed.
    print('Trouble reading forecasts issued three months ago')
    print(repr(read_error))

# Actuals

df_lastobserved = Datasets[0]['df']

In [None]:
# Compute log of mean non-logged fatalities, past six months
# Work on an explicit copy: adding columns to a .loc selection of
# df_lastobserved triggers SettingWithCopy warnings and leaves it unclear
# whether the source frame is modified.
df_observed = df_lastobserved.loc[EndOfHistory].copy()
df_observed['ged_sb_0'] = np.expm1(df_observed['ln_ged_sb'])
df_observed['ged_sum'] = df_observed['ged_sb_0']
# Add the five preceding months, each back-transformed from the log1p scale
for t in [1,2,3,4,5]:
    colname = 'ged_sb_' + str(t)
    df_observed[colname] = np.expm1(df_lastobserved.loc[EndOfHistory-t]['ln_ged_sb'])
    df_observed['ged_sum'] = df_observed['ged_sum'] + df_observed[colname]
# Mean over the six months, back on the log1p scale
df_observed['ln_ged_sum'] = np.log1p(df_observed['ged_sum']/6)
#df_observed.tail(20)
#df_observed.tail(20)

In [None]:
# For the 3- and 6-month horizons, pair the current forecast with the
# forecast issued three months earlier for the step at the same distance
# from its own last month with data.
StepsForward = [
    {
        'Step': step,
        'df_now': df_now.loc[EndOfHistory + step],
        'df_then': df_then.loc[EndOfHistory - 3 + step],
    }
    for step in (3, 6)
]

# The former `engine = sa.create_engine(source_db_path)` line is dropped:
# the import of `source_db_path` is commented out in this notebook and
# `engine` is not used by any visible cell.

# Geometries in EPSG:4326, indexed by grid-cell id; computed once rather
# than once per step.
gdf_base_4326 = gdf_base.to_crs(4326).set_index("priogrid_gid")

for s in StepsForward:
    # Non-inplace renames: the frames are .loc selections, and renaming them
    # in place raises SettingWithCopy warnings.
    s['df_now'] = s['df_now'].rename(columns={'step_combined':'Now'})
    s['df_then'] = s['df_then'].rename(columns={'step_combined':'Then'})
    s['df'] = pd.concat([s['df_now'],s['df_then'],df_observed['ln_ged_sum']],axis=1)
    s['df']['Change_in_prediction'] = s['df']['Now']-s['df']['Then']
    s['df']['Change_since_last_observed'] = s['df']['Now']-s['df']['ln_ged_sum']

    s['gdf_t'] = s['df'].join(gdf_base_4326)
    s['gdf'] = gpd.GeoDataFrame(s['gdf_t'], geometry="geom")


StepsForward[0]['gdf'].describe()

In [None]:
# Colorbar ticks for the change maps: percent changes, printed first as
# percentages and then as log ratios log((100 + p) / 100).
tickvalues = np.array([-80,-50,-20,0,20,50,100,200,500])
print(tickvalues)
# Labels keep the percent values as text
ticklabels=[str(tv) for tv in tickvalues]
tickvalues = np.log((100+tickvalues)/100)
print(tickvalues)

In [None]:
path= Mydropbox + f'DataReleases/MonthlyUpdates/{run_id}_{prod_id}/Continuous/Ensemble/ChangeMaps/'

In [None]:
# Number of months the change is measured over (used in the colorbar label)
delta = 3

# Colorbar ticks: percent changes expressed as log ratios log((100 + p) / 100).
# The first tick (-83) only sets the lower end of the colour range and gets
# no label. (An earlier tick computation that was immediately overwritten
# has been removed.)
tickvalues = np.array([-83,-80,-50,-20,0,20,50,100,200,500])
ticklabels=[str(tv) for tv in tickvalues]
ticklabels[0] = ""
tickvalues = np.log((100+tickvalues)/100)


t0s=range(508,509) # From start of month A, to start of (but not including) month B
bbox="africa_middle_east"
cmap='bwr'#'rainbow'
mnth = 'month' if abs(delta)==1 else 'months'

# One map per horizon and per change measure
for s in StepsForward:
    for column in ['Change_in_prediction','Change_since_last_observed']:
        plot = ViewsMap(
            width=10,
            label=f"{column}, s= {s['Step']}",
            title="",
            scale=None,
            bbox=bbox
        ).add_layer(
            s['gdf'],
            edgecolor="black",
            linewidth=0.1,
            column=column,
            inform_colorbar=True,
            cmap=cmap,
            vmin=tickvalues[0],vmax=tickvalues[-1]
        )

        # Grid-cell outlines on top of the coloured layer
        s['gdf'].plot(ax=plot.ax,edgecolor='black',linewidth=0.2,facecolor='None')

        plot.cbar.set_ticks(tickvalues)
        plot.cbar.set_ticklabels(ticklabels)
        plot.cbar.set_label(f'Percent change in {column} over past '+str(delta)+' '+mnth)

        plot.save(Mydropbox + f'DataReleases/MonthlyUpdates/{run_id}_{prod_id}/Continuous/Ensemble/ChangeMaps/' + column + '_' + 'pgm'+ '_' + 's' + str(s['Step'])+'_r' + str(EndOfHistory) +'.png')
        #plot.save(overleafpath+column+str(s['Step'])+'_r' + str(EndOfHistory) +'.png')