# Compare old and new pipeline forecasts for single run

In [5]:
import requests
import pandas as pd
import json
import os
from pathlib import Path
import numpy as np

#import matplotlib.pyplot as plt

# External
import geopandas as gpd
import sqlalchemy as sa

# VIEWS
from viewser.operations import fetch
from views_mapper2.mapper2 import *
from views_mapper2.BBoxWriter import *
from views_mapper2.dictionary_writer import *
from ingester3.config import source_db_path
from views_forecasts.extensions import *
from viewser import Queryset, Column
from views_mapper2.mapper2 import Mapper2

## General settings

### Set end of history

In [6]:
eoh = 549
EndOfHistory = 549

### Set steps to map

In [7]:
steps = [1, 3, 6, 12, 24, 36] # set step to plot

### Fetch mapping functions

In [8]:

def custom_add_colorbar(
    self,
    cmap,
    vmin,
    vmax,
    location="right",
    size="5%",
    pad=0.1,
    transparency=None,
    labelsize=10,
    tickparams=None,
):
    norm = plt.Normalize(vmin, vmax)
    if isinstance(cmap, str):
        cmap = plt.get_cmap(cmap)
    cmap = color.force_alpha_colormap(cmap=cmap, alpha=transparency)
    scalar_to_rgba = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
    divider = make_axes_locatable(self.ax)
    self.cax = divider.append_axes(location, size, pad)
    self.cax.tick_params(labelsize=labelsize)

    tickvalues = (
        list(tickparams.values()) if tickparams is not None else None
    )
    self.cbar = plt.colorbar(
        scalar_to_rgba, cax=self.cax, ticks=tickvalues
    )
    if tickparams is not None:
        self.cbar.set_ticklabels(list(tickparams.keys()))
    return self

# Monkey patch the method
Mapper2.add_colorbar = custom_add_colorbar

custom_scale = [0, 3, 10, 30, 100, 300]
custom_1p = log1p_dict(custom_scale)




## Fetch geometries

In [9]:
import os
home = os.path.expanduser("~")
#dropbox = home+'/ViEWS Dropbox/VIEWS research collaboration/Dissemination/Working Papers/Prediction Competition Preprint Maps/'
desktop = home+'/Desktop/' # (Toggle on to save to desktop) Change to your directory
#forecasts.to_csv(desktop+'forecasts.csv', index=False)

In [10]:
# CM

# Get country geometries to overlay as borders
engine = sa.create_engine(source_db_path) 
gdf_cm_master = gpd.GeoDataFrame.from_postgis(
    "SELECT id as country_id, name, in_africa, in_me, geom FROM prod.country",
    engine,
    geom_col='geom'
)
gdf_cm_master = gdf_cm_master.to_crs(4326)
gdf_cm = gdf_cm_master.copy()
gdf_cm


Unnamed: 0,country_id,name,in_africa,in_me,geom
0,236,Tanzania,1,0,"MULTIPOLYGON (((34.13636 -9.57117, 34.07444 -9..."
1,65,Russia,0,0,"MULTIPOLYGON (((104.26255 77.68518, 104.33638 ..."
2,39,United Kingdom,0,0,"MULTIPOLYGON (((-4.86416 54.62722, -4.92250 54..."
3,189,USSR,0,0,"MULTIPOLYGON (((104.26255 77.68518, 104.33638 ..."
4,85,Italy,0,0,"MULTIPOLYGON (((12.75357 43.97100, 12.76472 43..."
...,...,...,...,...,...
215,133,Afghanistan,0,0,"MULTIPOLYGON (((61.27655 35.60725, 61.29638 35..."
216,187,Czechoslovakia,0,0,"MULTIPOLYGON (((14.73972 50.82388, 14.77153 50..."
217,227,Serbia and Montenegro,0,0,"MULTIPOLYGON (((18.45555 42.56583, 18.46389 42..."
218,230,Serbia,0,0,"MULTIPOLYGON (((20.08568 42.54815, 20.10469 42..."


In [11]:
# PGM

# Get priogrid geometries
engine = sa.create_engine(source_db_path)
gdf_pgm_master = gpd.GeoDataFrame.from_postgis(
    "SELECT id as pg_id, in_africa, in_me, geom FROM prod.priogrid", 
    engine, 
    geom_col='geom'
)
gdf_pgm_master = gdf_pgm_master.to_crs(4326)
gdf_pgm = gdf_pgm_master.copy()

gdf_pgm.rename(columns = {'pg_id':'priogrid_id'}, inplace = True)
gdf_pgm


Unnamed: 0,priogrid_id,in_africa,in_me,geom
0,240318,False,False,"POLYGON ((98.50000 76.50000, 98.50000 77.00000..."
1,173423,False,False,"POLYGON ((131.00000 30.00000, 131.00000 30.500..."
2,234640,False,False,"POLYGON ((139.50000 72.50000, 139.50000 73.000..."
3,211904,False,False,"POLYGON ((-68.50000 57.00000, -68.50000 57.500..."
4,218313,False,False,"POLYGON ((-104.00000 61.50000, -104.00000 62.0..."
...,...,...,...,...
64813,169611,True,True,"POLYGON ((25.00000 27.50000, 25.00000 28.00000..."
64814,153089,False,True,"POLYGON ((44.00000 16.00000, 44.00000 16.50000..."
64815,172514,False,True,"POLYGON ((36.50000 29.50000, 36.50000 30.00000..."
64816,170344,True,True,"POLYGON ((31.50000 28.00000, 31.50000 28.50000..."


## Fetch old pipeline forecasts

### Set old run_id and run names

In [12]:
old_run_id = 46
old_run_name_cm = f'cm_genetic_ensemble_f{eoh}'
old_run_name_pgm = f'pgm_ensemble_cm_calib_f{eoh}'

### Get list of old cm runs - should only return one result

In [13]:
ViewsMetadata().with_name(name=old_run_name_cm).fetch()

Unnamed: 0,id,name,file_name,runs_id,model_generations_id,user_name,spatial_loa,temporal_loa,ds,osa,time_min,time_max,space_min,space_max,steps,target,prediction_columns,date_written,description,deleted
0,23316,cm_genetic_ensemble_f549,pr_46_cm_genetic_ensemble_f549.parquet,46,1,angelica,c,m,False,False,550,585,1,246,[],step_combined,[],2025-10-27 11:24:59.630812+00:00,,False


### Get list of old pgm runs - should only return one result

In [14]:
ViewsMetadata().with_name(name=old_run_name_pgm).fetch()

Unnamed: 0,id,name,file_name,runs_id,model_generations_id,user_name,spatial_loa,temporal_loa,ds,osa,time_min,time_max,space_min,space_max,steps,target,prediction_columns,date_written,description,deleted
0,23360,pgm_ensemble_cm_calib_f549,pr_46_pgm_ensemble_cm_calib_f549.parquet,46,1,angelica,pg,m,False,False,550,585,62356,190511,[],step_combined,[],2025-10-27 15:59:48.620817+00:00,,False


### Fetch dfs for old data

In [15]:
old_data_cm = pd.DataFrame.forecasts.read_store(run=old_run_id, name=old_run_name_cm)
old_data_pgm = pd.DataFrame.forecasts.read_store(run=old_run_id, name=old_run_name_pgm)


pr_46_cm_genetic_ensemble_f549.parquet
pr_46_pgm_ensemble_cm_calib_f549.parquet


### Quick check that the df's look ok

In [16]:
old_data_cm

Unnamed: 0_level_0,feature,step_combined
month_id,country_id,Unnamed: 2_level_1
550,1,0.001749
550,2,0.003540
550,3,0.043151
550,4,0.742192
550,5,0.009478
...,...,...
585,242,0.343062
585,243,0.629551
585,244,0.317161
585,245,5.258471


In [17]:
old_data_pgm

Unnamed: 0_level_0,feature,step_combined
month_id,priogrid_id,Unnamed: 2_level_1
550,62356,0.000184
550,79599,0.000081
550,79600,0.000078
550,79601,0.000074
550,80317,0.000103
...,...,...
585,190496,0.009952
585,190507,0.001289
585,190508,0.001099
585,190510,0.001199


## Fetch new pipeline forecasts

### Set new run id and run names

In [18]:
new_run_id = 67
new_runs_cm = f'pink_ponyclub_predictions_forecasting'
new_runs_pgm = f'skinny_love_predictions_forecasting_'

### Select new cm run

In [19]:
# Get list of new cm runs - may return many results
ViewsMetadata().with_name(name=new_runs_cm).fetch()

Unnamed: 0,id,name,file_name,runs_id,model_generations_id,user_name,spatial_loa,temporal_loa,ds,osa,time_min,time_max,space_min,space_max,steps,target,prediction_columns,date_written,description,deleted
0,23157,pink_ponyclub_predictions_forecasting_20251009...,pr_67_pink_ponyclub_predictions_forecasting_20...,67,1,‘xiaolong’,c,m,False,False,550,585,1,246,[],pred_ln_ged_sb_dep,[],2025-10-09 11:06:04.322679+00:00,,False
1,23192,pink_ponyclub_predictions_forecasting_20251016...,pr_67_pink_ponyclub_predictions_forecasting_20...,67,1,jim,c,m,False,False,549,584,1,246,[],pred_ln_ged_sb_dep,[],2025-10-16 13:32:41.156497+00:00,,False
2,23256,pink_ponyclub_predictions_forecasting_20251024...,pr_67_pink_ponyclub_predictions_forecasting_20...,67,1,‘xiaolong’,c,m,False,False,550,585,1,246,[],pred_ln_ged_sb_dep,[],2025-10-24 14:54:22.122957+00:00,,False
3,23333,pink_ponyclub_predictions_forecasting_20251027...,pr_67_pink_ponyclub_predictions_forecasting_20...,67,1,‘xiaolong’,c,m,False,False,550,585,1,246,[],pred_ln_ged_sb_dep,[],2025-10-27 11:39:07.875302+00:00,,False


In [20]:
# Select the last run in the df - this is the most recent, and presumably therefore the one we want
new_data_cm_name = ViewsMetadata().with_name(name=new_runs_cm).fetch().iloc[-2]['name']
new_data_cm_name

'pink_ponyclub_predictions_forecasting_20251024_145418'

### Select new pgm run

In [21]:
# Get list of new pgm runs - may return many results
ViewsMetadata().with_name(name=new_runs_pgm).fetch()

Unnamed: 0,id,name,file_name,runs_id,model_generations_id,user_name,spatial_loa,temporal_loa,ds,osa,time_min,time_max,space_min,space_max,steps,target,prediction_columns,date_written,description,deleted
0,23170,skinny_love_predictions_forecasting_20251011_1...,pr_67_skinny_love_predictions_forecasting_2025...,67,1,‘xiaolong’,pg,m,False,False,550,585,62356,190511,[],pred_ln_ged_sb_dep,[],2025-10-11 16:48:45.115231+00:00,,False
1,23172,skinny_love_predictions_forecasting_20251014_1...,pr_67_skinny_love_predictions_forecasting_2025...,67,1,‘xiaolong’,pg,m,False,False,550,585,62356,190511,[],pred_ln_ged_sb_dep,[],2025-10-14 15:24:46.573238+00:00,,False
2,23269,skinny_love_predictions_forecasting_20251024_2...,pr_67_skinny_love_predictions_forecasting_2025...,67,1,‘xiaolong’,pg,m,False,False,550,585,62356,190511,[],pred_ln_ged_sb_dep,[],2025-10-24 20:21:18.729988+00:00,,False


In [22]:
# Select the last run in the df - this is the most recent, and presumably therefore the one we want
new_data_pgm_name = ViewsMetadata().with_name(name=new_runs_pgm).fetch().iloc[-1]['name']
new_data_pgm_name

'skinny_love_predictions_forecasting_20251024_202114'

### Fetch data for new runs

In [23]:
new_data_cm = pd.DataFrame.forecasts.read_store(run=new_run_id, name=new_data_cm_name)
new_data_pgm = pd.DataFrame.forecasts.read_store(run=new_run_id, name=new_data_pgm_name)

pr_67_pink_ponyclub_predictions_forecasting_20251024_145418.parquet
pr_67_skinny_love_predictions_forecasting_20251024_202114.parquet


## Prep and plot new pipeline CM forecasts

In [24]:
new_data_cm

Unnamed: 0_level_0,Unnamed: 1_level_0,pred_ln_ged_sb_dep
month_id,country_id,Unnamed: 2_level_1
550,1,0.003773
550,2,0.003762
550,3,0.011403
550,4,0.713244
550,5,0.003762
...,...,...
585,242,0.105974
585,243,0.378329
585,244,0.107760
585,245,3.757777


In [25]:
# Add steps and streamline index
new_data_cm = new_data_cm.reset_index()
new_data_cm['step'] = new_data_cm['month_id'] - EndOfHistory
new_data_cm = new_data_cm.set_index(['step', 'country_id'], drop = True)
new_data_cm

Unnamed: 0_level_0,Unnamed: 1_level_0,month_id,pred_ln_ged_sb_dep
step,country_id,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1,550,0.003773
1,2,550,0.003762
1,3,550,0.011403
1,4,550,0.713244
1,5,550,0.003762
...,...,...,...
36,242,585,0.105974
36,243,585,0.378329
36,244,585,0.107760
36,245,585,3.757777


In [26]:
## Merge in geometries
cm_data_new = pd.merge(new_data_cm, gdf_cm, on="country_id", how="left")
gdf_cm_new = gpd.GeoDataFrame(cm_data_new, geometry="geom")

gdf_cm_new

Unnamed: 0,country_id,month_id,pred_ln_ged_sb_dep,name,in_africa,in_me,geom
0,1,550,0.003773,Guyana,0,0,"MULTIPOLYGON (((-58.17262 6.81222, -58.15494 6..."
1,2,550,0.003762,Suriname,0,0,"MULTIPOLYGON (((-55.12796 5.82217, -55.10445 5..."
2,3,550,0.011403,Trinidad and Tobago,0,0,"MULTIPOLYGON (((-61.07945 10.82416, -61.07556 ..."
3,4,550,0.713244,Venezuela,0,0,"MULTIPOLYGON (((-66.31029 10.62602, -66.28309 ..."
4,5,550,0.003762,Samoa,0,0,"MULTIPOLYGON (((-172.59650 -13.50911, -172.551..."
...,...,...,...,...,...,...,...
6871,242,585,0.105974,Tanzania,1,0,"MULTIPOLYGON (((34.13636 -9.57117, 34.07444 -9..."
6872,243,585,0.378329,Morocco,1,0,"MULTIPOLYGON (((-4.42042 35.15125, -4.35792 35..."
6873,244,585,0.107760,Mauritania,1,0,"MULTIPOLYGON (((-10.71639 15.43890, -10.71945 ..."
6874,245,585,3.757777,Sudan,1,0,"MULTIPOLYGON (((34.09223 9.47747, 33.90162 9.4..."


In [27]:
# Plot the new pipeline CM forecasts

for step in steps:

    # Generate month to plot based on first month of selected dataset and the step chosen above

    t = (new_data_cm['month_id'].min())-1
    month_to_plot = t + step 
    month_to_plot

    cm_data_to_plot = gdf_cm_new[gdf_cm_new['month_id'] == month_to_plot]  
    region = 'globe' #options are 'ame', 'me' or 'globe'

    cmap = ['rainbow']

    for cmaps in cmap:
        test_map = Mapper2(
        width=10,   # dimension width
        height=10,  # dimension height
        frame_on=True,
        bbox=bbox_from_cid(f'{region}'), 
        ).add_layer(
        gdf=cm_data_to_plot,  
        cmap=cmaps,
        transparency=1,
        background=None, 
        map_dictionary=dictionary_stand_1p_10k, # changed based on column
        edgecolor="black",  # border color choice
        linewidth=0.15,  # line size choice
        column="pred_ln_ged_sb_dep", 
        )

    # Save the map with appropriate filename
    test_map.save(f'{desktop}/new_run_{EndOfHistory}_cm_s{step}_ln1_{region}_{cmaps}', dpi=350)

    # Print map status
    print(f'{cmaps} map for step {step} saved to {desktop}/new_run_{EndOfHistory}_cm_s{step}_ln1_{region}_{cmaps}!')

rainbow map for step 1 saved to /Users/alm/Desktop//new_run_549_cm_s1_ln1_globe_rainbow!
rainbow map for step 3 saved to /Users/alm/Desktop//new_run_549_cm_s3_ln1_globe_rainbow!
rainbow map for step 6 saved to /Users/alm/Desktop//new_run_549_cm_s6_ln1_globe_rainbow!
rainbow map for step 12 saved to /Users/alm/Desktop//new_run_549_cm_s12_ln1_globe_rainbow!
rainbow map for step 24 saved to /Users/alm/Desktop//new_run_549_cm_s24_ln1_globe_rainbow!
rainbow map for step 36 saved to /Users/alm/Desktop//new_run_549_cm_s36_ln1_globe_rainbow!


## Prep and plot new pipeline PGM forecasts

In [28]:
# The pgm data currently has a single-element vector as every value in the forecast column - we need to crudely fix this
pgm_values = new_data_pgm['pred_ln_ged_sb_dep'].values


In [29]:
pgm_values

array([array([5.3644035e-06], dtype=float32),
       array([2.503392e-06], dtype=float32),
       array([4.2915253e-06], dtype=float32), ...,
       array([8.427742e-05], dtype=float32),
       array([8.427742e-05], dtype=float32),
       array([8.427742e-05], dtype=float32)], dtype=object)

In [30]:
pgm_values_scalar = [pgm_val[0] for pgm_val in pgm_values]

In [31]:
pgm_values_scalar

[5.3644035e-06,
 2.503392e-06,
 4.2915253e-06,
 3.4570635e-06,
 6.4372807e-06,
 1.5735503e-05,
 1.2278481e-05,
 5.1259863e-06,
 2.7418098e-06,
 2.7418098e-06,
 5.245195e-06,
 2.7418098e-06,
 5.7220295e-06,
 2.503392e-06,
 5.3644035e-06,
 3.933899e-06,
 4.1723165e-06,
 7.3909487e-06,
 4.1723165e-06,
 4.1723165e-06,
 0.00015889335,
 0.00012647305,
 9.417489e-06,
 5.1259863e-06,
 2.7418098e-06,
 2.7418098e-06,
 2.7418098e-06,
 2.7418098e-06,
 9.417489e-06,
 5.1259863e-06,
 2.7418098e-06,
 5.9604467e-06,
 4.1723165e-06,
 4.1723165e-06,
 1.20400655e-05,
 2.0146166e-05,
 5.841238e-06,
 7.3909487e-06,
 3.933899e-06,
 2.503392e-06,
 3.3378547e-06,
 5.1259863e-06,
 5.7220295e-06,
 5.1259863e-06,
 2.7418098e-06,
 2.7418098e-06,
 2.7418098e-06,
 2.7418098e-06,
 2.503392e-06,
 2.503392e-06,
 3.3378547e-06,
 4.1723165e-06,
 4.1723165e-06,
 4.1723165e-06,
 3.933899e-06,
 3.933899e-06,
 4.1723165e-06,
 1.03711545e-05,
 6.4372807e-06,
 7.152532e-06,
 5.1259863e-06,
 5.841238e-06,
 5.1259863e-06,
 2.74

In [32]:
new_data_pgm['pred_ln_ged_sb_dep'] = pgm_values_scalar
new_data_pgm

Unnamed: 0_level_0,Unnamed: 1_level_0,pred_ln_ged_sb_dep
month_id,priogrid_id,Unnamed: 2_level_1
550,62356,0.000005
550,79599,0.000003
550,79600,0.000004
550,79601,0.000003
550,80317,0.000006
...,...,...
585,190496,0.104910
585,190507,0.000084
585,190508,0.000084
585,190510,0.000084


In [33]:
# reset index
new_data_pgm = new_data_pgm.reset_index()
new_data_pgm



Unnamed: 0,month_id,priogrid_id,pred_ln_ged_sb_dep
0,550,62356,0.000005
1,550,79599,0.000003
2,550,79600,0.000004
3,550,79601,0.000003
4,550,80317,0.000006
...,...,...,...
471955,585,190496,0.104910
471956,585,190507,0.000084
471957,585,190508,0.000084
471958,585,190510,0.000084


In [34]:
# fetch pg identifiers
qs_pg = (Queryset("c_pg", "priogrid_month")

               # target variable
               .with_column(Column("gwcode", from_loa="country", from_column="gwcode")
                            )
               .with_column(Column("name", from_loa="country", from_column="name")
                            )
        )
pg_identifiers=qs_pg.publish().fetch()
pg_identifiers

100%|██████████| 20.9M/20.9M [00:01<00:00, 12.5MB/s]


Queryset c_pg read successfully 


Unnamed: 0_level_0,Unnamed: 1_level_0,gwcode,name
month_id,priogrid_gid,Unnamed: 2_level_1,Unnamed: 3_level_1
1,62356,560,South Africa
1,79599,560,South Africa
1,79600,560,South Africa
1,79601,560,South Africa
1,80317,560,South Africa
...,...,...,...
852,190496,355,Bulgaria
852,190507,640,Turkey
852,190508,640,Turkey
852,190510,640,Turkey


In [35]:
# reset index and fix inconsistent col names
pg_identifiers = pg_identifiers.reset_index()
pg_identifiers = pg_identifiers.rename(columns = {'priogrid_gid':'priogrid_id'})
pg_identifiers

Unnamed: 0,month_id,priogrid_id,gwcode,name
0,1,62356,560,South Africa
1,1,79599,560,South Africa
2,1,79600,560,South Africa
3,1,79601,560,South Africa
4,1,80317,560,South Africa
...,...,...,...,...
11169715,852,190496,355,Bulgaria
11169716,852,190507,640,Turkey
11169717,852,190508,640,Turkey
11169718,852,190510,640,Turkey


In [36]:
# merge with forecasts
pgm_data_new = pd.merge(new_data_pgm, pg_identifiers, on=["priogrid_id", "month_id"], how="left")
pgm_data_new


Unnamed: 0,month_id,priogrid_id,pred_ln_ged_sb_dep,gwcode,name
0,550,62356,0.000005,560,South Africa
1,550,79599,0.000003,560,South Africa
2,550,79600,0.000004,560,South Africa
3,550,79601,0.000003,560,South Africa
4,550,80317,0.000006,560,South Africa
...,...,...,...,...,...
471955,585,190496,0.104910,355,Bulgaria
471956,585,190507,0.000084,640,Turkey
471957,585,190508,0.000084,640,Turkey
471958,585,190510,0.000084,640,Turkey


In [37]:
# add steps
pgm_data_new['step'] = pgm_data_new['month_id'] - EndOfHistory
pgm_data_new

Unnamed: 0,month_id,priogrid_id,pred_ln_ged_sb_dep,gwcode,name,step
0,550,62356,0.000005,560,South Africa,1
1,550,79599,0.000003,560,South Africa,1
2,550,79600,0.000004,560,South Africa,1
3,550,79601,0.000003,560,South Africa,1
4,550,80317,0.000006,560,South Africa,1
...,...,...,...,...,...,...
471955,585,190496,0.104910,355,Bulgaria,36
471956,585,190507,0.000084,640,Turkey,36
471957,585,190508,0.000084,640,Turkey,36
471958,585,190510,0.000084,640,Turkey,36


In [38]:
final_pgm_data_new = pd.merge(pgm_data_new, gdf_pgm, on="priogrid_id", how="left")
gdf_pgm_new = gpd.GeoDataFrame(final_pgm_data_new, geometry="geom")  
gdf_pgm_new

Unnamed: 0,month_id,priogrid_id,pred_ln_ged_sb_dep,gwcode,name,step,in_africa,in_me,geom
0,550,62356,0.000005,560,South Africa,1,True,False,"POLYGON ((37.50000 -47.00000, 37.50000 -46.500..."
1,550,79599,0.000003,560,South Africa,1,True,False,"POLYGON ((19.00000 -35.00000, 19.00000 -34.500..."
2,550,79600,0.000004,560,South Africa,1,True,False,"POLYGON ((19.50000 -35.00000, 19.50000 -34.500..."
3,550,79601,0.000003,560,South Africa,1,True,False,"POLYGON ((20.00000 -35.00000, 20.00000 -34.500..."
4,550,80317,0.000006,560,South Africa,1,True,False,"POLYGON ((18.00000 -34.50000, 18.00000 -34.000..."
...,...,...,...,...,...,...,...,...,...
471955,585,190496,0.104910,355,Bulgaria,36,False,True,"POLYGON ((27.50000 42.00000, 27.50000 42.50000..."
471956,585,190507,0.000084,640,Turkey,36,False,True,"POLYGON ((33.00000 42.00000, 33.00000 42.50000..."
471957,585,190508,0.000084,640,Turkey,36,False,True,"POLYGON ((33.50000 42.00000, 33.50000 42.50000..."
471958,585,190510,0.000084,640,Turkey,36,False,True,"POLYGON ((34.50000 42.00000, 34.50000 42.50000..."


In [36]:
# PGM - NEW

for step in steps:

    # Generate month to plot based on first month of selected dataset and the step chosen above

    t = (new_data_pgm['month_id'].min())-1
    month_to_plot = t + step 
    month_to_plot

    pgm_data_to_plot = gdf_pgm_new[gdf_pgm_new['month_id'] == month_to_plot]  
    region = 'ame' #options are 'ame', 'me' or 'globe'

    cmap = ['rainbow']
    #cmap = ['rainbow', 'binary']

    for cmaps in cmap:
        test_map = Mapper2(
        width=10,   # dimension width
        height=10,  # dimension height
        frame_on=True,
        bbox=bbox_from_cid(f'{region}'), 
        ).add_layer(
        gdf=pgm_data_to_plot,  
        cmap=cmaps,
        transparency=1,
        background=None, 
        map_dictionary=custom_1p, # changed based on column
        edgecolor="black",  # border color choice
        linewidth=0.05,  # line size choice
        column="pred_ln_ged_sb_dep", 
        )
        # Jim procedure for country border creation using the geometries inherent to views3
        ax = test_map.ax
        gdf_cm.plot(ax=ax, edgecolor='black', linewidth=0.01, facecolor='None')

    
    # Save the map with appropriate filename
    test_map.save(f'{desktop}/new_run_{EndOfHistory}_pgm_s{step}_ln1_{region}_{cmaps}', dpi=350)

    # Print map status
    print(f'{cmaps} map for step {step} saved to {desktop}/new_run_{EndOfHistory}_m_s{step}_ln1_{region}_{cmaps}!')


rainbow map for step 1 saved to /Users/alm/Desktop//new_run_549_m_s1_ln1_ame_rainbow!
rainbow map for step 3 saved to /Users/alm/Desktop//new_run_549_m_s3_ln1_ame_rainbow!
rainbow map for step 6 saved to /Users/alm/Desktop//new_run_549_m_s6_ln1_ame_rainbow!
rainbow map for step 12 saved to /Users/alm/Desktop//new_run_549_m_s12_ln1_ame_rainbow!
rainbow map for step 24 saved to /Users/alm/Desktop//new_run_549_m_s24_ln1_ame_rainbow!
rainbow map for step 36 saved to /Users/alm/Desktop//new_run_549_m_s36_ln1_ame_rainbow!


#  Plot old pipeline data

### CM

In [38]:
old_data_cm

Unnamed: 0_level_0,feature,step_combined
month_id,country_id,Unnamed: 2_level_1
550,1,0.001749
550,2,0.003540
550,3,0.043151
550,4,0.742192
550,5,0.009478
...,...,...
585,242,0.343062
585,243,0.629551
585,244,0.317161
585,245,5.258471


In [39]:
old_data_cm = old_data_cm.reset_index()
old_data_cm['step'] = old_data_cm['month_id'] - EndOfHistory
old_data_cm = old_data_cm.set_index(['step', 'country_id'], drop = True)
old_data_cm

Unnamed: 0_level_0,feature,month_id,step_combined
step,country_id,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1,550,0.001749
1,2,550,0.003540
1,3,550,0.043151
1,4,550,0.742192
1,5,550,0.009478
...,...,...,...
36,242,585,0.343062
36,243,585,0.629551
36,244,585,0.317161
36,245,585,5.258471


In [40]:
cm_data_old = pd.merge(old_data_cm, gdf_cm, on="country_id", how="left")
gdf_cm_old = gpd.GeoDataFrame(cm_data_old, geometry="geom")

gdf_cm_old


Unnamed: 0,country_id,month_id,step_combined,name,in_africa,in_me,geom
0,1,550,0.001749,Guyana,0,0,"MULTIPOLYGON (((-58.17262 6.81222, -58.15494 6..."
1,2,550,0.003540,Suriname,0,0,"MULTIPOLYGON (((-55.12796 5.82217, -55.10445 5..."
2,3,550,0.043151,Trinidad and Tobago,0,0,"MULTIPOLYGON (((-61.07945 10.82416, -61.07556 ..."
3,4,550,0.742192,Venezuela,0,0,"MULTIPOLYGON (((-66.31029 10.62602, -66.28309 ..."
4,5,550,0.009478,Samoa,0,0,"MULTIPOLYGON (((-172.59650 -13.50911, -172.551..."
...,...,...,...,...,...,...,...
6871,242,585,0.343062,Tanzania,1,0,"MULTIPOLYGON (((34.13636 -9.57117, 34.07444 -9..."
6872,243,585,0.629551,Morocco,1,0,"MULTIPOLYGON (((-4.42042 35.15125, -4.35792 35..."
6873,244,585,0.317161,Mauritania,1,0,"MULTIPOLYGON (((-10.71639 15.43890, -10.71945 ..."
6874,245,585,5.258471,Sudan,1,0,"MULTIPOLYGON (((34.09223 9.47747, 33.90162 9.4..."


In [41]:
# CM - OLD

for step in steps:

    # Generate month to plot based on first month of selected dataset and the step chosen above

    t = (old_data_cm['month_id'].min())-1
    month_to_plot = t + step 
    month_to_plot

    cm_data_to_plot = gdf_cm_old[gdf_cm_old['month_id'] == month_to_plot]  

    region = 'globe' #options are 'ame', 'me' or 'globe'

    cmap = ['rainbow']

    for cmaps in cmap:
        test_map = Mapper2(
        width=10,   # dimension width
        height=10,  # dimension height
        frame_on=True,
        bbox=bbox_from_cid(f'{region}'), 
        ).add_layer(
        gdf=cm_data_to_plot,  
        cmap=cmaps,
        transparency=1,
        background=None, 
        map_dictionary=dictionary_stand_1p_10k, # changed based on column
        edgecolor="black",  # border color choice
        linewidth=0.15,  # line size choice
        column="step_combined", 
        )

    # Save the map with appropriate filename
    test_map.save(f'{desktop}/old_run_{EndOfHistory}_cm_s{step}_ln1_{region}_{cmaps}', dpi=350)

    # Print map status
    print(f'{cmaps} map for step {step} saved to {desktop}/old_run_{EndOfHistory}_cm_s{step}_ln1_{region}_{cmaps}!')

rainbow map for step 1 saved to /Users/alm/Desktop//old_run_549_cm_s1_ln1_globe_rainbow!
rainbow map for step 3 saved to /Users/alm/Desktop//old_run_549_cm_s3_ln1_globe_rainbow!
rainbow map for step 6 saved to /Users/alm/Desktop//old_run_549_cm_s6_ln1_globe_rainbow!
rainbow map for step 12 saved to /Users/alm/Desktop//old_run_549_cm_s12_ln1_globe_rainbow!
rainbow map for step 24 saved to /Users/alm/Desktop//old_run_549_cm_s24_ln1_globe_rainbow!
rainbow map for step 36 saved to /Users/alm/Desktop//old_run_549_cm_s36_ln1_globe_rainbow!


### PGM

In [42]:
old_data_pgm

Unnamed: 0_level_0,feature,step_combined
month_id,priogrid_id,Unnamed: 2_level_1
550,62356,0.000184
550,79599,0.000081
550,79600,0.000078
550,79601,0.000074
550,80317,0.000103
...,...,...
585,190496,0.009952
585,190507,0.001289
585,190508,0.001099
585,190510,0.001199


In [43]:
old_data_pgm = old_data_pgm.reset_index()
old_data_pgm['step'] = old_data_pgm['month_id'] - EndOfHistory
old_data_pgm = old_data_pgm.set_index(['step', 'priogrid_id'], drop = True)
old_data_pgm


Unnamed: 0_level_0,feature,month_id,step_combined
step,priogrid_id,Unnamed: 2_level_1,Unnamed: 3_level_1
1,62356,550,0.000184
1,79599,550,0.000081
1,79600,550,0.000078
1,79601,550,0.000074
1,80317,550,0.000103
...,...,...,...
36,190496,585,0.009952
36,190507,585,0.001289
36,190508,585,0.001099
36,190510,585,0.001199


In [44]:
pgm_data_old = pd.merge(old_data_pgm, gdf_pgm, on="priogrid_id", how="left")
gdf_pgm_old = gpd.GeoDataFrame(pgm_data_old, geometry="geom")  

gdf_pgm_old

Unnamed: 0,priogrid_id,month_id,step_combined,in_africa,in_me,geom
0,62356,550,0.000184,True,False,"POLYGON ((37.50000 -47.00000, 37.50000 -46.500..."
1,79599,550,0.000081,True,False,"POLYGON ((19.00000 -35.00000, 19.00000 -34.500..."
2,79600,550,0.000078,True,False,"POLYGON ((19.50000 -35.00000, 19.50000 -34.500..."
3,79601,550,0.000074,True,False,"POLYGON ((20.00000 -35.00000, 20.00000 -34.500..."
4,80317,550,0.000103,True,False,"POLYGON ((18.00000 -34.50000, 18.00000 -34.000..."
...,...,...,...,...,...,...
471955,190496,585,0.009952,False,True,"POLYGON ((27.50000 42.00000, 27.50000 42.50000..."
471956,190507,585,0.001289,False,True,"POLYGON ((33.00000 42.00000, 33.00000 42.50000..."
471957,190508,585,0.001099,False,True,"POLYGON ((33.50000 42.00000, 33.50000 42.50000..."
471958,190510,585,0.001199,False,True,"POLYGON ((34.50000 42.00000, 34.50000 42.50000..."


In [45]:
# PGM

for step in steps:

    # Generate month to plot based on first month of selected dataset and the step chosen above

    t = (old_data_pgm['month_id'].min())-1
    month_to_plot = t + step 
    month_to_plot

    pgm_data_to_plot = gdf_pgm_old[gdf_pgm_old['month_id'] == month_to_plot]  

    region = 'ame' #options are 'ame', 'me' or 'globe'

    cmap = ['rainbow']
    #cmap = ['rainbow', 'binary']

    for cmaps in cmap:
        test_map = Mapper2(
        width=10,   # dimension width
        height=10,  # dimension height
        frame_on=True,
        bbox=bbox_from_cid(f'{region}'), 
        ).add_layer(
        gdf=pgm_data_to_plot,  
        cmap=cmaps,
        transparency=1,
        background=None, 
        map_dictionary=custom_1p, # changed based on column
        edgecolor="black",  # border color choice
        linewidth=0.05,  # line size choice
        column="step_combined", 
        )
        # Jim procedure for country border creation using the geometries inherent to views3
        ax = test_map.ax
        gdf_cm.plot(ax=ax, edgecolor='black', linewidth=0.01, facecolor='None')

    
    # Save the map with appropriate filename
    test_map.save(f'{desktop}/old_run_{EndOfHistory}_pgm_s{step}_ln1_{region}_{cmaps}', dpi=350)

    # Print map status
    print(f'{cmaps} map for step {step} saved to {desktop}/old_run_{EndOfHistory}_pgm_s{step}_ln1_{region}_{cmaps}!')

rainbow map for step 1 saved to /Users/alm/Desktop//old_run_549_pgm_s1_ln1_ame_rainbow!
rainbow map for step 3 saved to /Users/alm/Desktop//old_run_549_pgm_s3_ln1_ame_rainbow!
rainbow map for step 6 saved to /Users/alm/Desktop//old_run_549_pgm_s6_ln1_ame_rainbow!
rainbow map for step 12 saved to /Users/alm/Desktop//old_run_549_pgm_s12_ln1_ame_rainbow!
rainbow map for step 24 saved to /Users/alm/Desktop//old_run_549_pgm_s24_ln1_ame_rainbow!
rainbow map for step 36 saved to /Users/alm/Desktop//old_run_549_pgm_s36_ln1_ame_rainbow!


## Check reconciliation between CM and PGM data in the new pipeline

### Sum PGM predictions by country-month

In [39]:
gdf_pgm_new

Unnamed: 0,month_id,priogrid_id,pred_ln_ged_sb_dep,gwcode,name,step,in_africa,in_me,geom
0,550,62356,0.000005,560,South Africa,1,True,False,"POLYGON ((37.50000 -47.00000, 37.50000 -46.500..."
1,550,79599,0.000003,560,South Africa,1,True,False,"POLYGON ((19.00000 -35.00000, 19.00000 -34.500..."
2,550,79600,0.000004,560,South Africa,1,True,False,"POLYGON ((19.50000 -35.00000, 19.50000 -34.500..."
3,550,79601,0.000003,560,South Africa,1,True,False,"POLYGON ((20.00000 -35.00000, 20.00000 -34.500..."
4,550,80317,0.000006,560,South Africa,1,True,False,"POLYGON ((18.00000 -34.50000, 18.00000 -34.000..."
...,...,...,...,...,...,...,...,...,...
471955,585,190496,0.104910,355,Bulgaria,36,False,True,"POLYGON ((27.50000 42.00000, 27.50000 42.50000..."
471956,585,190507,0.000084,640,Turkey,36,False,True,"POLYGON ((33.00000 42.00000, 33.00000 42.50000..."
471957,585,190508,0.000084,640,Turkey,36,False,True,"POLYGON ((33.50000 42.00000, 33.50000 42.50000..."
471958,585,190510,0.000084,640,Turkey,36,False,True,"POLYGON ((34.50000 42.00000, 34.50000 42.50000..."


In [44]:
gdf_pgm_new['pred_linear_ged_sb_dep'] = np.expm1(gdf_pgm_new['pred_ln_ged_sb_dep'])
gdf_pgm_new

Unnamed: 0,month_id,priogrid_id,pred_ln_ged_sb_dep,gwcode,name,step,in_africa,in_me,geom,pred_linear_ged_sb_dep
0,550,62356,0.000005,560,South Africa,1,True,False,"POLYGON ((37.50000 -47.00000, 37.50000 -46.500...",0.000005
1,550,79599,0.000003,560,South Africa,1,True,False,"POLYGON ((19.00000 -35.00000, 19.00000 -34.500...",0.000003
2,550,79600,0.000004,560,South Africa,1,True,False,"POLYGON ((19.50000 -35.00000, 19.50000 -34.500...",0.000004
3,550,79601,0.000003,560,South Africa,1,True,False,"POLYGON ((20.00000 -35.00000, 20.00000 -34.500...",0.000003
4,550,80317,0.000006,560,South Africa,1,True,False,"POLYGON ((18.00000 -34.50000, 18.00000 -34.000...",0.000006
...,...,...,...,...,...,...,...,...,...,...
471955,585,190496,0.104910,355,Bulgaria,36,False,True,"POLYGON ((27.50000 42.00000, 27.50000 42.50000...",0.110610
471956,585,190507,0.000084,640,Turkey,36,False,True,"POLYGON ((33.00000 42.00000, 33.00000 42.50000...",0.000084
471957,585,190508,0.000084,640,Turkey,36,False,True,"POLYGON ((33.50000 42.00000, 33.50000 42.50000...",0.000084
471958,585,190510,0.000084,640,Turkey,36,False,True,"POLYGON ((34.50000 42.00000, 34.50000 42.50000...",0.000084


In [45]:
df_sum_pgm = (
    gdf_pgm_new.groupby(["name", "month_id", "gwcode"])["pred_linear_ged_sb_dep"]
      .sum()
      .reset_index()
)
df_sum_pgm = df_sum_pgm.rename(columns={'pred_linear_ged_sb_dep':'sum_pgm_pred_linear_ged_sb_dep'})
df_sum_pgm

Unnamed: 0,name,month_id,gwcode,sum_pgm_pred_linear_ged_sb_dep
0,Afghanistan,550,700,11.454067
1,Afghanistan,551,700,12.550658
2,Afghanistan,552,700,13.316750
3,Afghanistan,553,700,12.034202
4,Afghanistan,554,700,12.634485
...,...,...,...,...
2731,Zimbabwe,581,552,0.054956
2732,Zimbabwe,582,552,0.072248
2733,Zimbabwe,583,552,0.076165
2734,Zimbabwe,584,552,0.071036


### Fetch CM identifiers

In [49]:
qs_c = (Queryset("c_data", "country_month")

               # target variable
               .with_column(Column("gwcode", from_loa="country", from_column="gwcode")
                            )
               .with_column(Column("name", from_loa="country", from_column="name")
                            )
        )
c_data=qs_c.publish().fetch()

Queryset c_data read successfully 


In [59]:
c_data = c_data.reset_index()
c_data

Unnamed: 0,index,month_id,country_id,gwcode,name
0,0,1,1,110,Guyana
1,1,1,2,115,Suriname
2,2,1,3,52,Trinidad and Tobago
3,3,1,4,101,Venezuela
4,4,1,5,990,Samoa
...,...,...,...,...,...
158225,158225,852,242,510,Tanzania
158226,158226,852,243,600,Morocco
158227,158227,852,244,435,Mauritania
158228,158228,852,245,625,Sudan


### Merge CM predictions with identifiers

In [60]:
new_cm_df = new_data_cm.reset_index()
new_cm_df

Unnamed: 0,step,country_id,month_id,pred_ln_ged_sb_dep
0,1,1,550,0.003773
1,1,2,550,0.003762
2,1,3,550,0.011403
3,1,4,550,0.713244
4,1,5,550,0.003762
...,...,...,...,...
6871,36,242,585,0.105974
6872,36,243,585,0.378329
6873,36,244,585,0.107760
6874,36,245,585,3.757777


In [61]:
new_cm_df['pred_linear_ged_sb_dep'] = np.expm1(new_cm_df['pred_ln_ged_sb_dep'])
new_cm_df

Unnamed: 0,step,country_id,month_id,pred_ln_ged_sb_dep,pred_linear_ged_sb_dep
0,1,1,550,0.003773,0.003780
1,1,2,550,0.003762,0.003769
2,1,3,550,0.011403,0.011468
3,1,4,550,0.713244,1.040601
4,1,5,550,0.003762,0.003769
...,...,...,...,...,...
6871,36,242,585,0.105974,0.111793
6872,36,243,585,0.378329,0.459844
6873,36,244,585,0.107760,0.113780
6874,36,245,585,3.757777,41.853047


In [62]:
new_cm_df = new_cm_df.rename(columns={'pred_linear_ged_sb_dep':'sum_cm_pred_linear_ged_sb_dep'})
new_cm_df

Unnamed: 0,step,country_id,month_id,pred_ln_ged_sb_dep,sum_cm_pred_linear_ged_sb_dep
0,1,1,550,0.003773,0.003780
1,1,2,550,0.003762,0.003769
2,1,3,550,0.011403,0.011468
3,1,4,550,0.713244,1.040601
4,1,5,550,0.003762,0.003769
...,...,...,...,...,...
6871,36,242,585,0.105974,0.111793
6872,36,243,585,0.378329,0.459844
6873,36,244,585,0.107760,0.113780
6874,36,245,585,3.757777,41.853047


In [63]:
df_sum_cm = pd.merge(new_cm_df, c_data, on=["country_id", "month_id"], how="left")
df_sum_cm.sort_values(by=['name', 'month_id'], inplace=True)
df_sum_cm

Unnamed: 0,step,country_id,month_id,pred_ln_ged_sb_dep,sum_cm_pred_linear_ged_sb_dep,index,gwcode,name
119,1,133,550,2.522047,11.454070,100476,700,Afghanistan
310,2,133,551,2.606435,12.550661,100667,700,Afghanistan
501,3,133,552,2.661430,13.316754,100858,700,Afghanistan
692,4,133,553,2.567577,12.034202,101049,700,Afghanistan
883,5,133,554,2.612602,12.634488,101240,700,Afghanistan
...,...,...,...,...,...,...,...,...
6064,32,158,581,0.053499,0.054956,106421,552,Zimbabwe
6255,33,158,582,0.069758,0.072249,106612,552,Zimbabwe
6446,34,158,583,0.073404,0.076165,106803,552,Zimbabwe
6637,35,158,584,0.068626,0.071036,106994,552,Zimbabwe


## Merge CM and PGM dfs with country-months sums

In [64]:
# Check pgm df one last time
df_sum_pgm

Unnamed: 0,name,month_id,gwcode,sum_pgm_pred_linear_ged_sb_dep
0,Afghanistan,550,700,11.454067
1,Afghanistan,551,700,12.550658
2,Afghanistan,552,700,13.316750
3,Afghanistan,553,700,12.034202
4,Afghanistan,554,700,12.634485
...,...,...,...,...
2731,Zimbabwe,581,552,0.054956
2732,Zimbabwe,582,552,0.072248
2733,Zimbabwe,583,552,0.076165
2734,Zimbabwe,584,552,0.071036


In [65]:
df_sum_final = pd.merge( df_sum_pgm, df_sum_cm, on=["month_id", "gwcode", "name"], how="left")
df_sum_final

Unnamed: 0,name,month_id,gwcode,sum_pgm_pred_linear_ged_sb_dep,step,country_id,pred_ln_ged_sb_dep,sum_cm_pred_linear_ged_sb_dep,index
0,Afghanistan,550,700,11.454067,1,133,2.522047,11.454070,100476
1,Afghanistan,551,700,12.550658,2,133,2.606435,12.550661,100667
2,Afghanistan,552,700,13.316750,3,133,2.661430,13.316754,100858
3,Afghanistan,553,700,12.034202,4,133,2.567577,12.034202,101049
4,Afghanistan,554,700,12.634485,5,133,2.612602,12.634488,101240
...,...,...,...,...,...,...,...,...,...
2731,Zimbabwe,581,552,0.054956,32,158,0.053499,0.054956,106421
2732,Zimbabwe,582,552,0.072248,33,158,0.069758,0.072249,106612
2733,Zimbabwe,583,552,0.076165,34,158,0.073404,0.076165,106803
2734,Zimbabwe,584,552,0.071036,35,158,0.068626,0.071036,106994


In [53]:
print('All done!')

All done!
