# Explore and map the data behind the "(De)escalation View" on VIEWS Dashboard 2.0 | Country level (cm)

In [None]:
import requests
import pandas as pd
import json
#import matplotlib.pyplot as plt

# External
import geopandas as gpd
import sqlalchemy as sa

# VIEWS
from viewser.operations import fetch
from views_mapper2.mapper2 import *
from views_mapper2.BBoxWriter import *
from views_mapper2.dictionary_writer import *
from ingester3.config import source_db_path

## Get forecasts

In [None]:
# Name of the forecast dataset (model run) to fetch; it is also reused in
# output file names and plot titles further down.

dataset = 'fatalities002_2024_01_t01'

# URL to fetch the forecasts from, built from the dataset name so that the
# two can never get out of sync.

FORECASTS = f'https://api.viewsforecasting.org/{dataset}/cm/sb/main_mean'

In [None]:
# Fetch the first page of forecasts.

# Page size that could be sent as the 'pagesize' query parameter.
# NOTE: not used by the request below - the query-string part is disabled,
# so the API's default page size applies.
PAGE_SIZE=1000

# Define the full request
req_url=FORECASTS #+'/'+str()+'?'+'pagesize='+str(PAGE_SIZE)

# Collects the 'data' records of every page
master_list=[]

# Single request for the first page (the URL used to be requested twice and
# the first response was thrown away). Fail early on an HTTP error instead of
# on a confusing JSON/KeyError later.
r=requests.get(req_url)
r.raise_for_status()
page_data=r.json()

# Show the top-level keys of the response
list(page_data)

In [None]:
# Keep fetching pages until all data is accessed: store the records of the
# page already in page_data, then follow the 'next_page' links.

master_list+=page_data['data']

# Stop on an empty string as before, but also when 'next_page' is None or
# absent, instead of passing an invalid URL to requests.get().
while page_data.get('next_page'):
    r=requests.get(page_data['next_page'])
    r.raise_for_status()  # surface HTTP errors instead of parsing an error body
    page_data=r.json()

    master_list+=page_data['data']

# One row per record returned by the API
pred_raw=pd.DataFrame(master_list)

In [None]:
pred_raw

In [None]:
pred_raw.to_csv(f'cm_{dataset}.csv')

## Get actuals

In [None]:
# Define the URL you want to fetch data from. 

UCDP ='https://api.viewsforecasting.org/predictors_fatalities002_0000_00/cm/px/ucdp_ged_sb_best_sum'

In [None]:
# Fetch the first page of the UCDP series.

# Page size that could be sent as the 'pagesize' query parameter.
# NOTE: not used by the request below - the query-string part is disabled,
# so the API's default page size applies.
PAGE_SIZE=1000

# Define the full request
req_url=UCDP #+'/'+str()+'?'+'pagesize='+str(PAGE_SIZE)

# Collects the 'data' records of every page
master_list_2=[]

# Single request for the first page (the URL used to be requested twice and
# the first response was thrown away). Fail early on an HTTP error instead of
# on a confusing JSON/KeyError later.
r=requests.get(req_url)
r.raise_for_status()
page_data=r.json()

# Show the top-level keys of the response
list(page_data)

In [None]:
# Keep fetching pages until all data is accessed: store the records of the
# page already in page_data, then follow the 'next_page' links.

master_list_2+=page_data['data']

# Stop on an empty string as before, but also when 'next_page' is None or
# absent, instead of passing an invalid URL to requests.get().
while page_data.get('next_page'):
    r=requests.get(page_data['next_page'])
    r.raise_for_status()  # surface HTTP errors instead of parsing an error body
    page_data=r.json()

    master_list_2+=page_data['data']

# One row per record returned by the API
ged_raw=pd.DataFrame(master_list_2)

In [None]:
ged_raw.to_csv('ged_cm.csv')

In [None]:
#ged_raw.set_index(['country_id', 'month_id'])

In [None]:
#ged_raw.xs('Sudan', level='name')

# Inspect the data

In [None]:
# Inspect the data to see that it looks correct

pred_raw

In [None]:
# Inspect the data to see that it looks correct

ged_raw

# Set prediction step (month) to analyze

## Find EndOfHistory in GED data

In [None]:
ged_raw['month_id'].max()

In [None]:
# Set EndOfHistory as t

t = ged_raw['month_id'].max() 
t

In [None]:
#t = 522 # June 2023
#t

## Choose prediction step

In [None]:
step = 1 # Change here
step # Check that it matches value above

In [None]:
pred_month_to_plot = t+step 
pred_month_to_plot

# Filter out relevant columns

## Predictions

In [None]:
# Columns of the forecast table that the comparison needs
cm_columns_to_keep = [
    'country_id',
    'month_id',
    'name',
    'isoab',
    'year',
    'main_mean',
]

# New dataframe restricted to those columns
pred_filtered = pred_raw.loc[:, cm_columns_to_keep]
pred_filtered

## GED actuals

In [None]:
# Columns of the GED table that the comparison needs
columns_to_keep_ged = [
    'country_id',
    'month_id',
    'name',
    'isoab',
    'year',
    'ucdp_ged_sb_best_sum',
]

# New dataframe restricted to those columns
ged_filtered = ged_raw.loc[:, columns_to_keep_ged]
ged_filtered

# Create new dataframes from GED actuals

## Last month (1-month)

In [None]:
# Keep only the rows of the last observed month (month_id between t and t),
# then average 'ucdp_ged_sb_best_sum' per country_id into 'ged_1m_avg'.
ged_1month = (
    ged_filtered.loc[ged_filtered['month_id'].between(t, t)]
    .groupby(['country_id'])['ucdp_ged_sb_best_sum']
    .mean()
    .to_frame(name='ged_1m_avg')
)

ged_1month

## 3-month average

In [None]:
# First month of the 3-month window that ends at t (t-2, t-1, t)
d3 = t - 2
d3

In [None]:
# Keep only the rows whose month_id lies between d3 and t, then average
# 'ucdp_ged_sb_best_sum' per country_id into 'ged_3m_avg'.
ged_3months = (
    ged_filtered.loc[ged_filtered['month_id'].between(d3, t)]
    .groupby(['country_id'])['ucdp_ged_sb_best_sum']
    .mean()
    .to_frame(name='ged_3m_avg')
)

ged_3months

## 6-month average

In [None]:
# First month of the 6-month window that ends at t (t-5 ... t)
d6 = t - 5
d6

In [None]:
# Keep only the rows whose month_id lies between d6 and t, then average
# 'ucdp_ged_sb_best_sum' per country_id into 'ged_6m_avg'.
ged_6months = (
    ged_filtered.loc[ged_filtered['month_id'].between(d6, t)]
    .groupby(['country_id'])['ucdp_ged_sb_best_sum']
    .mean()
    .to_frame(name='ged_6m_avg')
)

ged_6months

## 12-month average

In [None]:
# First month of the 12-month window that ends at t (t-11 ... t)
d12 = t - 11
d12

In [None]:
# Keep only the rows whose month_id lies between d12 and t, then average
# 'ucdp_ged_sb_best_sum' per country_id into 'ged_12m_avg'.
ged_12months = (
    ged_filtered.loc[ged_filtered['month_id'].between(d12, t)]
    .groupby(['country_id'])['ucdp_ged_sb_best_sum']
    .mean()
    .to_frame(name='ged_12m_avg')
)

ged_12months

# Merge GED actuals dataframes

In [None]:
# Put the 1-, 3-, 6- and 12-month averages side by side, matching the rows of
# the four tables on their shared country_id index.
merged_ged = (
    ged_1month
    .merge(ged_3months, left_index=True, right_index=True)
    .merge(ged_6months, left_index=True, right_index=True)
    .merge(ged_12months, left_index=True, right_index=True)
)

merged_ged

# Merge predictions and GED actuals for chosen month

In [None]:
pred_filtered

In [None]:
merged_ged

In [None]:
# Forecast month to compare against the actuals: the chosen prediction step
# counted from the end of history t. Using `step` (instead of a hard-coded 1)
# keeps this in line with the step selected above and with the month used for
# the plots; step = 1 gives the month right after the last GED month.
pred_month_to_plot = t + step
pred_month_to_plot

In [None]:
# Forecast rows for the selected month only, indexed by (country_id, name)
pred_final = (
    pred_filtered[pred_filtered['month_id'].between(pred_month_to_plot, pred_month_to_plot)]
    .set_index(['country_id', 'name'], drop=True)
)
pred_final

In [None]:
# Combine the GED averages with the forecasts of the selected month,
# matching rows through the indexes of both tables.
merged_final = merged_ged.merge(pred_final, left_index=True, right_index=True)
merged_final

In [None]:
# For every averaging window, store forecast minus recent average:
# a positive value means the forecast is above the recent average.
for horizon in ('1m', '3m', '6m', '12m'):
    merged_final[f'diff_{horizon}'] = merged_final['main_mean'] - merged_final[f'ged_{horizon}_avg']

# Country-month (cm) name for the finished comparison table
merged_final_cm = merged_final

merged_final_cm

### Query results for specific country

In [None]:
merged_final_cm.xs('Sudan', level='name')

## Download result as csv

In [None]:
merged_final_cm.to_csv(f'comparison_{dataset}.csv')

### Query data for specific country

In [None]:
merged_final_cm.xs(245, level='country_id')

# Descriptive stats for the dataframe

In [None]:
features = ['diff_1m', 'diff_3m', 'diff_6m', 'diff_12m']

### Descriptive stats for the full dataset

In [None]:
merged_final_cm.describe()

In [None]:
# Median of every numeric column. numeric_only=True skips non-numeric columns
# (presumably 'isoab' holds text), which would otherwise make median() raise a
# TypeError on pandas >= 2.0.
merged_final_cm.median(numeric_only=True)

In [None]:
for feature in features:
    print(merged_final_cm[feature].describe(), '\n')

## Prep dataframes for plotting

In [None]:
# Connect to the source database and load the country table (id, name,
# region flags and geometry) as a GeoDataFrame, reprojected to EPSG:4326.
engine = sa.create_engine(source_db_path)
gdf_ci_master = gpd.GeoDataFrame.from_postgis(
    sql="SELECT id as country_id, name, in_africa, in_me, geom FROM prod.country",
    con=engine,
    geom_col='geom',
).to_crs(4326)

In [None]:
# Work on copies so the comparison table and the master geometries stay untouched
data = merged_final_cm.copy()
gdf = gdf_ci_master.copy()

# Attach the country attributes and geometry to the comparison rows.
# NOTE(review): data is indexed by (country_id, name) while the right-hand
# side is indexed by country_id only - presumably pandas matches on the shared
# 'country_id' level; confirm the result has one row per country.
data = data.join(gdf.set_index("country_id"))
# Rebuild a GeoDataFrame with 'geom' as the geometry column; this is the
# frame that the maps are drawn from.
gdf = gpd.GeoDataFrame(data, geometry="geom")
gdf

## Plot settings

In [None]:
chosen_month=t+step
transform=''     # , ln_, ln2_

In [None]:
chosen_month

In [None]:
# Executes the transforms

# Imported here because the notebook's import cell does not import numpy
# explicitly.
import numpy as np

# Apply the chosen transform to every column in `features` (previously only
# the column left over in `feature` from an earlier loop was transformed).
# The new columns are written to gdf as well as data, because the maps are
# drawn from gdf and look up the column named f'{transform}{feature}'.
# NOTE(review): the diff columns can be negative and np.log1p yields NaN for
# values below -1 - confirm that a log transform is meaningful here.
if transform == 'ln_':
    for feature in features:
        data[f'{transform}{feature}'] = np.log1p(data[feature])
        gdf[f'{transform}{feature}'] = np.log1p(gdf[feature])

elif transform == 'ln2_':
    for feature in features:
        data[f'{transform}{feature}'] = np.log1p(np.log1p(data[feature]))
        gdf[f'{transform}{feature}'] = np.log1p(np.log1p(gdf[feature]))

else: print("No transform applied")

In [None]:
cmap = 'seismic'

In [None]:
dashboard_index = [-300,-250,-200,-150,-100,-50, 0, 50, 100, 150, 200, 250, 300]
dashboard_dict = norm_dict(dashboard_index)


display(dashboard_dict)

In [None]:
map_dictionary = dashboard_dict

### Where to save the plots

In [None]:
# Save to Desktop

import os
home = os.path.expanduser("~")
display(home)

In [None]:
my_path = home+'/Desktop/'
my_path

## Choose columns to plot

In [None]:
features = ['diff_1m', 'diff_3m', 'diff_6m', 'diff_12m']

# diff_1m, diff_3m, diff_6m

In [None]:
# Draw one global map per column in `features` and save each map as a PNG
# in my_path.
for feature in features:
    
    m = Mapper2(
        width=40,
        height=40,
        frame_on=True,
        title=f'{feature} for {dataset}',
        bbox=bbox_from_cid('globe'), 
        figure = None
    ).add_layer(
        gdf=gdf,#.loc[chosen_month],
        transparency = 1.0,
        edgecolor="black",
        linewidth=0.5,
        cmap = cmap,
        column = f'{transform}'+f'{feature}', 
        map_dictionary = map_dictionary,
    )

    # Save inside the loop so that every feature gets its own file; when this
    # call sat after the loop only the last map was written.
    # NOTE(review): `plt` is expected to come from the views_mapper2 star
    # imports, and the map just drawn is assumed to be the current pyplot
    # figure - confirm.
    plt.savefig(my_path+f'{dataset}_{feature}.png', bbox_inches='tight')  # saves the current figure

In [None]:
print('All done!')