# PRIO Press Release: Draft Watchlist 


In [None]:
#To plot
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.cbook as cbook
from matplotlib.animation import PillowWriter
from matplotlib.cm import ScalarMappable
from matplotlib.lines import Line2D
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from textwrap import wrap
from scipy import interpolate
from matplotlib import gridspec
from matplotlib import cm

import seaborn as sns

#To fetch url 
import requests
import json

# sklearn
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn import linear_model
from sklearn.metrics import mean_squared_error

# Views 3
from viewser.operations import fetch
from viewser import Queryset, Column
import views_runs
from views_partitioning import data_partitioner, legacy
from stepshift import views
from views_runs import storage, ModelMetadata
from views_runs.storage import store, retrieve, fetch_metadata
from views_forecasts.extensions import *

# For mapping 
import os
from views_mapper2.mapper2 import *
from views_mapper2.BBoxWriter import *
from views_mapper2.dictionary_writer import *
from views_mapper2.label_writer import *
from ingester3.extensions import *

import os
# Resolve the user's home directory and the Desktop folder used for CSV exports.
home = os.path.expanduser("~")
desktop = f"{home}/Desktop/"  # (Toggle on to save to desktop)

# Part 1. Fetching data from storage
The run is read from storage because the latest monthly run, with GED fixed for Ukraine, is not yet available in the API.

In [None]:
# Read the stored forecast run identified by calib_run_id from storage.
calib_run_id = 46  # run_id
storage_run = pd.DataFrame.forecasts.read_store(
    name='cm_genetic_ensemble_f526',
    run=calib_run_id,
)

In [None]:
storage_run #step_combined is main_mean

In [None]:
# Back-transform step_combined (exp(x) - 1) into actual predicted fatalities.
# np.expm1 evaluates exp(x) - 1 directly and avoids the precision loss the
# explicit subtraction suffers when x is close to zero.
storage_run['step_combined'] = np.expm1(storage_run['step_combined'])

In [None]:
storage_run # can save as csv file

In [None]:
# Give the storage prediction column its own name so it can sit next to the
# API prediction column after merging.
storage_column_names = {"step_combined": "pred_fat_in2024_storage"}
storage_run = storage_run.rename(columns=storage_column_names)
storage_run

## Part 1: Fetching and downloading data from VIEWS API
This retrieves the non-manipulated run, which contains e.g. odd numbers for Ukraine.

In [None]:
# Fetch the forecast run at views_url_10 from the VIEWS API, page by page.
# (Could each iteration be fetched in the same loop?)
views_url_10 = 'https://api.viewsforecasting.org/fatalities002_2023_10_t01/cm/sb'
print(views_url_10)

# Page size
# NOTE(review): page_size_10 is defined but never sent to the API.
page_size_10 = 1000

# Seconds to wait for the server before giving up instead of hanging forever.
request_timeout_10 = 60

# Request the first page; fail loudly on an HTTP error rather than with a
# confusing JSON/KeyError further down.
r_10 = requests.get(views_url_10, timeout=request_timeout_10)
r_10.raise_for_status()
page_data_10 = r_10.json()

# Accumulates the 'data' records of every page.
master_list_10 = list(page_data_10['data'])

# Follow the 'next_page' links until the API stops returning one
# (an empty string, None, or a missing key all end the loop).
while page_data_10.get('next_page'):
    r_10 = requests.get(page_data_10['next_page'], timeout=request_timeout_10)
    r_10.raise_for_status()
    page_data_10 = r_10.json()
    master_list_10.extend(page_data_10['data'])

In [None]:
# Turn the collected API records into a DataFrame and list its column names.
api_run = pd.DataFrame(master_list_10)
api_run.columns.values.tolist()

In [None]:
# Give the API prediction column its own name so it can sit next to the
# storage prediction column after merging.
api_column_names = {"main_mean": "pred_fat_in2024_api"}
api_run = api_run.rename(columns=api_column_names)
api_run

In [None]:
# Identifier columns plus the API prediction; everything else is dropped.
columns_to_keep = [
    'country_id',
    'month_id',
    'name',
    'gwcode',
    'year',
    'isoab',
    'pred_fat_in2024_api',
]

# creating new df
api_run = api_run.loc[:, columns_to_keep]
api_run

# Merge storage df with api df

In [None]:
# Join API and storage predictions per country, then expose the API 'name'
# column as 'country'.
# NOTE(review): the join key is country_id only, although api_run also
# carries month_id. If storage_run holds several rows per country, every API
# row is paired with every storage row of that country - confirm this is
# intended before summing the predictions.
merged = (
    api_run
    .merge(storage_run, on='country_id')
    .rename(columns={"name": "country"})
)

merged

# Part 2. Keep only the predictions for 2024 and aggregate them to country-year.


In [None]:
# Keep only the rows whose year is 2024.
is_2024 = merged['year'] == 2024
merged_2024 = merged.loc[is_2024]
merged_2024

In [None]:
# Sum both prediction columns per country; 'country' comes back as a column.
agg_functions = {
    'pred_fat_in2024_api': 'sum',
    'pred_fat_in2024_storage': 'sum',
}
merged_2024 = (
    merged_2024
    .groupby('country')
    .agg(agg_functions)
    .reset_index()
)
merged_2024

# Fetching 2023 UCDP GED Data 

In [None]:
def vid2date(i):
    """Convert a VIEWS month_id into a 'YYYY/M' string.

    month_id 1 corresponds to January 1980 (so 505 is January 2022 and
    524 is August 2023, matching the constants used in this notebook).

    Args:
        i: Integer month_id.

    Returns:
        The year and the month number (1-12, not zero-padded) joined by '/'.
    """
    # Shift to a zero-based month count first; without the shift every
    # December (month_id divisible by 12) came out as month 0 of the
    # following year.
    years_since_1980, month_index = divmod(i - 1, 12)
    return f"{1980 + years_since_1980}/{month_index + 1}"

# Printing month_ids for chosen period
print(ViewsMonth.from_year_month(year=2023, month=8))

In [None]:
# Month-id windows used to slice the GED dataframes

StartOfHistory_2022, EndOfHistory_2022 = 505, 516  # starts January 2022
StartOfHistory_2023, EndOfHistory_2023 = 517, 524  # ends Aug 2023


In [None]:
# Define the country-month queryset: calendar info, country name and the
# three GED fatality columns (sb, os, ns).
qs_ged_cm = (
    Queryset("AT_cm_ged_data_no_transformations", "country_month")
    .with_column(Column("year", from_table="month", from_column="year_id"))
    .with_column(Column('month', from_table='month', from_column='month'))
    .with_column(Column('country_name', from_table='country', from_column='name'))
    .with_column(Column("ged_best_sb", from_table="ged2_cm", from_column="ged_sb_best_sum_nokgi"))
    .with_column(Column("ged_best_os", from_table="ged2_cm", from_column="ged_os_best_sum_nokgi"))
    .with_column(Column("ged_best_ns", from_table="ged2_cm", from_column="ged_ns_best_sum_nokgi"))
)

# Publish the queryset and replace the name with the fetched data.
qs_ged_cm = qs_ged_cm.publish().fetch()

# Summarise the fetched data: column count, range of the first index level
# and number of distinct values in the second index level.
time_level = qs_ged_cm.index.get_level_values(0)
unit_level = qs_ged_cm.index.get_level_values(1)
print(f"A dataset with {len(qs_ged_cm.columns)} columns, with "
      f"data between t {min(time_level)} "
      f"and {max(time_level)}. "
      f"({len(np.unique(unit_level))} units)"
     )

In [None]:
# Use 'country' as the country-name column so it matches the prediction frames.
ged_column_names = {"country_name": "country"}
qs_ged_cm = qs_ged_cm.rename(columns=ged_column_names)
qs_ged_cm

In [None]:
# Monthly GED rows for 2022 (month_ids between the 2022 window bounds, inclusive)

window_2022 = f'month_id >= {StartOfHistory_2022} and month_id <= {EndOfHistory_2022}'
ged_2022 = qs_ged_cm.query(window_2022)

ged_2022

In [None]:
# Total state-based fatalities per country over the 2022 subset

agg_functions = {'ged_best_sb': 'sum'}
ged_2022_total = ged_2022.groupby('country').agg(agg_functions)
ged_2022_total

In [None]:
# Give the 2022 total a unique column name ahead of the merge

total_2022_names = {"ged_best_sb": "total_ged_sb_2022"}
ged_2022_total = ged_2022_total.rename(columns=total_2022_names)
ged_2022_total

In [None]:
# Monthly GED rows for the 2023 window (month_ids between the bounds, inclusive)

window_2023 = f'month_id >= {StartOfHistory_2023} and month_id <= {EndOfHistory_2023}'
ged_2023_jan_aug = qs_ged_cm.query(window_2023)
ged_2023_jan_aug

In [None]:
# Sum state-based fatalities per country over the January-August 2023 subset
# (a partial year, not an annual total), then give the result a unique
# column name in prep for the merge.

agg_functions = {'ged_best_sb': 'sum'}
ged_2023_jan_aug = (
    ged_2023_jan_aug
    .groupby('country')
    .agg(agg_functions)
    .rename(columns={"ged_best_sb": "total_ged_jan_aug_2023"})
)
ged_2023_jan_aug

In [None]:
# Single-month GED subset for month_id 525 (sept 2023), with month-specific
# column names in prep for the merge.

sept_column_names = {
    "ged_best_sb": "ged_sb_2023_09",
    "ged_best_ns": "ged_ns_2023_09",
    "ged_best_os": "ged_os_2023_09",
}
ged_2023_09 = qs_ged_cm.query('month_id==525').rename(columns=sept_column_names)

# Keep only the country name and the three renamed fatality columns.
columns_to_keep = ['country', 'ged_sb_2023_09', 'ged_ns_2023_09', 'ged_os_2023_09']
ged_2023_09 = ged_2023_09.loc[:, columns_to_keep]
ged_2023_09

In [None]:
# Single-month GED subset for month_id 526 (oct 2023), with month-specific
# column names in prep for the merge.

oct_column_names = {
    "ged_best_sb": "ged_sb_2023_10",
    "ged_best_ns": "ged_ns_2023_10",
    "ged_best_os": "ged_os_2023_10",
}
ged_2023_10 = qs_ged_cm.query('month_id==526').rename(columns=oct_column_names)

# Keep only the country name and the three renamed fatality columns.
columns_to_keep = ['country', 'ged_sb_2023_10', 'ged_ns_2023_10', 'ged_os_2023_10']
ged_2023_10 = ged_2023_10.loc[:, columns_to_keep]
ged_2023_10

# Merging prediction dfs with GED dfs

In [None]:
# Attach the GED tables to the 2024 predictions one at a time, keyed on country.
# merge() defaults to an inner join, so a country missing from any one of the
# tables does not appear in final_df.
merged_step1 = merged_2024.merge(ged_2022_total, on='country')
merged_step2 = merged_step1.merge(ged_2023_jan_aug, on='country')
merged_step3 = merged_step2.merge(ged_2023_09, on='country')
final_df = merged_step3.merge(ged_2023_10, on='country')
final_df
#final_df.to_csv(desktop+'final_df.csv')

In [None]:
# Compact view: both predictions plus the state-based GED columns only.
columns_to_keep = [
    'country',
    'pred_fat_in2024_api',
    'pred_fat_in2024_storage',
    'total_ged_sb_2022',
    'total_ged_jan_aug_2023',
    'ged_sb_2023_09',
    'ged_sb_2023_10',
]
final_df_short = final_df.loc[:, columns_to_keep]
final_df_short
#final_df_short.to_csv(desktop+'final_df_short.csv')

# Create high-violence watchlist

In [None]:
# Display final_df ordered from highest to lowest storage prediction
# (the sorted frame is only shown, not stored).
final_df.sort_values('pred_fat_in2024_storage', ascending=False)


In [None]:
# Take the n rows with the largest storage prediction.
# reset_index() renumbers the rows from 0 and keeps the previous index as a column.
n = 200
top_rows = final_df.nlargest(n, columns='pred_fat_in2024_storage')
final_watchlist = top_rows.reset_index()
final_watchlist

In [None]:
# Shift the index up by one and label it 'Ranking'.
final_watchlist.index = (final_watchlist.index + 1).rename('Ranking')
final_watchlist

In [None]:
# Write the ranked watchlist to the Desktop folder as CSV.
final_watchlist.to_csv(f"{desktop}high_violence_watchlist.csv")

In [None]:
print('Done')