# PRIO Annual Report

This notebook produces three different watchlists for use in PRIO's Annual Report on VIEWS data. 

**HIGHEST FATALITIES**

1) [Highest predicted fatalities in 2025](#watchlist-1)

**HIGHEST FATALITIES AMONGST NO/LOW VIOLENCE COUNTRIES**

2) [Highest predicted fatalities in 2025, amongst countries with fewer than 25 BRDs over the last 12 months (rolling period)](#watchlist-2)

**HIGHEST RELATIVE CHANGE IN FATALITIES**

3) [Highest relative change in fatalities in 2025, compared to last 12 months of actuals (rolling period)](#watchlist-3-1)
4) [Highest relative change in fatalities in 2025, compared to last 12 months of actuals (rolling period), subset by level of violence (low, medium, high, war)](#watchlist-3-2)





In [None]:
import os
# Directory the notebook is running from; every CSV is written beneath it.
current_dir = os.getcwd()
save_folder = 'data'

# Create the output folder up front: DataFrame.to_csv does not create missing
# parent directories, so the save cells would otherwise fail on a fresh checkout.
os.makedirs(os.path.join(current_dir, save_folder), exist_ok=True)

## Load forecasts from API

Fetch forecasts from the VIEWS API and create dataframe.

In [None]:
import requests
import pandas as pd
import json
import os
from pathlib import Path
import numpy as np


In [None]:

# Keep rendered DataFrames compact: cap the display at 10 rows and 10 columns.
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, 10)

In [None]:
# Forecast dataset identifier, built as <model>_<release>
model = 'fatalities002'
release = '2024_07_t01'
dataset = '_'.join((model, release))

# Remaining path segments of the endpoint
# (presumably level of analysis and type of violence -- confirm against the API docs)
loa = 'cm'
tv = 'sb'

# Full URL the forecasts are fetched from; displayed as the cell output
FORECASTS = '/'.join(('https://api.viewsforecasting.org', dataset, loa, tv, 'main_mean'))
FORECASTS


In [None]:
# Fetch the first page of forecasts from the API.

# Intended page size. NOTE: currently unused -- the request below sends no paging
# parameters; the name is kept defined in case another cell reads it.
PAGE_SIZE = 1000

# The request URL is the bare endpoint.
req_url = FORECASTS

# Start with an empty record list; the pagination cell collects the pages'
# records under this name.
master_list = []

# One request is enough: the endpoint used to be fetched twice, with the first
# response thrown away.
r = requests.get(req_url)
r.raise_for_status()  # fail loudly on an HTTP error instead of parsing an error body
page_data = r.json()

# Show the top-level keys of the response payload
list(page_data)

In [None]:
# Collect the records of every page by following the `next_page` links.

# Rebuild the list from the first page and walk a separate `page` variable
# (leaving `page_data` untouched), so re-running this cell does not append
# duplicate records.
master_list = list(page_data['data'])
page = page_data

# An empty `next_page` marks the last page; a missing or null link is treated
# the same way instead of raising.
while page.get('next_page'):
    r = requests.get(page['next_page'])
    r.raise_for_status()  # stop on an HTTP error rather than parsing an error body
    page = r.json()
    master_list += page['data']

# Build the dataframe; the API's `name` column is exposed as `country`.
raw_forecasts = pd.DataFrame(master_list).rename(columns={'name': 'country'})

In [None]:
# Restrict the forecasts to the identifier, time and prediction columns
columns_to_keep = [
    'country_id', 'country',
    'month_id', 'year', 'month',
    'main_mean',
]
raw_forecasts = raw_forecasts.loc[:, columns_to_keep].copy()

print('Here is the filtered dataframe for the fatalities002 model:')
raw_forecasts

In [None]:
# Write the raw forecasts to <current_dir>/<save_folder>/<dataset>.csv

file_name = dataset
save_path = os.path.join(current_dir, save_folder, file_name)

raw_forecasts.to_csv(save_path + '.csv')

## Load actuals from API

Fetch actuals (UCDP) from the VIEWS API and create dataframe.

In [None]:
# API path of the UCDP GED actuals.
# NOTE: this rebinds `dataset`, which held the forecast dataset name until now.
dataset = 'predictors_fatalities002_0000_00/cm/px/ucdp_ged_sb_best_sum'

# Full URL the actuals are fetched from; displayed as the cell output
GED = 'https://api.viewsforecasting.org/' + dataset
GED


In [None]:
# Fetch the first page of actuals (UCDP GED) from the API.

# Intended page size. NOTE: currently unused -- the request below sends no paging
# parameters; the name is kept defined in case another cell reads it.
PAGE_SIZE = 1000

# The request URL is the bare GED endpoint.
req_url = GED

# Start with an empty record list; the pagination cell collects the pages'
# records under this name.
master_list = []

# Only the GED endpoint is requested: this cell used to fetch the FORECASTS URL
# first and discard the response.
r = requests.get(req_url)
r.raise_for_status()  # fail loudly on an HTTP error instead of parsing an error body
page_data = r.json()

# Show the top-level keys of the response payload
list(page_data)

In [None]:
# Collect the records of every page by following the `next_page` links.

# Rebuild the list from the first page and walk a separate `page` variable
# (leaving `page_data` untouched), so re-running this cell does not append
# duplicate records.
master_list = list(page_data['data'])
page = page_data

# An empty `next_page` marks the last page; a missing or null link is treated
# the same way instead of raising.
while page.get('next_page'):
    r = requests.get(page['next_page'])
    r.raise_for_status()  # stop on an HTTP error rather than parsing an error body
    page = r.json()
    master_list += page['data']

# Build the dataframe; the API's `name` column is exposed as `country`.
ged = pd.DataFrame(master_list).rename(columns={'name': 'country'})

In [None]:
# Write the actuals to <current_dir>/<save_folder>/ged_cm.csv

file_name = 'ged_cm'
save_path = os.path.join(current_dir, save_folder, file_name)

ged.to_csv(save_path + '.csv')

## WATCHLIST 1: Highest predicted fatalities in 2025 <a class="anchor" id="watchlist-1"></a>

In [None]:
# Yearly forecast totals: sum `main_mean` over all rows of each country and year

group_keys = ['country_id', 'country', 'year']
forecasts_by_year = (
    raw_forecasts
    .groupby(group_keys, as_index=False)['main_mean']
    .sum()
)
forecasts_by_year

In [None]:
# Keep the 2025 rows only, then rank them from highest to lowest forecast

is_2025 = forecasts_by_year['year'] == 2025
forecasts_for_2025 = forecasts_by_year[is_2025]
sorted_forecasts_for_2025 = forecasts_for_2025.sort_values(by='main_mean', ascending=False)
sorted_forecasts_for_2025


In [None]:
# Write the ranked 2025 forecasts to <current_dir>/<save_folder>/watchlist1_cy_forecasts_for_2025.csv

file_name = 'watchlist1_cy_forecasts_for_2025'
save_path = os.path.join(current_dir, save_folder, file_name)

sorted_forecasts_for_2025.to_csv(save_path + '.csv')

### CREATE WATCHLIST 1: highest fatalities in 2025 by country-year 

In [None]:
# WATCHLIST 1: the ten highest country-year forecasts for 2025

sorted_forecasts_for_2025.head(10)

## WATCHLIST 2: Highest predicted fatalities, for qualifying countries <a class="anchor" id="watchlist-2"></a>

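I.e., predicted fatalities in 2025, by country-year, amongst countries with fewer than 25 BRDs in the last 12 months. The cells that build this watchlist are located further down, after the Watchlist 3 cells, because they reuse `actuals_last_12months`, which is computed there; the result is saved as `watchlist3_low_risk_forecasts_and_ged.csv`.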

## WATCHLIST 3: Highest predicted change in fatalities <a class="anchor" id="watchlist-3"></a>

### Prediction vs. last 12 months of actuals <a class="anchor" id="watchlist-3-1"></a>

In [None]:
# The 12 largest distinct month_id values found in the actuals

unique_month_ids = ged['month_id'].drop_duplicates()
last_12_month_ids = unique_month_ids.nlargest(12)
last_12_month_ids

In [None]:
# Keep the rows of the last 12 months, then total the fatalities per country
# (the grouping is by country only, not by year)

actuals_columns = ['country_id', 'month_id', 'country', 'year', 'month', 'ucdp_ged_sb_best_sum']
in_last_12_months = ged['month_id'].isin(last_12_month_ids)
actuals_last_12months = ged.loc[in_last_12_months, actuals_columns]
actuals_last_12months = (
    actuals_last_12months
    .groupby(['country_id', 'country'], as_index=False)['ucdp_ged_sb_best_sum']
    .sum()
)
actuals_last_12months

In [None]:
# Attach each country's last-12-months actuals to its 2025 forecast.
# The left join keeps every forecast row; countries without actuals get NaN.

prep_for_predicted_change = (
    forecasts_for_2025
    .merge(actuals_last_12months, on=['country_id', 'country'], how='left')
    .rename(columns={'ucdp_ged_sb_best_sum': 'actuals_last_12months'})
    .sort_values(by=['main_mean'], ascending=False)
)
prep_for_predicted_change

In [None]:
# Percent change from the last 12 months of actuals to the 2025 forecast:
# (forecast - actuals) / actuals * 100.
# Zero actuals produce inf (or NaN for 0/0); the next cell replaces inf with NaN.

prep_for_predicted_change['predicted_change_in_percent'] = (
    prep_for_predicted_change['main_mean']
    .sub(prep_for_predicted_change['actuals_last_12months'])
    .div(prep_for_predicted_change['actuals_last_12months'])
    .mul(100)
)
prep_for_predicted_change

In [None]:
# Turn +/-inf into NaN, then rank by percent change (largest first) and show the top ten

non_finite = [np.inf, -np.inf]
predicted_change = prep_for_predicted_change.replace(non_finite, np.nan)
sorted_predicted_change = predicted_change.sort_values(by='predicted_change_in_percent', ascending=False)
sorted_predicted_change.head(10)

#### CREATE WATCHLIST 2.1: Predicted change (%) in fatalities in 2025 

In [None]:
# Write the ranked predicted change to
# <current_dir>/<save_folder>/watchlist2_cy_forecasts_actuals_predictedchange.csv

file_name = 'watchlist2_cy_forecasts_actuals_predictedchange'
save_path = os.path.join(current_dir, save_folder, file_name)

sorted_predicted_change.to_csv(save_path + '.csv')

### Prediction vs. last 12 months of actuals, per level of violence <a class="anchor" id="watchlist-3-2"></a>

In [None]:
# Label every row with a violence category derived from its 2025 forecast (main_mean):
#   below 25      -> 'low-violence'
#   25 to < 100   -> 'medium-violence'
#   100 to < 1000 -> 'high-violence'
#   1000 and up   -> 'war'
#   anything else (e.g. NaN) -> 'unknown'

def _violence_category(fatalities):
    """Return the violence category label for a forecasted fatality count."""
    if fatalities < 25:
        return 'low-violence'
    if 25 <= fatalities < 100:
        return 'medium-violence'
    if 100 <= fatalities < 1000:
        return 'high-violence'
    if fatalities >= 1000:
        return 'war'
    # Reached only when every comparison is False, i.e. for NaN
    return 'unknown'

predicted_change['predicted_violence_category'] = predicted_change['main_mean'].apply(_violence_category)
predicted_change

#### CREATE WATCHLIST 2.2: Predicted change (%) in fatalities in 2025, per level of violence

In [None]:
# Write one CSV per violence category, each sorted by predicted change (largest first).
unique_categories = predicted_change['predicted_violence_category'].unique()

for category in unique_categories:
    # Keep the name extension-free, as in the other save cells: '.csv' is appended
    # when writing. Previously the extension was added twice ('.csv.csv').
    file_name = f'watchlist2_cy_{category}_predictedchange'
    save_path = os.path.join(current_dir, save_folder, file_name)
    # Rows of the current category only
    category_df = predicted_change[predicted_change['predicted_violence_category'] == category].sort_values(by=['predicted_change_in_percent'], ascending=False)
    category_df.to_csv(f'{save_path}.csv')
    # Report the name of the file that was actually written
    print(f'Saved {file_name}.csv')

In [None]:
# Exploration cell: shows the top ten of a single category.
# Change the category literal in the filter below to inspect another list.

is_selected_category = predicted_change['predicted_violence_category'] == 'war'
category_df = predicted_change[is_selected_category].sort_values(by='predicted_change_in_percent', ascending=False)
category_df.head(10)

In [None]:
# Countries whose summed actuals over the last 12 months are below 25 BRD

below_threshold = actuals_last_12months['ucdp_ged_sb_best_sum'] < 25
qualifying_countries = (
    actuals_last_12months
    .loc[below_threshold]
    .rename(columns={'ucdp_ged_sb_best_sum': 'actuals_last_12months'})
)
qualifying_countries

In [None]:
# Display the 2025 country-year forecasts again for reference
forecasts_for_2025

In [None]:
# 2025 forecasts of the qualifying countries only (matched on country_id)

forecast_columns = ['country_id', 'country', 'year', 'main_mean']
is_qualifying = forecasts_for_2025['country_id'].isin(qualifying_countries['country_id'])
low_risk_forecasts = forecasts_for_2025.loc[is_qualifying, forecast_columns]
low_risk_forecasts

In [None]:
# Combine forecasts and actuals of the qualifying countries (joined on
# country_id and country), ranked from highest to lowest forecast

low_risk_forecasts_and_ged = (
    low_risk_forecasts
    .merge(qualifying_countries, on=['country_id', 'country'], how='left')
    .sort_values(by=['main_mean'], ascending=False)
)
low_risk_forecasts_and_ged

### CREATE WATCHLIST 3: Country-year forecasts for 2025, qualifying countries only

In [None]:
# Write the result to <current_dir>/<save_folder>/watchlist3_low_risk_forecasts_and_ged.csv

file_name = 'watchlist3_low_risk_forecasts_and_ged'
save_path = os.path.join(current_dir, save_folder, file_name)

low_risk_forecasts_and_ged.to_csv(save_path + '.csv')

In [None]:
# Top ten forecasts for 2025 among the countries with less than 25 BRD in the last 12 months

low_risk_forecasts_and_ged.head(10)

In [None]:
# Completion marker: printed once every cell above has run
print('All done!')