## UCDP External Support data exploration & preprocessing

In [1]:
import pandas as pd
from pathlib import Path

In [2]:
# Anchor all paths two directory levels above the current working directory.
# NOTE(review): assumes the notebook is launched from a folder two levels
# below the project root — confirm if the notebook is moved.
PROJECT_ROOT = Path.cwd().parents[1]

# Raw UCDP External Support workbook inside the project tree.
xlsx_path = PROJECT_ROOT.joinpath("data", "raw", "military", "ucdp-esd-ty-raw.xlsx")

# read_excel with default arguments loads the first sheet of the workbook.
esd_df = pd.read_excel(xlsx_path)

In [3]:
# Columns this notebook relies on: supporter name, location, year, the
# overall support flag, and the four support-type indicators used later.
key_cols = [
    'ext_name',
    'location',
    'year',
    'ext_sup',
    'ext_p',
    'ext_w',
    'ext_t',
    'ext_f',
]

# Number of missing values in each of those columns.
missing_per_column = esd_df[key_cols].isna().sum()
print(missing_per_column)

ext_name    1539
location       0
year           0
ext_sup        0
ext_p          0
ext_w          0
ext_t          0
ext_f          0
dtype: int64


In [4]:
# Keep only rows where support was actually provided (ext_sup == 1) and the
# year is 1990 or later.
# NOTE(review): this overwrites esd_df, so re-running an earlier cell after
# this one operates on the filtered rows rather than the full raw sheet.
has_support = esd_df['ext_sup'] == 1
since_1990 = esd_df['year'] >= 1990
esd_df = esd_df[has_support & since_1990]

# Raw column -> readable name: supporter, recipient location, year, and the
# four support-type indicators.
# NOTE(review): some retained rows are 0 in all four indicators; presumably
# the support flag also covers support types not selected here — confirm
# against the UCDP ESD codebook, along with the ext_p -> "troops" mapping.
readable_names = {
    'ext_name': 'supporter',
    'location': 'target_location',
    'year': 'year',
    'ext_p': 'troops',
    'ext_w': 'weapons',
    'ext_t': 'training',
    'ext_f': 'funding',
}
esd_clean = esd_df[list(readable_names)].rename(columns=readable_names)

In [5]:
# Destination for the cleaned external-support table.
# NOTE(review): the file name starts with "uucdp" while the raw input file is
# named "ucdp-..."; this looks like a typo, but it is left unchanged because
# other code may already read this exact path — confirm before renaming.
output_path = PROJECT_ROOT / "data" / "processed" / "uucdp-esd-ty.csv"

# to_csv does not create missing parent directories, so make sure the target
# folder exists; this lets the notebook run end to end on a fresh checkout.
output_path.parent.mkdir(parents=True, exist_ok=True)
esd_clean.to_csv(output_path, index=False)

# Sanity check: dimensions and first rows of the frame that was written.
print(esd_clean.shape)
print(esd_clean.head())

(6899, 7)
                                 supporter          target_location  year  \
23                    Government of Israel          Iran: Kurdistan  1993   
25                    Government of Israel          Iran: Kurdistan  1996   
26                                     PUK          Iran: Kurdistan  1996   
27              Government of Saudi Arabia          Iran: Kurdistan  2016   
69  Government of United States of America  Philippines: Government  1990   

    troops  weapons  training  funding  
23       0        0         0        0  
25       0        0         0        0  
26       0        0         0        0  
27       0        0         0        1  
69       0        1         1        1  


In [6]:
# Sanity check: the ten most frequent supporters in the filtered table.
# value_counts sorts by descending frequency, so the first ten entries are
# the top ten.
supporter_counts = esd_clean['supporter'].value_counts()
print(supporter_counts.iloc[:10])

supporter
Government of United States of America    563
Government of Iran                        209
Government of Pakistan                    184
Government of France                      165
Government of United Kingdom              150
Government of China                       140
Government of Russia (Soviet Union)       111
Government of Turkey                      101
al-Qaida                                   97
Government of India                        96
Name: count, dtype: int64
