## World Bank Bilateral Debt: Exploration & Preprocessing

In [1]:
from pathlib import Path
import pandas as pd

# The project root is taken to be two directory levels above the current
# working directory, so this cell depends on where the notebook is launched.
PROJECT_ROOT = Path.cwd().parents[1]

# Location of the raw bilateral-debt export inside the project tree.
csv_path = PROJECT_ROOT.joinpath(
    "data", "raw", "economic", "worldbank-ids-bilateral-debt-raw.csv"
)

# Load the raw CSV with pandas' default parsing options.
debt_df = pd.read_csv(filepath_or_buffer=csv_path)


In [3]:
# Reshape the wide raw table (one column per year) into a long table with
# one row per debtor / creditor / year, clean the values, and save the result.

MIN_YEAR = 1990                   # earliest year kept in the processed data
YEAR_PATTERN = r'\[YR(\d{4})\]'   # year tag expected inside year column headers

id_cols = ['Country Name', 'Country Code', 'Counterpart-Area Name', 'Counterpart-Area Code']

# Map every column header carrying a "[YRdddd]" tag to its integer year.
# The same pattern is used for selecting columns and for parsing the year
# below, so a header that merely contains "YR" can neither crash the
# selection nor slip through and break the integer conversion later.
year_of_col = (
    debt_df.columns.to_series()
    .str.extract(YEAR_PATTERN, expand=False)
    .dropna()
    .astype(int)
)
year_cols = year_of_col[year_of_col >= MIN_YEAR].index.tolist()

if not year_cols:
    # Fail loudly instead of silently writing an empty processed file.
    raise ValueError(
        f"No year columns matching {YEAR_PATTERN!r} with year >= {MIN_YEAR} "
        "found in the raw debt data"
    )

# NOTE(review): only the country / counterpart columns are kept as identifiers.
# If the raw export holds more than one indicator (e.g. a series column),
# rows from different indicators would be mixed here - confirm against the file.

# Melt to long format
debt_long = debt_df.melt(
    id_vars=id_cols,
    value_vars=year_cols,
    var_name='year',
    value_name='debt_stock',
)

# Clean year column: expand=False yields a Series rather than a one-column
# DataFrame, which is what a single-column assignment expects.
debt_long['year'] = debt_long['year'].str.extract(YEAR_PATTERN, expand=False).astype(int)

# Clean debt values: anything non-numeric (such as the '..' placeholder)
# is coerced to NaN.
debt_long['debt_stock'] = pd.to_numeric(debt_long['debt_stock'], errors='coerce')

# Drop observations without a usable debt value
debt_long = debt_long.dropna(subset=['debt_stock'])

# Rename by explicit mapping so the labels cannot be misassigned if the
# column order ever changes.
debt_long = debt_long.rename(columns={
    'Country Name': 'debtor',
    'Country Code': 'debtor_code',
    'Counterpart-Area Name': 'creditor',
    'Counterpart-Area Code': 'creditor_code',
})

# Check and save
print(debt_long.shape)
print(debt_long.isnull().sum())

output_path = PROJECT_ROOT / "data" / "processed" / "worldbank_bilateral_debt.csv"
# Create the output directory on first run; to_csv does not create it.
output_path.parent.mkdir(parents=True, exist_ok=True)
debt_long.to_csv(output_path, index=False)


(24760, 6)
debtor           0
debtor_code      0
creditor         0
creditor_code    0
year             0
debt_stock       0
dtype: int64
