### Data Sources
PPP (PPP conversion factor, GDP, LCU per international $): https://data.worldbank.org/indicator/PA.NUS.PPP

CPI (inflation, consumer prices, annual %): https://data.worldbank.org/indicator/FP.CPI.TOTL.ZG

#### Enter parameters for PPP

In [10]:
# Imported in this cell so the parameters can be evaluated on a fresh kernel;
# the notebook's other import of Path only happens in the following cell.
from pathlib import Path

# Year column of the PPP export to extract.
PPP_YEAR = 2023
# Raw PPP CSV to read.
RAW_PATH = Path('data/raw/ppp.csv')
# Destination of the cleaned file; the year is embedded in the file name.
OUTPUT_PATH = Path(f'data/processed/ppp_cleaned_{PPP_YEAR}.csv')

#### Clean ppp.csv

In [11]:
import pandas as pd
import pycountry
from pathlib import Path

# Read the raw export (the first four lines are skipped as metadata), keep
# just the country code and the selected year, and discard rows that have no
# PPP value for that year.
df = (
    pd.read_csv(RAW_PATH, skiprows=4)
    .loc[:, ['Country Code', str(PPP_YEAR)]]
    .dropna(subset=[str(PPP_YEAR)])
)

# Convert ISO-3 to ISO-2 country codes
def convert_iso3_to_iso2(iso3):
    """Translate an ISO 3166-1 alpha-3 country code to its alpha-2 code.

    Args:
        iso3: Value to look up via ``pycountry.countries.get(alpha_3=...)``.

    Returns:
        The matching record's ``alpha_2`` attribute, or ``None`` when the
        lookup yields no usable record.
    """
    try:
        country = pycountry.countries.get(alpha_3=iso3)
    except LookupError:
        # Covers a pycountry that raises KeyError for unknown codes instead
        # of returning None. Unlike a bare ``except``, this lets
        # KeyboardInterrupt and unexpected errors (e.g. a missing import)
        # propagate instead of silently dropping every row.
        return None
    # ``country`` is None (or lacks ``alpha_2``) when there is no match.
    return getattr(country, 'alpha_2', None)

# Map ISO-3 codes to ISO-2, keep only rows whose conversion succeeded, and
# renumber the index. Chaining with assign() builds a new frame rather than
# writing a column into the filtered frame in place.
df = (
    df.assign(**{'Country Code': df['Country Code'].apply(convert_iso3_to_iso2)})
    .dropna(subset=['Country Code'])
    .reset_index(drop=True)
)

# Output preview
print(df.head())

# Save cleaned PPP data; create the output directory first because to_csv
# does not create missing parent directories.
OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(OUTPUT_PATH, index=False)

  Country Code        2023
0           AW    1.352821
1           AF   14.806405
2           AO  209.794208
3           AL   40.588824
4           AD    0.603563


#### Enter parameters for CPI

In [12]:
# Imported in this cell so the CPI parameters do not depend on an import
# made by a cell in another section of the notebook.
from pathlib import Path

# Year column of the CPI export to extract.
CPI_YEAR = 2024
# Raw CPI CSV to read.
RAW_PATH = Path('data/raw/cpi.csv')
# Destination of the cleaned file; the year is embedded in the file name.
OUTPUT_PATH = Path(f'data/processed/cpi_cleaned_{CPI_YEAR}.csv')

#### Clean cpi.csv

In [13]:
import pandas as pd
import pycountry
from pathlib import Path

# Read the raw export (the first four lines are skipped as metadata), keep
# just the country code and the selected year, and discard rows that have no
# CPI value for that year.
df = (
    pd.read_csv(RAW_PATH, skiprows=4)
    .loc[:, ['Country Code', str(CPI_YEAR)]]
    .dropna(subset=[str(CPI_YEAR)])
)

# Convert ISO-3 to ISO-2 country codes
def convert_iso3_to_iso2(iso3):
    """Translate an ISO 3166-1 alpha-3 country code to its alpha-2 code.

    Args:
        iso3: Value to look up via ``pycountry.countries.get(alpha_3=...)``.

    Returns:
        The matching record's ``alpha_2`` attribute, or ``None`` when the
        lookup yields no usable record.
    """
    try:
        country = pycountry.countries.get(alpha_3=iso3)
    except LookupError:
        # Covers a pycountry that raises KeyError for unknown codes instead
        # of returning None. Unlike a bare ``except``, this lets
        # KeyboardInterrupt and unexpected errors (e.g. a missing import)
        # propagate instead of silently dropping every row.
        return None
    # ``country`` is None (or lacks ``alpha_2``) when there is no match.
    return getattr(country, 'alpha_2', None)

# Map ISO-3 codes to ISO-2, keep only rows whose conversion succeeded, and
# renumber the index. Chaining with assign() builds a new frame rather than
# writing a column into the filtered frame in place.
df = (
    df.assign(**{'Country Code': df['Country Code'].apply(convert_iso3_to_iso2)})
    .dropna(subset=['Country Code'])
    .reset_index(drop=True)
)

# Output preview
print(df.head())

# Save cleaned CPI data; create the output directory first because to_csv
# does not create missing parent directories.
OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(OUTPUT_PATH, index=False)

  Country Code       2024
0           AF  -6.601186
1           AO  28.240495
2           AL   2.214490
3           AM   0.269512
4           AU   3.161614
