### Data Sources
PPP — PPP conversion factor, GDP (LCU per international $): https://data.worldbank.org/indicator/PA.NUS.PPP

CPI — inflation, consumer prices (annual %), not the index level: https://data.worldbank.org/indicator/FP.CPI.TOTL.ZG

#### Enter parameters for PPP

In [1]:
# Year of PPP data to extract. Its string form must match a column header in
# ppp.csv, and it is also embedded in the output name (ppp_cleaned_<year>.csv).
PPP_YEAR = 2023

#### Clean ppp.csv

In [2]:
import pandas as pd
import pycountry

# Read the raw export (skipping its first four lines -- presumably the
# metadata preamble of the World Bank download; confirm against the file),
# keep only the country code and the requested year, and discard rows that
# have no value for that year.
df = (
    pd.read_csv('ppp.csv', skiprows=4)
    .loc[:, ['Country Code', str(PPP_YEAR)]]
    .dropna(subset=[str(PPP_YEAR)])
)

def convert_iso3_to_iso2(iso3):
    """Translate an ISO 3166-1 alpha-3 country code to its alpha-2 form.

    Args:
        iso3: A value from the ``Country Code`` column, e.g. ``"AFG"``.

    Returns:
        The two-letter code (e.g. ``"AF"``), or ``None`` when ``iso3`` is not
        a string or pycountry has no country registered under that code
        (presumably the World Bank's regional/aggregate codes -- verify
        against the data).

    Only the "no such country" outcomes are turned into ``None``; any other
    failure (for example a broken pycountry installation) is allowed to
    propagate instead of silently emptying the dataset.
    """
    # Missing cells reach us as float NaN; there is nothing to look up.
    if not isinstance(iso3, str):
        return None
    try:
        country = pycountry.countries.get(alpha_3=iso3)
    except LookupError:
        # Some pycountry releases raise KeyError/LookupError for an unknown
        # code instead of returning None.
        return None
    return None if country is None else country.alpha_2

# Swap every alpha-3 code for its alpha-2 equivalent. Codes that cannot be
# converted come back as None, so those rows are removed and the index is
# renumbered from zero.
df = (
    df.assign(**{'Country Code': df['Country Code'].apply(convert_iso3_to_iso2)})
    .dropna(subset=['Country Code'])
    .reset_index(drop=True)
)

print(df.head())

# NOTE: Namibia's alpha-2 code is the literal string "NA". It is written
# correctly here, but pandas.read_csv treats "NA" as missing by default, so
# consumers of this file should read it with keep_default_na=False.
df.to_csv(f'ppp_cleaned_{PPP_YEAR}.csv', index=False)

  Country Code        2023
0           AW    1.352821
1           AF   14.806405
2           AO  209.794208
3           AL   40.588824
4           AD    0.603563


#### Enter parameters for CPI

In [3]:
# Years of CPI data to keep. Each entry's string form must match a column
# header in cpi.csv; a country is dropped if any listed year is missing.
# The first and last entries also name the output file
# (cpi_cleaned_<first>_to_<last>.csv).
CPI_YEARS = [2023, 2024]

#### Clean cpi.csv

In [5]:
import pandas as pd
import pycountry

# Column headers in the export are strings, so stringify the requested years.
year_cols = list(map(str, CPI_YEARS))

# Read the raw export (skipping its first four lines -- presumably the
# metadata preamble of the World Bank download; confirm against the file),
# keep the country code plus the requested years, and discard any row that
# lacks a value for at least one of those years.
df = (
    pd.read_csv('cpi.csv', skiprows=4)
    .loc[:, ['Country Code', *year_cols]]
    .dropna(subset=year_cols)
)

def convert_iso3_to_iso2(iso3):
    """Translate an ISO 3166-1 alpha-3 country code to its alpha-2 form.

    Args:
        iso3: A value from the ``Country Code`` column, e.g. ``"AFG"``.

    Returns:
        The two-letter code (e.g. ``"AF"``), or ``None`` when ``iso3`` is not
        a string or pycountry has no country registered under that code
        (presumably the World Bank's regional/aggregate codes -- verify
        against the data).

    Only the "no such country" outcomes are turned into ``None``; any other
    failure (for example a broken pycountry installation) is allowed to
    propagate instead of silently emptying the dataset.
    """
    # Missing cells reach us as float NaN; there is nothing to look up.
    if not isinstance(iso3, str):
        return None
    try:
        country = pycountry.countries.get(alpha_3=iso3)
    except LookupError:
        # Some pycountry releases raise KeyError/LookupError for an unknown
        # code instead of returning None.
        return None
    return None if country is None else country.alpha_2

# Swap every alpha-3 code for its alpha-2 equivalent. Codes that cannot be
# converted come back as None, so those rows are removed and the index is
# renumbered from zero.
df = (
    df.assign(**{'Country Code': df['Country Code'].apply(convert_iso3_to_iso2)})
    .dropna(subset=['Country Code'])
    .reset_index(drop=True)
)

print(df.head())

# The output name is built from the first and last configured years only.
year_range = f"{CPI_YEARS[0]}_to_{CPI_YEARS[-1]}"
filename = f"cpi_cleaned_{year_range}.csv"

# NOTE: Namibia's alpha-2 code is the literal string "NA". It is written
# correctly here, but pandas.read_csv treats "NA" as missing by default, so
# consumers of this file should read it with keep_default_na=False.
df.to_csv(filename, index=False)

  Country Code       2023       2024
0           AF  -4.644709  -6.601186
1           AO  13.644102  28.240495
2           AL   4.759764   2.214490
3           AM   1.980419   0.269512
4           AU   5.597015   3.161614
