In [17]:
from pathlib import Path

# Locate the repository root: the nearest directory, starting at the current
# working directory and moving upward, that contains a README.md.
_start = Path.cwd()
for ROOT in (_start, *_start.parents):
    if (ROOT / "README.md").exists():
        break
else:
    # Every candidate up to and including the filesystem root was checked
    # and none held a README.md.
    raise FileNotFoundError("Repo root not found. Open the project folder in VS Code (WSL) and rerun.")

# Data locations used by later cells, all derived from the repo root.
DATA_DIR = ROOT / "data"
RAW_DIR, PROCESSED_DIR = DATA_DIR / "raw", DATA_DIR / "processed"

# Sanity check: print the resolved paths and whether the folders are present.
print("ROOT:", ROOT)
print("RAW_DIR exists:", RAW_DIR.exists(), RAW_DIR)
print("PROCESSED_DIR exists:", PROCESSED_DIR.exists(), PROCESSED_DIR)

ROOT: /home/mehdi/BEE2041/bee2041-hyperinflation-project
RAW_DIR exists: True /home/mehdi/BEE2041/bee2041-hyperinflation-project/data/raw
PROCESSED_DIR exists: True /home/mehdi/BEE2041/bee2041-hyperinflation-project/data/processed


In [18]:
import pandas as pd
import re
from pathlib import Path

# Input: inflation CSV in wide format (one column per year); output: tidy CSV
# with one row per (identifier columns, year) observation.
# NOTE(review): "worlbank" (sic) is kept as-is -- presumably it matches the file
# name on disk; rename the file and this literal together if correcting it.
DATA_FILE = RAW_DIR / "inflation_worlbank.csv"
OUT = PROCESSED_DIR / "inflation_worldbank_tidy.csv"

# Read the file once. skiprows=4 skips the first four lines (presumably a
# preamble above the header row -- confirm against the raw file); "utf-8-sig"
# tolerates a leading byte-order mark.
# Columns that are entirely empty are dropped *before* reshaping so they do not
# end up as identifier columns in the tidy output. Previously this happened
# only after the tidy file had already been written, via a second read.
df = pd.read_csv(DATA_FILE, skiprows=4, encoding="utf-8-sig").dropna(axis=1, how="all")

# Identify year columns like "1960"..."2024"; everything else identifies the series.
year_columns = [c for c in df.columns if re.fullmatch(r"\d{4}", str(c))]
if not year_columns:
    # Fail loudly instead of silently writing an empty/meaningless tidy file.
    raise ValueError(f"No four-digit year columns found in {DATA_FILE}")
identifier_columns = [c for c in df.columns if c not in year_columns]

# Wide -> long: one row per identifier combination and year.
tidy = df.melt(
    id_vars=identifier_columns,
    value_vars=year_columns,
    var_name="year",
    value_name="inflation_annual_%",
)

tidy["year"] = tidy["year"].astype(int)
# Non-numeric entries become NaN and are removed together with missing values below.
tidy["inflation_annual_%"] = pd.to_numeric(tidy["inflation_annual_%"], errors="coerce")

# World Bank country metadata has "Region" blank for aggregates; we'll filter using that file in the next step.
tidy = tidy.dropna(subset=["inflation_annual_%"])

tidy.to_csv(OUT, index=False)
print("Saved:", OUT, "rows:", len(tidy))

Saved: /home/mehdi/BEE2041/bee2041-hyperinflation-project/data/processed/inflation_worldbank_tidy.csv rows: 11295
