In [None]:
import os
from pathlib import Path
from typing import Optional, Union

import pandas as pd
import requests


In [None]:
# Output folder for the raw World Bank CSV files.
# The machine-specific absolute path below stays the default so existing runs are
# unchanged; set the GDP_DATA_RAW_DIR environment variable to redirect the output
# on another machine without editing the notebook. An empty value is ignored.
DEFAULT_OUT_DIR = r"C:\Users\Admin\Desktop\TANPHAT\hocotruong\Năm ba 2025-2026\HK1_A\Thu thập và tiền xử lý dữ liệu\Đồ_án_GDP\data\data_raw"
OUT = Path(os.environ.get("GDP_DATA_RAW_DIR") or DEFAULT_OUT_DIR)
# Create the folder (and any missing parents) up front; a no-op when it already exists.
OUT.mkdir(parents=True, exist_ok=True)

In [None]:
def _flatten_worldbank_item(item: dict) -> dict:
    """Flatten one World Bank observation object into a single-level row dict."""
    # "indicator" and "country" are nested objects; fall back to {} when the key
    # is missing or null so the lookups below yield None instead of raising.
    indicator_info = item.get("indicator") or {}
    country_info = item.get("country") or {}
    return {
        "indicator_id":      indicator_info.get("id"),
        "indicator_value":   indicator_info.get("value"),
        "country_id":        country_info.get("id"),
        "country_value":     country_info.get("value"),
        "countryiso3code":   item.get("countryiso3code"),
        "date":              item.get("date"),
        "value":             item.get("value"),
        "unit":              item.get("unit"),
        "obs_status":        item.get("obs_status"),
        "decimal":           item.get("decimal"),
    }


def crawl_worldbank_raw(indicator: str, country: str = "VN", per_page: int = 1000) -> pd.DataFrame:
    """Download every page of one World Bank indicator series as a raw DataFrame.

    Requests ``/v2/country/{country}/indicator/{indicator}`` page by page and
    flattens each observation into one row. Values are stored as received from
    the API; no cleaning or type conversion is applied here.

    Args:
        indicator: World Bank indicator code, e.g. ``"NY.GDP.MKTP.CD"``.
        country: Country code placed in the URL (default ``"VN"``).
        per_page: Number of observations requested per page.

    Returns:
        DataFrame with the columns ``indicator_id``, ``indicator_value``,
        ``country_id``, ``country_value``, ``countryiso3code``, ``date``,
        ``value``, ``unit``, ``obs_status`` and ``decimal``. The frame is empty
        (but keeps these columns) when the API returns no observations.

    Raises:
        requests.HTTPError: If a response has an HTTP error status.
        RuntimeError: If the API answers with an error ``message`` payload
            (for example an invalid indicator or country code).
    """
    base = f"https://api.worldbank.org/v2/country/{country}/indicator/{indicator}"
    page = 1
    all_rows = []

    while True:
        params = {"format": "json", "per_page": per_page, "page": page}
        r = requests.get(base, params=params, timeout=30)
        r.raise_for_status()
        payload = r.json()

        # The API reports problems such as an unknown indicator as a JSON body of
        # the form [{"message": [...]}]. Raise instead of silently returning an
        # empty frame that would then be saved as an empty CSV.
        if (
            isinstance(payload, list)
            and payload
            and isinstance(payload[0], dict)
            and "message" in payload[0]
        ):
            raise RuntimeError(
                f"World Bank API error for indicator={indicator!r}, "
                f"country={country!r}: {payload[0]['message']}"
            )

        # Expected shape: payload = [metadata, data]; anything else means no data.
        if not isinstance(payload, list) or len(payload) < 2 or payload[1] is None:
            break

        meta, data = payload[0], payload[1]
        if not data:
            break

        all_rows.extend(_flatten_worldbank_item(item) for item in data)

        # Stop once the last page has been read. The paging field is coerced
        # defensively because it is not guaranteed to arrive as an int.
        total_pages = int(meta.get("pages", 1) or 1)
        if page >= total_pages:
            break
        page += 1

    # Fix the column order explicitly so an empty result still has the full schema.
    return pd.DataFrame(all_rows, columns=[
        "indicator_id", "indicator_value",
        "country_id", "country_value", "countryiso3code",
        "date", "value", "unit", "obs_status", "decimal",
    ])

def save_worldbank_raw(
    indicator: str,
    country: str = "VN",
    out_dir: Optional[Union[str, os.PathLike]] = None,
) -> Path:
    """Download one World Bank indicator series and save it as a raw CSV file.

    Args:
        indicator: World Bank indicator code passed to ``crawl_worldbank_raw``.
        country: Country code passed to ``crawl_worldbank_raw`` (default ``"VN"``).
        out_dir: Destination folder as a ``Path``, ``str`` or other path-like
            object. ``None`` means the current working directory. The folder
            and any missing parents are created.

    Returns:
        Path of the written file, ``<out_dir>/<indicator>_<country>_RAW.csv``.
    """
    df = crawl_worldbank_raw(indicator, country)
    # Normalise to Path so plain strings work too; None falls back to the cwd.
    out_dir = Path(".") if out_dir is None else Path(out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    out_path = out_dir / f"{indicator}_{country}_RAW.csv"
    # "utf-8-sig" prepends a UTF-8 byte-order mark to the file.
    df.to_csv(out_path, index=False, encoding="utf-8-sig")
    print(f"✅ Lưu {len(df)} dòng -> {out_path}")
    return out_path


In [None]:
# Series 1: GDP in current US dollars (indicator NY.GDP.MKTP.CD), written to OUT.
save_worldbank_raw(indicator="NY.GDP.MKTP.CD", country="VN", out_dir=OUT)

In [None]:

# Series 2: FDI inflows in US dollars (indicator BX.KLT.DINV.CD.WD), written to OUT.
save_worldbank_raw(indicator="BX.KLT.DINV.CD.WD", country="VN", out_dir=OUT)

In [None]:
# Series 3: GDP growth in percent (indicator NY.GDP.MKTP.KD.ZG), written to OUT.
save_worldbank_raw(indicator="NY.GDP.MKTP.KD.ZG", country="VN", out_dir=OUT)
