In [4]:
import pandas as pd

# Load your WHO dataset CSV
who_df = pd.read_csv("data/WHO_WorldBank_merged.csv")

# Check column names
print(who_df.columns)

# Count unique countries
num_countries = who_df['country_name_x'].nunique()
print(f"Number of unique countries in WHO dataset: {num_countries}")


Index(['country_name_x', 'year', 'mental_health_value', 'indicator_code',
       'country_name_matched', 'country_name_y', 'GDP_per_capita',
       'School_enrollment', 'Unemployment'],
      dtype='object')
Number of unique countries in WHO dataset: 79


In [5]:
import requests
import pandas as pd

WHO_API_URL = "https://ghoapi.azureedge.net/api/MH_12"

# Fetch data from WHO API
response = requests.get(WHO_API_URL)
response.raise_for_status()
data = response.json()["value"]

# Convert to DataFrame
who_df = pd.DataFrame(data)

# The country code column is 'SpatialDim'
num_countries = who_df['SpatialDim'].nunique()
print(f"Number of unique countries from WHO API: {num_countries}")


Number of unique countries from WHO API: 196


In [6]:
import requests
import pandas as pd

BASE_URL = "https://ghoapi.azureedge.net/api/"
INDICATOR = "MH_12"  # Mental health indicator

rows = []
skip = 0
page_size = 1000

while True:
    params = {"$top": page_size, "$skip": skip}
    response = requests.get(f"{BASE_URL}{INDICATOR}", params=params)
    data = response.json()
    records = data.get("value", [])

    if not records:
        break

    for item in records:
        if item.get("NumericValue") is not None:
            rows.append({
                "country_name": item.get("SpatialDim"),
                "year": int(item.get("TimeDim")),
                "mental_health_value": item.get("NumericValue"),
                "indicator_code": item.get("IndicatorCode")
            })
    skip += page_size

# Convert to DataFrame
who_df = pd.DataFrame(rows)

# Count unique countries
num_countries = who_df['country_name'].nunique()
print(f"Number of unique countries in WHO dataset: {num_countries}")



# Assuming wb_df_raw is already defined somewhere for World Bank
# wb_df_raw.to_csv("data/raw/world_bank_data_raw.csv", index=False)

print("WHO data fetched:", who_df.shape)


Number of unique countries in WHO dataset: 196
WHO data fetched: (12936, 4)


In [8]:
import requests
import pandas as pd

# World Bank indicators
indicators = {
    "GDP_per_capita": "NY.GDP.PCAP.CD",
    "Unemployment": "SL.UEM.TOTL.ZS",
    "School_enrollment": "SE.SEC.ENRR"
}

wb_rows = []

for name, code in indicators.items():
    url = f"http://api.worldbank.org/v2/country/all/indicator/{code}?format=json&per_page=20000"
    response = requests.get(url)
    data = response.json()
    records = data[1]  # Actual data

    for item in records:
        wb_rows.append({
            "country_name": item["country"]["value"],  # use country name
            "year": int(item["date"]),
            "indicator": name,
            "value": item["value"]  # keep raw value, even if None
        })

# Convert to DataFrame
wb_df_raw = pd.DataFrame(wb_rows)

# Filter years to match WHO data (e.g., 2000â€“2023)
wb_df_raw = wb_df_raw[(wb_df_raw['year'] >= 2000) & (wb_df_raw['year'] <= 2023)]

# Exclude aggregate regions to get actual countries only
aggregates = [
    'World', 'Euro area', 'OECD members', 'Low income', 'High income',
    'European Union', 'East Asia & Pacific (excluding high income)',
    'Europe & Central Asia (excluding high income)', 'Middle East & North Africa',
    'Sub-Saharan Africa', 'North America', 'South Asia', 'Latin America & Caribbean'
]

wb_df_raw = wb_df_raw[~wb_df_raw['country_name'].isin(aggregates)]

# Pivot indicators to separate columns
wb_df_raw = wb_df_raw.pivot_table(index=["country_name", "year"],
                                  columns="indicator",
                                  values="value").reset_index()
wb_df_raw.columns.name = None

# Count unique countries now
num_countries_wb = wb_df_raw['country_name'].nunique()
print(f"Number of actual countries in World Bank dataset: {num_countries_wb}")



print("Raw World Bank data fetched:", wb_df_raw.shape)


Number of actual countries in World Bank dataset: 253
Raw World Bank data fetched: (6013, 5)
