# Our World in Data – data collection

- Median age
- The Economist Democracy Index
- Self-reported life satisfaction
- Human Development Index
- Gender Inequality Index

In [None]:
import json
import numpy as np
import pandas as pd
from datetime import date

In [None]:
# Load the list of supported countries. JSON text is UTF-8, so the encoding is
# pinned here instead of being left to the platform's default locale.
with open("countries.json", encoding="utf-8") as f:
    countries = json.load(f)

# Country codes used by the cells below to keep only rows for known countries.
all_country_codes = {item["country_code"] for item in countries}

## Median Age

https://ourworldindata.org/grapher/median-age

In [None]:
# Median age as a list of {"country_code", "year", "value"} records.
# "value" is the estimate where one exists, otherwise the medium-variant
# figure, rounded to two decimals. Only known countries and years before the
# current calendar year are kept.
median_age = (
    pd.read_csv(
        "https://ourworldindata.org/grapher/median-age.csv?v=1&csvType=full&useColumnShortNames=true",
        storage_options={"User-Agent": "Our World In Data data fetch/1.0"},
    )
    .rename(
        columns={
            "Code": "country_code",
            "Year": "year",
            "median_age__sex_all__age_all__variant_estimates": "historical",
            "median_age__sex_all__age_all__variant_medium": "forecast",
        }
    )
    .dropna(subset=["country_code", "year"])
    .loc[
        lambda df: df["country_code"].isin(all_country_codes)
        & df["year"].lt(date.today().year)
    ]
    .assign(value=lambda df: df["historical"].fillna(df["forecast"]).round(2))
    .drop(columns=["Entity", "historical", "forecast"])
    .to_dict(orient="records")
)

# median_age

In [None]:
# Persist the median-age records as pretty-printed JSON.
with open("data/median_age.json", "w") as f:
    f.write(json.dumps(median_age, indent=2))

## The Economist Democracy Index

https://ourworldindata.org/grapher/democracy-index-eiu

In [None]:
# EIU Democracy Index as a list of {"country_code", "year", "value"} records,
# restricted to the countries listed in all_country_codes.
democracy_index = (
    pd.read_csv(
        "https://ourworldindata.org/grapher/democracy-index-eiu.csv?v=1&csvType=full&useColumnShortNames=true",
        storage_options={"User-Agent": "Our World In Data data fetch/1.0"},
    )
    .rename(columns={"Code": "country_code", "Year": "year", "democracy_eiu": "value"})
    # A row with no score would reach json.dump as float NaN, which it writes
    # as the bare token `NaN` (not valid JSON). Such rows are dropped together
    # with rows lacking a code or year.
    # NOTE(review): presumably these are rows that only carry `owid_region` -
    # confirm against the live CSV.
    .dropna(subset=["country_code", "year", "value"])
    # Select the output columns explicitly rather than dropping known extras,
    # so an added or removed auxiliary column upstream cannot break the cell
    # or leak into the output.
    .loc[
        lambda df: df["country_code"].isin(all_country_codes),
        ["country_code", "year", "value"],
    ]
    .to_dict("records")
)

# democracy_index

In [None]:
# Persist the democracy-index records as pretty-printed JSON.
with open("data/democracy_index.json", "w") as f:
    f.write(json.dumps(democracy_index, indent=2))

## Self-reported life satisfaction

https://ourworldindata.org/grapher/happiness-cantril-ladder

In [None]:
# Self-reported life satisfaction (Cantril ladder score) as a list of
# {"country_code", "year", "value"} records for the known countries.
life_satisfaction = (
    pd.read_csv(
        "https://ourworldindata.org/grapher/happiness-cantril-ladder.csv?v=1&csvType=full&useColumnShortNames=true",
        storage_options={"User-Agent": "Our World In Data data fetch/1.0"},
    )
    .rename(
        columns={
            "Code": "country_code",
            "Year": "year",
            "cantril_ladder_score": "value",
        }
    )
    .dropna(subset=["country_code", "year"])
    .drop(columns=["Entity"])
    .loc[lambda df: df["country_code"].isin(all_country_codes)]
    .to_dict(orient="records")
)

# life_satisfaction

In [None]:
# Persist the life-satisfaction records as pretty-printed JSON.
with open("data/life_satisfaction_index.json", "w") as f:
    f.write(json.dumps(life_satisfaction, indent=2))

## Human Development Index

https://ourworldindata.org/grapher/human-development-index

In [None]:
# Human Development Index as a list of {"country_code", "year", "value"}
# records, restricted to the countries listed in all_country_codes.
hdi = (
    pd.read_csv(
        "https://ourworldindata.org/grapher/human-development-index.csv?v=1&csvType=full&useColumnShortNames=true",
        storage_options={"User-Agent": "Our World In Data data fetch/1.0"},
    )
    .rename(columns={"Code": "country_code", "Year": "year", "hdi__sex_total": "value"})
    # A row with no HDI figure would reach json.dump as float NaN, which it
    # writes as the bare token `NaN` (not valid JSON). Such rows are dropped
    # together with rows lacking a code or year.
    # NOTE(review): presumably these are rows that only carry `owid_region` -
    # confirm against the live CSV.
    .dropna(subset=["country_code", "year", "value"])
    # Select the output columns explicitly rather than dropping known extras,
    # so an added or removed auxiliary column upstream cannot break the cell
    # or leak into the output.
    .loc[
        lambda df: df["country_code"].isin(all_country_codes),
        ["country_code", "year", "value"],
    ]
    .to_dict("records")
)

# hdi

In [None]:
# Persist the HDI records as pretty-printed JSON.
with open("data/hdi.json", "w") as f:
    f.write(json.dumps(hdi, indent=2))

## Gender Inequality Index

https://ourworldindata.org/grapher/gender-inequality-index-from-the-human-development-report

In [None]:
# Gender Inequality Index as a list of {"country_code", "year", "value"}
# records for the known countries.
gender_inequality = (
    pd.read_csv(
        "https://ourworldindata.org/grapher/gender-inequality-index-from-the-human-development-report.csv?v=1&csvType=full&useColumnShortNames=true",
        storage_options={"User-Agent": "Our World In Data data fetch/1.0"},
    )
    .rename(columns={"Code": "country_code", "Year": "year", "gii": "value"})
    .dropna(subset=["country_code", "year"])
    .drop(columns=["Entity"])
    .loc[lambda df: df["country_code"].isin(all_country_codes)]
    .to_dict(orient="records")
)

# gender_inequality

In [None]:
# Persist the gender-inequality records as pretty-printed JSON.
with open("data/gender_inequality.json", "w") as f:
    f.write(json.dumps(gender_inequality, indent=2))