In [21]:
# Import modules
import requests
import json
from xml.etree import ElementTree as ET
import pandas as pd
import csv
from pathlib import Path
import matplotlib.pyplot as plt

In [22]:
# Query URL for the OECD SDMX REST API. No API key is needed.
# API documentation: https://gitlab.algobank.oecd.org/public-documentation/dotstat-migration/-/raw/main/OECD_Data_API_documentation.pdf
# The complete URL, parameters included, was generated by the OECD Data Explorer
# (https://data-explorer.oecd.org/); it is only split across lines here for readability.
url = (
    "https://sdmx.oecd.org/public/rest/data/"   # REST endpoint
    "OECD.ELS.HD,DSD_SHA@DF_SHA,1.0/"           # dataflow reference
    ".A.EXP_HEALTH.PT_B1GQ._T.._T.._T..."       # dot-separated dimension filter key
    "?startPeriod=1970&endPeriod=2023"          # requested time range
    "&dimensionAtObservation=AllDimensions"     # flat, one-key-per-observation layout
)

In [23]:
# Send the query to the OECD API, asking for an SDMX-JSON response.

# The Accept header tells the server which representation we want (SDMX-JSON v1.0).
headers = {
    'Accept': 'application/vnd.sdmx.data+json; charset=utf-8; version=1.0'
}

# requests applies no timeout by default, so an unresponsive server would block
# the kernel forever; fail after 30 seconds instead.
response = requests.get(url, headers=headers, timeout=30)
print(response.status_code)

200


In [24]:
# Inspect the HTTP response and, if it is SDMX-JSON, parse it into `data`.
if response.status_code == 200:
    # Show the start of the raw body for a quick sanity check
    print("Response Content:")
    print(response.content[:500])  # first 500 bytes of the body (bytes, not decoded text)

    # Check the Content-Type header (None when the server did not send one)
    content_type = response.headers.get('Content-Type')
    print("Content-Type:", content_type)

    # Only parse when the server really returned SDMX-JSON. The truthiness guard
    # avoids a TypeError from `in None` when the header is missing.
    if content_type and 'application/vnd.sdmx.data+json' in content_type:
        data = response.json()
        print(data)
    else:
        print("Response is not in JSON format.")
else:
    print('Failed to retrieve data. Status code:', response.status_code)
    print("Response Content:")
    print(response.content)

Response Content:
b'{"meta":{"schema":"https://raw.githubusercontent.com/sdmx-twg/sdmx-json/master/data-message/tools/schemas/1.0/sdmx-json-data-schema.json","id":"IREF010332","prepared":"2024-07-15T05:40:49Z","test":true,"contentLanguages":["en","en-US"],"sender":{"id":"Disseminate_Final_DMZ","name":"unknown","names":{"en-US":"unknown"}}},"data":{"dataSets":[{"action":"Information","links":[{"urn":"urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure=OECD.ELS.HD:DSD_SHA(1.0)","rel":"DataStructure"}],"annotation'
Content-Type: application/vnd.sdmx.data+json; charset=utf-8; version=1.0
{'meta': {'schema': 'https://raw.githubusercontent.com/sdmx-twg/sdmx-json/master/data-message/tools/schemas/1.0/sdmx-json-data-schema.json', 'id': 'IREF010332', 'prepared': '2024-07-15T05:40:49Z', 'test': True, 'contentLanguages': ['en', 'en-US'], 'sender': {'id': 'Disseminate_Final_DMZ', 'name': 'unknown', 'names': {'en-US': 'unknown'}}}, 'data': {'dataSets': [{'action': 'Information', 'links': [{'urn

In [25]:
# Flatten the SDMX-JSON observations into an intermediate CSV file.

# Parse the JSON response content
data = json.loads(response.content.decode('utf-8'))

# Each observation is keyed by colon-separated indices ("0:0:0:...:12"), one index
# per dimension; the structure section maps every index back to a code and label.
observations = data['data']['dataSets'][0]['observations']
dimensions = data['data']['structure']['dimensions']['observation']

# Lookup tables: dimension id -> list of values, and dimension id -> position of
# that dimension inside the observation key (the order of the `dimensions` list).
dim_values = {dim['id']: dim['values'] for dim in dimensions}
dim_position = {dim['id']: position for position, dim in enumerate(dimensions)}


def _dim_field(key_parts, dim_id, field='name'):
    """Return `field` of the value that the observation key selects for `dim_id`.

    key_parts -- the observation key already split on ':'
    dim_id    -- dimension identifier, e.g. 'REF_AREA'
    field     -- which attribute of the dimension value to return ('name' or 'id')

    Positions are looked up by dimension id rather than hard-coded, so the export
    stays correct whatever order the API lists the dimensions in.
    """
    return dim_values[dim_id][int(key_parts[dim_position[dim_id]])][field]


# Make sure the target directory exists, then write the file as UTF-8 explicitly:
# pandas reads CSVs as UTF-8 by default, while open() would otherwise use the
# platform encoding and could garble names such as "Türkiye".
intermediate_csv = Path('../output/csv/OECD_Health_Intermittent.csv')
intermediate_csv.parent.mkdir(parents=True, exist_ok=True)

with intermediate_csv.open('w', newline='', encoding='utf-8') as csvfile:
    csvwriter = csv.writer(csvfile)

    # Write the header row
    header = ['Country', 'Measure', 'Unit of Measure', 'Year', 'Health Expenditure']
    csvwriter.writerow(header)

    # Write one data row per observation
    for key, value in observations.items():
        key_parts = key.split(':')
        row = [
            _dim_field(key_parts, 'REF_AREA'),           # Reference area (country name)
            _dim_field(key_parts, 'MEASURE'),            # Measure
            _dim_field(key_parts, 'UNIT_MEASURE'),       # Unit of measure
            _dim_field(key_parts, 'TIME_PERIOD', 'id'),  # Time period (year code)
            value[0]                                     # Observation value
        ]
        csvwriter.writerow(row)

print(f"Data has been written to {intermediate_csv}")

Data has been written to output.csv


In [26]:
# Intermediate CSV holding the flattened observations
health_expend = Path("../output/csv/OECD_Health_Intermittent.csv")

# Load it into a DataFrame and render it for a quick visual check
health_expend_df = pd.read_csv(filepath_or_buffer=health_expend)
display(health_expend_df)

Unnamed: 0,Country,Measure,Unit of Measure,Year,Health Expenditure
0,Brazil,Expenditure,Percentage of GDP,2005,8.035
1,India,Expenditure,Percentage of GDP,2003,4.134
2,India,Expenditure,Percentage of GDP,2002,4.373
3,Belgium,Expenditure,Percentage of GDP,2013,10.575
4,India,Expenditure,Percentage of GDP,2009,3.594
...,...,...,...,...,...
1856,Brazil,Expenditure,Percentage of GDP,2010,7.945
1857,Brazil,Expenditure,Percentage of GDP,2009,8.395
1858,Brazil,Expenditure,Percentage of GDP,2008,8.011
1859,Brazil,Expenditure,Percentage of GDP,2007,8.202


In [27]:
# Manual mapping dictionary so that the few country names that differ between
# this dataset and the other project files end up spelled the same way.
country_mapping = {
    "United States": "United States of America",
    "Slovak Republic": "Slovakia",
    # Correctly encoded name as it appears in UTF-8 data (U+2019 right single
    # quotation mark), plus the plain-apostrophe spelling for safety.
    "China (People\u2019s Republic of)": "China",
    "China (People's Republic of)": "China",
    # The two keys below look like mis-decoded (mojibake) renderings of
    # "Türkiye" and "China (People’s Republic of)". They are kept so that a file
    # read with the wrong encoding is still repaired, but they never match
    # correctly decoded data.
    "T√ºrkiye": "Türkiye",
    "China (People‚Äôs Republic of)": "China"
}
# Standardize country names in the health expenditure data
health_expend_df['Country'] = health_expend_df['Country'].replace(country_mapping)
health_expend_df

Unnamed: 0,Country,Measure,Unit of Measure,Year,Health Expenditure
0,Brazil,Expenditure,Percentage of GDP,2005,8.035
1,India,Expenditure,Percentage of GDP,2003,4.134
2,India,Expenditure,Percentage of GDP,2002,4.373
3,Belgium,Expenditure,Percentage of GDP,2013,10.575
4,India,Expenditure,Percentage of GDP,2009,3.594
...,...,...,...,...,...
1856,Brazil,Expenditure,Percentage of GDP,2010,7.945
1857,Brazil,Expenditure,Percentage of GDP,2009,8.395
1858,Brazil,Expenditure,Percentage of GDP,2008,8.011
1859,Brazil,Expenditure,Percentage of GDP,2007,8.202


In [28]:
# Show the dtype pandas inferred for each column as a quick sanity check before saving
health_expend_df.dtypes

Country                object
Measure                object
Unit of Measure        object
Year                    int64
Health Expenditure    float64
dtype: object

In [29]:
# Destination of the health-expenditure table with standardized country names
csv_file_path = '../output/csv/OECD_Health_Expenditure.csv'

# Save health_expend_df; index=False leaves the pandas row index out of the file
health_expend_df.to_csv(path_or_buf=csv_file_path, index=False)

print(f"DataFrame successfully written to {csv_file_path}")

DataFrame successfully written to ../output/csv/OECD_Health_Expenditure.csv
