In [221]:
import requests
import numpy as np
import pandas as pd

In [222]:
# Download the Eurostat dataset `sdg_13_40` as JSON. The timeout keeps the cell from
# hanging forever on a stalled connection (requests waits indefinitely by default), and
# raise_for_status() surfaces an HTTP error here instead of as a confusing parse failure below.
res = requests.get(
	"https://ec.europa.eu/eurostat/api/dissemination/statistics/1.0/data/sdg_13_40?format=JSON&lang=EN",
	timeout=30,
)
res.raise_for_status()
raw = res.json()

In [268]:
def parse_df_from_eurostat(values: dict[str, float], d: list[str], rows: list[tuple[str, str]], cols: list[tuple[str, str]], label: str = "", col_label: str = "") -> pd.DataFrame:
	"""
	Build one wide dataframe from a dict of flat-indexed scalar values and the label lists.
	Note: Across all tables, the size (row x col) should be the same!

	The cell at table ``i``, row ``j``, column ``k`` is looked up under the key
	``str(k + j * len(cols) + i * len(cols) * len(rows))``; keys that are absent become NaN.

	:param values: A dict with indexed scalar values, ie {'572': 145.5} where the index is the cell that value occupies
	:param d: A list of the table labels, used as the outer level of the column MultiIndex
	:param rows: A list of row entries; only element ``[0]`` of each entry is used (as the row key)
	:param cols: A list of column entries; only element ``[0]`` of each entry is used (as the column key)
	:param label: The optional column label for the row keys; when empty, no key column is added
	:param col_label: The optional label for the column header group
	:returns: A single dataframe whose columns are a (table, col_label) MultiIndex, plus the ``label`` column when requested
	"""
	n_rows, n_cols = len(rows), len(cols)
	table_size = n_rows * n_cols

	# One output row per entry of `rows`, holding the cells of every table side by side
	# (table-major, then column) so it lines up with the MultiIndex product below.
	data = [
		[
			# np.nan rather than np.NaN: the capitalised alias was removed in NumPy 2.0.
			values.get(str(i * table_size + j * n_cols + k), np.nan)
			for i in range(len(d))
			for k in range(n_cols)
		]
		for j in range(n_rows)
	]

	col_keys = [c[0] for c in cols]
	columns = pd.MultiIndex.from_product([d, col_keys], names=["table", col_label])
	df = pd.DataFrame(data, columns=columns)
	if label:
		df[label] = [r[0] for r in rows]

	return df

In [286]:
def parse_from_eurostat_raw(raw: dict) -> tuple[list[str], pd.DataFrame]:
	"""
	Parses the table names and a dataframe from the raw eurostat output

	One table is produced per (statinfo, unit) pair; its name is the statinfo key
	directly followed by the unit key, with no separator.

	NOTE(review): this presumes the flat keys of raw["value"] are laid out as
	statinfo x unit x geo x time, and that each "label" dict is in positional order --
	confirm against the dimension order the API reports.

	:param raw: The raw API output as a parsed dict
	:returns: A tuple of (table names, dataframe from `parse_df_from_eurostat`)
	"""
	dimensions = raw["dimension"]
	rows = list(dimensions["geo"]["category"]["label"].items())
	cols = list(dimensions["time"]["category"]["label"].items())

	# Only the category keys are needed for the table names, so iterate the dicts directly.
	tables = [
		stat + unit
		for stat in dimensions["statinfo"]["category"]["label"]
		for unit in dimensions["unit"]["category"]["label"]
	]

	return tables, parse_df_from_eurostat(raw["value"], tables, rows, cols, label="country", col_label="time")

In [287]:
# Parse the downloaded payload; `tables` holds the table names used as the outer column level.
tables, df = parse_from_eurostat_raw(raw)
# Preview the first rows (last expression of the cell, so it is displayed).
df.head()

table,VAL_AMIO_EUR,VAL_AMIO_EUR,VAL_AMIO_EUR,VAL_AMIO_EUR,VAL_AMIO_EUR,VAL_AMIO_EUR,VAL_AMIO_EUR,VAL_AMIO_EUR,VAL_AMIO_EUR,VAL_AMIO_EUR,...,AVG_30YEUR_HAB,AVG_30YEUR_HAB,AVG_30YEUR_HAB,AVG_30YEUR_HAB,AVG_30YEUR_HAB,AVG_30YEUR_HAB,AVG_30YEUR_HAB,AVG_30YEUR_HAB,AVG_30YEUR_HAB,country
time,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,Unnamed: 21_level_1
0,6390.0,6191.0,9519.0,19168.0,8094.0,3341.0,3301.0,15780.0,2282.0,5072.0,...,28.26,28.82,29.25,30.26,31.73,33.19,31.92,36.19,39.5,EU27_2020
1,,,,,,,,,,,...,,,,,,,,,,EU28
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,10.49,10.43,10.76,10.8,11.34,12.08,9.49,40.3,42.26,BE
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,16.51,21.8,21.96,22.12,22.28,22.43,22.6,22.79,24.44,BG
4,0.0,0.0,0.0,0.0,0.0,9.0,0.0,91.0,0.0,0.0,...,46.06,45.94,46.05,47.74,48.03,47.84,48.24,51.1,50.52,CZ


In [290]:
# Select only the columns of the first table by taking a cross-section of the outer column level.
df.xs(tables[0], level=0, axis=1).head()

time,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
0,6390.0,6191.0,9519.0,19168.0,8094.0,3341.0,3301.0,15780.0,2282.0,5072.0,...,21797.0,12976.0,11552.0,9837.0,29952.0,22641.0,25111.0,14470.0,59437.0,52259.0
1,,,,,,,,,,,...,,,,,,,,,,
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,727.0,0.0,129.0,29.0,203.0,275.0,513.0,10723.0,807.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1104.0,1117.0,0.0,0.0,0.0,0.0,6.0,2.0,37.0
4,0.0,0.0,0.0,0.0,0.0,9.0,0.0,91.0,0.0,0.0,...,1856.0,16.0,0.0,62.0,667.0,143.0,0.0,165.0,622.0,72.0
