In [1]:
import requests
import numpy as np
import pandas as pd

In [2]:
res = requests.get("https://ec.europa.eu/eurostat/api/dissemination/statistics/1.0/data/sdg_13_10?format=JSON&lang=EN")
raw = res.json()

In [3]:
raw

{'version': '2.0',
 'class': 'dataset',
 'label': 'Net greenhouse gas emissions',
 'source': 'ESTAT',
 'updated': '2024-05-21T11:00:00+0200',
 'value': {'2871': 100.0,
  '2872': 104.7,
  '2873': 96.6,
  '2874': 97.2,
  '2875': 97.5,
  '2876': 101.7,
  '2877': 105.8,
  '2878': 105.4,
  '2879': 104.6,
  '2880': 102.6,
  '2881': 103.0,
  '2882': 107.8,
  '2883': 109.8,
  '2884': 116.4,
  '2885': 116.5,
  '2886': 118.3,
  '2887': 115.4,
  '2888': 112.1,
  '2889': 111.3,
  '2890': 102.7,
  '2891': 108.6,
  '2892': 106.0,
  '2893': 102.5,
  '2894': 102.9,
  '2895': 98.4,
  '2896': 101.4,
  '2897': 102.8,
  '2898': 105.6,
  '2899': 101.9,
  '2900': 103.8,
  '2901': 93.9,
  '2902': 98.3,
  '2903': 93.6,
  '2244': 100.0,
  '2245': 101.4,
  '2246': 101.1,
  '2247': 100.3,
  '2248': 103.4,
  '2249': 105.0,
  '2250': 107.8,
  '2251': 102.3,
  '2252': 106.1,
  '2253': 102.3,
  '2254': 103.1,
  '2255': 101.7,
  '2256': 101.3,
  '2257': 101.8,
  '2258': 102.2,
  '2259': 100.0,
  '2260': 98.3,
  '2261

In [4]:
def parse_df_from_eurostat(values: dict[str: str], tables: list[str], rows: list[str], cols: list[str], label:str="", col_label:str="") -> list[pd.DataFrame]:
	"""
	Given a dict of indexed scaler values and the label lists, parse into a list of dataframes
	Note: Across all tables, the size (row x col) should be the same!

	:param values: A dict with indexed scaler values, ie {'572': 145.5} where the index is the cell that value occupies
	:param tables: A list of the table labels
	:param rows: A list of row labels
	:param cols: A list of column labels 
	:param label: The optional column label for the row keys
	:param col_label: The optional label for the column header group
	:returns: A dataframe with multiindexes for each table
	"""
	calc_index = lambda i, j, k: k + (j * len(cols)) + (i * (len(cols) * len(rows)))
	data_3d = []
	for i in range(len(tables)):
		data_2d = []
		for j in range(len(rows)):
			builder_row = []
			for k in range(len(cols)):
				if str(calc_index(i, j, k)) in values:
					builder_row.append(values[str(calc_index(i, j, k))])
				else:
					builder_row.append(np.NaN)
			data_2d.append(builder_row)
		data_3d.append(data_2d)
	

	dataframes = []
	for table in data_3d:
		df = pd.DataFrame(table)
		dataframes.append(df)
	
	df = pd.concat(dataframes, axis=1)
	cols = [l[0] for l in cols]
	df.columns = pd.MultiIndex.from_product([tables, cols], names=["table", col_label])
	if label:
		df[label] = [i[0] for i in rows]

	return df

In [5]:
# tables = []
# for stat in raw["dimension"]["statinfo"]["category"]["label"].items():
# 	for unit in raw["dimension"]["unit"]["category"]["label"].items():
# 		tables.append(stat[0] + unit[0])

values = raw["value"]
rows = list(raw["dimension"]["geo"]["category"]["label"].items())
cols = list(raw["dimension"]["time"]["category"]["label"].items())
tables = [i[0] for i in raw["dimension"]["nrg_bal"]["category"]["label"].items()]

KeyError: 'nrg_bal'

In [6]:
df = parse_df_from_eurostat(values, tables, rows, cols, label="country", col_label="time")
df.head()

NameError: name 'tables' is not defined

In [7]:
df.xs(tables[0], level=0, axis=1).head()

NameError: name 'df' is not defined

In [18]:
t = []
for j in raw["dimension"]["src_crf"]["category"]["label"].items():
	for k in raw["dimension"]["unit"]["category"]["label"].items():
		t.append(j[0] + "_" + k[0])
t

('TOTXMEMONIA', 'Total (excluding memo items, including international aviation)')
('I90', 'Index, 1990=100')
('T_HAB', 'Tonnes per capita')
('TOTX4_MEMONIA', 'Total (excluding LULUCF and memo items, including international aviation)')
('I90', 'Index, 1990=100')
('T_HAB', 'Tonnes per capita')


['TOTXMEMONIA_I90',
 'TOTXMEMONIA_T_HAB',
 'TOTX4_MEMONIA_I90',
 'TOTX4_MEMONIA_T_HAB']