# Eurostat IPCA 

|dataset|numero di valori|
|-|-:|
|teicp000      |    1.851|
|prc_hicp_manr |3.381.206|
|prc_hicp_midx |7.480.674|
|prc_hicp_mmor |3.576.459|
|prc_hicp_mv12r|3.186.181|

In [None]:
import requests, pandas as pd
from itertools import product
from tabulate import tabulate

def parse_eurostat_data(data):
    """
    Estrae e organizza i dati Eurostat in un DataFrame a partire dal JSON originale.

    Parametri:
    - data: dict, risposta JSON da Eurostat API

    Ritorna:
    - pd.DataFrame con colonne: freq_code, freq_label, unit_code, unit_label, 
      coicop_code, coicop_label, geo_code, geo_label, time, value
    """
    dimensions = data["dimension"]
    
    # Etichette
    freq_labels = dimensions["freq"]["category"]["label"]
    unit_labels = dimensions["unit"]["category"]["label"]
    coicop_labels = dimensions["coicop"]["category"]["label"]
    geo_labels = dimensions["geo"]["category"]["label"]
    time_labels = dimensions["time"]["category"]["label"]
    
    # Ordine dimensioni e combinazioni
    freq_keys = list(freq_labels.keys())
    unit_keys = list(unit_labels.keys())
    coicop_keys = list(coicop_labels.keys())
    geo_keys = list(geo_labels.keys())
    time_keys = list(time_labels.keys())
    
    combinations = list(product(freq_keys, unit_keys, coicop_keys, geo_keys, time_keys))
    
    # Ricostruzione dati
    records = []
    for i, (f, u, c, g, t) in enumerate(combinations):
        if str(i) in data["value"]:
            records.append({
                "freq_code": f,
                "freq_label": freq_labels[f],
                "unit_code": u,
                "unit_label": unit_labels[u],
                "coicop_code": c,
                "coicop_label": coicop_labels[c],
                "geo_code": g,
                "geo_label": geo_labels[g],
                "time": time_labels[t],
                "value": data["value"][str(i)]
            })
    
    return pd.DataFrame(records)

## estrazioni massive

In [None]:
dataset = 'teicp000' # prc_hicp_manr   prc_hicp_midx    prc_hicp_mv12r
url = f"https://ec.europa.eu/eurostat/api/dissemination/statistics/1.0/data/{dataset}?format=JSON"

response = requests.get(url)

if response.status_code == 200:
    data = response.json()
    # print("- ✅ Connessione riuscita.")
    print("- 📦 Chiavi:", data.keys(), "\n")
    print("- 📐 Dimensioni disponibili:", data["dimension"].keys(), "\n")
    print("- 🔢 Numero di valori:", len(data["value"]), "\n")
else:
    print("❌ Errore:", response.status_code)
    print(response.text)

- 📦 Chiavi: dict_keys(['version', 'class', 'label', 'source', 'updated', 'value', 'status', 'id', 'size', 'dimension', 'extension']) 

- 📐 Dimensioni disponibili: dict_keys(['freq', 'indic_bt', 'nace_r2', 's_adj', 'unit', 'geo', 'time']) 

- 🔢 Numero di valori: 4043318 



In [6]:
df = parse_eurostat_data(data)
(df.head())


Unnamed: 0,freq_code,freq_label,unit_code,unit_label,coicop_code,coicop_label,geo_code,geo_label,time,value
0,M,Monthly,RCH_MV12MAVR,Moving 12 months average rate of change,CP00,All-items HICP,IT,Italy,1997-12,1.9
1,M,Monthly,RCH_MV12MAVR,Moving 12 months average rate of change,CP00,All-items HICP,IT,Italy,1998-01,1.8
2,M,Monthly,RCH_MV12MAVR,Moving 12 months average rate of change,CP00,All-items HICP,IT,Italy,1998-02,1.8
3,M,Monthly,RCH_MV12MAVR,Moving 12 months average rate of change,CP00,All-items HICP,IT,Italy,1998-03,1.8
4,M,Monthly,RCH_MV12MAVR,Moving 12 months average rate of change,CP00,All-items HICP,IT,Italy,1998-04,1.8


In [12]:
print(tabulate(df.sample(10), headers='keys', tablefmt='github', showindex=False))

| freq_code   | freq_label   | unit_code   | unit_label            | coicop_code   | coicop_label                | geo_code   | geo_label   | time    |   value |
|-------------|--------------|-------------|-----------------------|---------------|-----------------------------|------------|-------------|---------|---------|
| M           | Monthly      | RCH_A       | Annual rate of change | NRG_FOOD_S    | Energy and seasonal food    | IT         | Italy       | 2002-11 |     3   |
| M           | Monthly      | RCH_A       | Annual rate of change | CP0942        | Cultural services           | IT         | Italy       | 2003-03 |     2.9 |
| M           | Monthly      | RCH_A       | Annual rate of change | CP0442        | Refuse collection           | IT         | Italy       | 1998-12 |     3   |
| M           | Monthly      | RCH_A       | Annual rate of change | CP12322       | Articles for babies         | IT         | Italy       | 2020-05 |     0   |
| M           | Monthly     

## Italia

In [1]:
dataset = 'teicp000' # 'teicp000'    prc_hicp_manr   prc_hicp_midx    prc_hicp_mv12r
url = f"https://ec.europa.eu/eurostat/api/dissemination/statistics/1.0/data/{dataset}?geo=IT&format=JSON"
response = requests.get(url)
data = response.json()

NameError: name 'requests' is not defined

In [2]:
url

'https://ec.europa.eu/eurostat/api/dissemination/statistics/1.0/data/teicp000?geo=IT&format=JSON'

In [10]:
response = requests.get(url)

if response.status_code == 200:
    data = response.json()
    # print("- ✅ Connessione riuscita.")
    print("- 📦 Chiavi:", data.keys(), "\n")
    print("- 📐 Dimensioni disponibili:", data["dimension"].keys(), "\n")
    print("- 🔢 Numero di valori:", len(data["value"]), "\n")
else:
    print("❌ Errore:", response.status_code)
    print(response.text)

- 📦 Chiavi: dict_keys(['version', 'class', 'label', 'source', 'updated', 'value', 'status', 'id', 'size', 'dimension', 'extension']) 

- 📐 Dimensioni disponibili: dict_keys(['freq', 'indic_bt', 'nace_r2', 's_adj', 'unit', 'geo', 'time']) 

- 🔢 Numero di valori: 393758 



In [None]:
df = parse_eurostat_data(data)
df['dataset'] = dataset
print(df.head())

import sqlite3
conn = sqlite3.connect('D:/files/Eurostat.sqlite')  # o altro percorso
df.to_sql(f"ITA_{dataset}", conn, if_exists='replace', index=False)
print(f"✅ Tabella 'ITA_{dataset}' salvata con {len(df)} righe.")
conn.close()

- 🔢 Numero di valori: 393758 



KeyError: 'coicop'

## backup


In [None]:
labels = data["dimension"]["geo"]["category"]["label"]
for code, label in sorted(labels.items()):
    print(f"{code}: {label}")

In [21]:
    # === Estrai geo ===
    geo_labels = data["dimension"]["coicop"]["category"]["label"]
    df_geo = pd.DataFrame(sorted(geo_labels.items()), columns=["code", "label"])

    # === Salva in CSV ===
    df_geo.to_csv("D:/coicop_labels.csv", index=False)
    print("📁 csv salvato")

📁 csv salvato


In [3]:
for dim, details in data["dimension"].items():
    labels = details["category"]["label"]
    print(f"{dim} ({len(labels)}):")
    print("  " + " | ".join(f"{code}: {label}" for code, label in sorted(labels.items())))
    print()


freq (1):
  M: Monthly

unit (1):
  RCH_A: Annual rate of change

coicop (467):
  AP: Administered prices | APF: Fully administered prices | APM: Mainly administered prices | AP_NNRG: Administered prices, non-energy | AP_NRG: Administered prices, energy | CP00: All-items HICP | CP01: Food and non-alcoholic beverages | CP011: Food | CP0111: Bread and cereals | CP01111: Rice | CP01112: Flours and other cereals | CP01113: Bread | CP01114: Other bakery products | CP01115: Pizza and quiche | CP01116: Pasta products and couscous | CP01117: Breakfast cereals | CP01118: Other cereal products | CP0112: Meat | CP01121: Beef and veal | CP01122: Pork | CP01123: Lamb and goat | CP01124: Poultry | CP01125: Other meats | CP01126: Edible offal | CP01127: Dried, salted or smoked meat | CP01128: Other meat preparations | CP0113: Fish and seafood | CP01131: Fresh or chilled fish | CP01132: Frozen fish | CP01133: Fresh or chilled seafood | CP01134: Frozen seafood | CP01135: Dried, smoked or salted fish an

In [22]:
# Guarda quante categorie ci sono per ogni dimensione
for dim, details in data["dimension"].items():
    print(f"{dim}: {len(details['category']['index'])} items")

labels = data["dimension"]["unit"]["category"]["label"]
for code, label in sorted(labels.items()):
    print(f"{code}: {label}")

freq: 1 items
unit: 3 items
coicop: 468 items
geo: 45 items
time: 353 items
I05: Index, 2005=100
I15: Index, 2015=100
I96: Index, 1996=100


In [11]:
df.shape

(1851, 6)

In [None]:
print(len(data["value"]))  # Dovrebbe essere 13916
print(df["geo"].unique())  # Controlla se hai solo 'IT'
print(df["coicop"].nunique())  # Quanti coicop diversi sono rimasti?
print(df["time"].nunique())  # Dovrebbero essere 341 mesi
