In [1]:
from datetime import datetime, timezone

import feedparser
import pandas as pd

# Official Eurostat RSS feed announcing dataset updates
rss_url = "https://ec.europa.eu/eurostat/api/dissemination/catalogue/rss/en/statistics-update.rss"
# One CSV per UTC day. datetime.utcnow() is deprecated since Python 3.12, so an
# aware UTC timestamp is used instead; the formatted date string is identical.
today_str = datetime.now(timezone.utc).strftime('%Y-%m-%d')
# Relative name: the file is written to the current working directory.
csv_file = f"eurostat_updates_{today_str}.csv"

# Parse the RSS feed. feedparser does not raise on download/parse problems: it
# sets `bozo` and returns whatever entries it managed to recover, so a failed
# fetch has to be detected explicitly.
feed = feedparser.parse(rss_url)
if feed.bozo and not feed.entries:
    raise RuntimeError(
        f"Could not read RSS feed {rss_url}: {feed.get('bozo_exception')!r}"
    )

# One record per feed entry. Every field is read with .get() so that an entry
# lacking an optional RSS element yields an empty value instead of aborting
# the whole run with AttributeError.
records = [
    {
        'published': entry.get('published', ''),
        'title': entry.get('title', ''),
        'description': entry.get('description', '').strip(),
        'link': entry.get('link', ''),
    }
    for entry in feed.entries
]

# Build the DataFrame. Explicit columns keep the frame well-formed even when
# the feed has no entries (otherwise df['title'] below raises KeyError).
df = pd.DataFrame(records, columns=['published', 'title', 'description', 'link'])
# dataset_id is the part of the title before the first ' - ' separator.
df['dataset_id'] = df['title'].str.split(' - ').str[0].str.strip()

# Reorder the columns and list the newest updates first.
# NOTE(review): 'published' is sorted as text; this orders chronologically only
# if the feed uses an ISO-like 'YYYY-MM-DD hh:mm:ss' format — confirm.
df = df[['published', 'dataset_id', 'title', 'description', 'link']]
df = df.sort_values('published', ascending=False)

# Data-API URL for each dataset, restricted to geo=IT (vectorised string
# concatenation instead of a row-wise apply).
df['json'] = (
    "https://ec.europa.eu/eurostat/api/dissemination/statistics/1.0/data/"
    + df['dataset_id']
    + "?geo=IT"
)

# Save to CSV
df.to_csv(csv_file, index=False)

# Show up to 300 rows as a GitHub-flavoured markdown table.
print(df[['published','dataset_id', 'description','link']].head(300).to_markdown(index=False, tablefmt="github"))

| published             | dataset_id           | description                                                                                                                                                    | link                                                                                                    |
|-----------------------|----------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------|
| 2025-07-16 23:00:00.0 | INDIC_EN             | Energy indicator                                                                                                                                               | https://ec.europa.eu/eurostat/api/dissemination/sdmx/2.1/codelist/ESTAT/INDIC_EN/4.0?format=TSV&lang=EN |
| 2025-07-16 23:00:00.0 | TAG00103             | Roo

In [2]:
import pandas as pd
import glob
import os

pd.set_option('display.max_colwidth', None)  # Show the full content of each cell
pd.set_option('display.max_columns', None)   # Show every column
pd.set_option('display.width', 0)            # 0 is meant to let pandas choose the width automatically

# Folder that holds the daily CSV files
cartella = r"D:\PKM\Github\Eurostat\codice\csv"
# Find every daily CSV in the folder. glob returns paths in arbitrary order, so
# sort them: this makes the row kept by drop_duplicates below deterministic.
lista_file = sorted(glob.glob(os.path.join(cartella, "eurostat_updates_*.csv")))
if not lista_file:
    # pd.concat on an empty sequence fails with an unhelpful
    # "No objects to concatenate"; report the real cause instead.
    raise FileNotFoundError(
        f"No 'eurostat_updates_*.csv' files found in {cartella}"
    )

# Read and concatenate all CSVs, keep the columns of interest, and keep only
# the first row seen for each dataset_id.
df = pd.concat((pd.read_csv(file) for file in lista_file), ignore_index=True)
df = df[['dataset_id', 'description', 'link', 'json']]
df = df.drop_duplicates(subset='dataset_id')

In [3]:
# Datasets whose description mentions "production" (case-insensitive; rows with
# a missing description are excluded), ordered by dataset_id.
(
    df.loc[
        df['description'].str.contains("production", case=False, na=False),
        ['dataset_id', 'description'],
    ]
    .sort_values(by='dataset_id')
)

Unnamed: 0,dataset_id,description
1097,APRO_CPNH1,Crop production in national humidity
1098,APRO_CPSH1,Crop production in EU standard humidity
302,APRO_MT_PHEADM,Meat production and foreign trade - head - monthly data
633,EI_ISSPR_M,Production in services growth rates by NACE Rev.2 activity - monthly data
634,EI_ISSP_M,Production in services index by NACE Rev.2 activity - monthly data
635,EI_ISSP_Q,Production in services index by NACE Rev.2 activity - quarterly data
80,NRG_TI_COIFPM,Crude oil imports by field of production - monthly data
78,ORG_APROD,Organic production of animal products
77,ORG_AQTSPEC,Organic production of aquaculture products
74,ORG_CROPAR,Organic crop area by agricultural production methods and crops
