In [5]:
import feedparser
import pandas as pd
from datetime import datetime

# URL del feed RSS ufficiale Eurostat
rss_url = "https://ec.europa.eu/eurostat/api/dissemination/catalogue/rss/en/statistics-update.rss"
today_str = datetime.utcnow().strftime('%Y-%m-%d')
csv_file = f"eurostat_updates_{today_str}.csv"

In [4]:
# Parsing feed RSS
feed = feedparser.parse(rss_url)
records = []
for entry in feed.entries:
    records.append({
        'published': entry.published,
        'title': entry.title,
        'description': entry.get('description', '').strip(),
        'link': entry.link
    })

# DataFrame e dataset_id
df = pd.DataFrame(records)
df['dataset_id'] = df['title'].str.split(' - ').str[0].str.strip()

# Riordina le colonne
df = df[['published', 'dataset_id', 'title', 'description', 'link']]
df = df.sort_values('published', ascending=False)

# Salva su CSV
df.to_csv(csv_file, index=False)

# Calcolo larghezze colonne per allineamento a sinistra
col_widths = {
    'published': 20,
    'dataset_id': max(df['dataset_id'].str.len().max(), 10),
    'title': min(max(df['title'].str.len().max(), 40), 60),
    'description': min(max(df['description'].str.len().max(), 50), 80),
    'link': min(max(df['link'].str.len().max(), 40), 80)
}

# Intestazioni
print(
    f"{'published'.ljust(col_widths['published'])}  "
    f"{'dataset_id'.ljust(col_widths['dataset_id'])}  "
    f"{'title'.ljust(col_widths['title'])}  "
    f"{'description'.ljust(col_widths['description'])}  "
    f"{'link'.ljust(col_widths['link'])}"
)

# Separatore
total_width = sum(col_widths.values()) + 8
print('-' * total_width)

# Prime righe
for _, row in df.head(1000).iterrows():
    print(
        f"{row['published'].ljust(col_widths['published'])}  "
        f"{row['dataset_id'].ljust(col_widths['dataset_id'])}  "
        f"{row['description'].ljust(col_widths['description'])}  "
        f"{row['link'].ljust(col_widths['link'])}"
    )


KeyError: 'title'