In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import acf

# Caricamento dataset M3 Monthly (link Kaggle o CSV locale)
url = "https://view.officeapps.live.com/op/view.aspx?src=https%3A%2F%2Fforecasters.org%2Fdata%2Fm3comp%2FM3C.xls&wdOrigin=BROWSELINK"
df = pd.read_csv(url)

# Ogni serie ha ID, Categoria, Dati
# Supponiamo formato: ID | Category | Value1 | Value2 | ... 
# Adattiamo in base al file reale

def compute_metrics(series):
    """Calcola indicatori statistici della serie"""
    y = series.dropna().values
    n = len(y)
    mean = np.mean(y)
    std = np.std(y)
    cv = std / mean if mean != 0 else np.nan
    # autocorrelazione a lag 12 (stagionalità annuale)
    if n > 13:
        ac = acf(y, nlags=12, fft=True)
        seas12 = ac[12]
    else:
        seas12 = np.nan
    return {"length": n, "mean": mean, "std": std, "cv": cv, "seasonality": seas12}

# Costruzione tabella metriche
records = []
for idx, row in df.iterrows():
    series_id = row["Series"]
    category = row["Category"]
    values = row.drop(["Series", "Category"])
    metrics = compute_metrics(values)
    metrics.update({"id": series_id, "category": category})
    records.append(metrics)

metrics_df = pd.DataFrame(records)

# Selezione 3 serie per categoria
selected = []
for cat, group in metrics_df.groupby("category"):
    # 1) Serie più stagionale
    s1 = group.loc[group["seasonality"].idxmax()]
    # 2) Serie più rumorosa (CV alto)
    s2 = group.loc[group["cv"].idxmax()]
    # 3) Serie intermedia (vicina alla mediana CV)
    median_cv = group["cv"].median()
    s3 = group.iloc[(group["cv"]-median_cv).abs().argsort()[:1]].iloc[0]
    selected.extend([s1, s2, s3])

selected_df = pd.DataFrame(selected)

# Salva risultati
selected_df.to_csv("M3_monthly_selected.csv", index=False)
print("Serie selezionate per categoria salvate in M3_monthly_selected.csv")

# Esempio: grafico di una serie selezionata
example_id = selected_df.iloc[0]["id"]
y = df[df["Series"] == example_id].drop(["Series","Category"], axis=1).values.flatten()
plt.plot(y)
plt.title(f"Serie {example_id} ({selected_df.iloc[0]['category']})")
plt.show()


KeyError: 'Series'