In [47]:
import pandas as pd

# Carica il file
df = pd.read_csv('https://ec.europa.eu/eurostat/api/dissemination/catalogue/toc/txt?lang=en', sep='\t')
df.insert(0, "row_id", range(1, len(df) + 1))
# Funzione per convertire in Period (mese o trimestre)
def parse_period(value):
    try:
        if 'Q' in str(value):
            return pd.Period(value, freq='Q')
        elif '-' in str(value):
            return pd.Period(value, freq='M')
        else:
            return pd.NaT
    except:
        return pd.NaT

# Applica direttamente ai valori testuali
df['data start'] = df['data start'].apply(parse_period)
df['data end'] = df['data end'].apply(parse_period)


# Conversione diretta delle due colonne
df['last update of data'] = pd.to_datetime(df['last update of data'], format='%d.%m.%Y', errors='coerce')
df['last table structure change'] = pd.to_datetime(df['last table structure change'], format='%d.%m.%Y', errors='coerce')
df.shape

(11859, 9)

In [48]:
df['type'].value_counts(dropna=False)

type
dataset    8532
folder     1862
table      1465
Name: count, dtype: int64

In [52]:
df[df['type'] == 'table'].head()

Unnamed: 0,row_id,title,code,type,last update of data,last table structure change,data start,data end,values,indent,title_clean
4,5,Current account - quarterly data,ei_bpm6ca_q,table,2025-07-03,2025-07-03,1991Q1,2025Q1,291800.0,16,Current account - quarterly data
5,6,Financial account - quarterly ...,ei_bpm6fa_q,table,2025-07-03,2025-07-03,1991Q1,2025Q1,51657.0,16,Financial account - quarterly data
6,7,Current account - monthly data,ei_bpm6ca_m,table,2025-06-19,2025-06-18,1991-01,2025-04,240315.0,16,Current account - monthly data
7,8,Financial account - monthly data,ei_bpm6fa_m,table,2025-06-19,2025-06-18,1991-01,2025-04,79123.0,16,Financial account - monthly data
8,9,International investment posit...,ei_bpm6iip_q,table,2025-07-03,2025-07-03,1992Q4,2025Q1,63632.0,16,International investment position - quarterly ...


In [50]:
# Conta indentazione: numero di spazi iniziali nel titolo
df['indent'] = df['title'].str.extract(r'^( *)')[0].str.len().fillna(0).astype(int)

# Rimuove spazi per il titolo leggibile
df['title_clean'] = df['title'].str.strip()

In [51]:
df.sample()

Unnamed: 0,row_id,title,code,type,last update of data,last table structure change,data start,data end,values,indent,title_clean
9911,9912,Seats held by women in nationa...,sdg_05_50,dataset,2025-04-11,2025-04-11,NaT,NaT,1551.0,16,Seats held by women in national parliaments an...


In [53]:
# Trova la riga che corrisponde a "na10"
row_index = df[df['code'] == 'na10'].index[0]
base_indent = df.at[row_index, 'indent']

# Seleziona righe sotto "na10" e con indent > base
sub_df = df.iloc[row_index + 1:]
sub_df = sub_df[sub_df['indent'] > base_indent]

# Fermati appena trovi un nodo dello stesso livello o superiore
stop_index = sub_df[sub_df['indent'] <= base_indent].index
if not stop_index.empty:
    sub_df = sub_df[:stop_index[0]]

# Filtro finale: solo le tabelle sotto 'na10'
tables_na10 = sub_df[sub_df['type'] == 'table'].copy()

print(tables_na10[['code', 'title_clean']])

               code                                        title_clean
1287       tipsen10    Net trade balance of energy products - % of GDP
1288       tipsna20    Gross fixed capital formation at current prices
1289       tec00115                      Real GDP growth rate - volume
1290       tipsna10  Gross domestic product (GDP) at market prices ...
1291       tipspd10              Private sector debt, non-consolidated
1292      cei_pc032  Generation of waste excluding major mineral wa...
1293       tec00001            Gross domestic product at market prices
1294       tec00013                          Compensation of employees
1295       tipsna30         Terms of trade by items - 5 years % change
1296       tec00010  Final consumption expenditure of general gover...
1297       tet00004          Imports of goods and services in % of GDP
1298       tipsna51  Final consumption expenditure of households an...
1299       tet00003          Exports of goods and services in % of GDP
1300  

In [58]:
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

na = df[df['code'].str.startswith("na")][['title_clean', 'code', 'type','values','indent', 'row_id']]

In [60]:
def classificazione_codice(code):
    if code.startswith("nama_10r"):
        return "Conti regionali"
    elif code.startswith("nama_10"):
        return "Conti nazionali annuali"
    elif code.startswith("namq_10"):
        return "Conti nazionali trimestrali"
    elif code.startswith("naio_10"):
        return "Conti input-output"
    elif code.startswith("nasa_10") or code.startswith("nasq_10"):
        return "Conti satellite"
    elif code.startswith("naidq_10") or code.startswith("naida_10") or code.startswith("naid_10"):
        return "Conti disaggregati"
    elif code.startswith("naidsa_10") or code.startswith("naidsq_10"):
        return "Conti disaggregati (destagionalizzati)"
    else:
        return "Altro"

na['argomento'] = na['code'].apply(classificazione_codice)
na

Unnamed: 0,title_clean,code,type,values,indent,row_id,argomento
62,Gross domestic product (GDP) and main componen...,namq_10_gdp,dataset,7892886.0,20,63,Conti nazionali trimestrali
64,Household final consumption expenditure by dur...,namq_10_fcs,dataset,1047860.0,20,65,Conti nazionali trimestrali
65,Exports and imports by counterpart area,namq_10_exi,dataset,671376.0,20,66,Conti nazionali trimestrali
67,Gross value added and income by main industry ...,namq_10_a10,dataset,5683144.0,20,68,Conti nazionali trimestrali
68,Gross fixed capital formation by main asset type,namq_10_an6,dataset,2654625.0,20,69,Conti nazionali trimestrali
69,Employment by main industry (NACE Rev.2) - nat...,namq_10_a10_e,dataset,3381631.0,20,70,Conti nazionali trimestrali
73,Key indicators - quarterly data,nasq_10_ki,dataset,24575.0,20,74,Conti satellite
74,Non-financial transactions - quarterly data,nasq_10_nf_tr,dataset,3224131.0,20,75,Conti satellite
75,Gross domestic product (GDP) and main componen...,nama_10_gdp,dataset,1050045.0,16,76,Conti nazionali annuali
225,Gross domestic product (GDP) at current market...,nama_10r_2gdp,dataset,71581.0,20,226,Conti regionali
