In [1]:
import pandas as pd
import requests
import json

url_base = "http://sdmx.istat.it/SDMXWS/rest/"
decoding = "utf-8-sig"

In [2]:
def get_available(dataflow_name:str=None, first=-1) -> pd.DataFrame:
    url = f"{url_base}dataflow?format=jsonstructure"
    response = requests.get(url)

    if response.status_code != 200:
        print(f"Failed to retrieve dataflows, status code: {response.status_code}")
        return
    
    content = response.content.decode(decoding)
    dataflows = json.loads(content).get('data', {}).get('dataflows', [])
    filtered_data = []

    for df in dataflows[:first]:
        name = df.get('name', {}).get('en', '')
        if not dataflow_name or dataflow_name.lower() in name.lower():
            filtered_data.append({
                'ID': df.get('id', ''),
                'Name': name,
                'Version': df.get('version', ''),
                'Agency': df.get('agencyID', '')
            })
    
    dataflows_df = pd.DataFrame(filtered_data)
    print(dataflows_df)


def get_structure() -> pd.DataFrame:
    return None



def search_by_id(dataflow_id:str, startPeriod:int, endPeriod:int):
    data_url = f"{url_base}data/{dataflow_id}?startPeriod={startPeriod}&endPeriod={endPeriod}&format=jsondata"
    response = requests.get(data_url)

    if response.status_code != 200:
        print(f"Failed to retrieve dataflows, status code: {response.status_code}")
        return
    
    content = response.content.decode(decoding)
    data = json.loads(content)

    series_data = data['data']['dataSets'][0]['series']
    structure = data['data']['structure']
    series_dimensions = structure['dimensions']['series']
    observation_dimension = structure['dimensions']['observation'][0]

    # Mappa degli indici temporali
    time_map = {str(i): val['id'] for i, val in enumerate(observation_dimension['values'])}

    # ID delle dimensioni (es. FREQ, CROP, ecc.)
    dim_ids = [dim['id'] for dim in series_dimensions]

    # Mappa di ciascun valore della dimensione: es. {'0': 'A', '1': 'M'} etc.
    dim_value_maps = [
        {str(i): val['id'] for i, val in enumerate(dim['values'])}
        for dim in series_dimensions
    ]

    # Dictionary for each code in the database
    data_dict = {value['id']: value['name']['en'] for dim in series_dimensions for value in dim['values']}

    records = []

    for series_key, series in series_data.items():
        dim_indices = series_key.split(':')

        # Estrai i valori delle dimensioni per questa serie
        dim_values = {
            dim_ids[i]: dim_value_maps[i].get(dim_indices[i], f"unknown_{dim_indices[i]}")
            for i in range(len(dim_indices))
        }

        for obs_index, obs_value in series['observations'].items():
            value = obs_value[0] if obs_value else None
            time_period = time_map.get(obs_index, f"unknown_{obs_index}")

            record = {
                **dim_values,
                'period': time_period,
                'value': value
            }
        records.append(record)

    df = pd.DataFrame(records)
    return data_dict, df if dict else df

In [3]:
get_available(dataflow_name='pop')

          ID                                               Name Version Agency
0   152_1181  Inactive population  - monthly data- previous ...     1.0    IT1
1   152_1183  Inactive population - quarterly seasonally adj...     1.0    IT1
2   152_1185  Inactive  population - previous regulation (un...     1.0    IT1
3    152_879                Inactive population  - monthly data     1.2    IT1
4    152_887  Inactive population - quarterly seasonally adj...     1.3    IT1
5    152_928                               Inactive  population     1.2    IT1
6    164_164    Estimated resident population - Years 2002-2019     1.1    IT1
7    22_1201  Semi-supercentenarian population (105 years an...     1.1    IT1
8     22_289                Resident population  on 1st January     1.5    IT1
9     22_315                     Resident population  - balance     2.1    IT1
10   283_138  New series of estimates on the resident popula...     1.0    IT1
11   52_1194  Population 15 years and over by highes

In [4]:
data_dict, df = search_by_id('532_930', startPeriod=2018, endPeriod=2019)

In [5]:
df

Unnamed: 0,FREQ,CITTADINANZA,CONDIZIONE_PROF,CONDIZIONE_PROF_EU,CLASSE_ETA,ITTER107,RUOLO_FAM,SESSO,STUDENTE,TIPO_DATO,period,value
0,A,FRG,99,EMP,Y_GE15,IT,TOT,1,9,POP,2019,1338.588
1,A,FRG,99,EMP,Y_GE15,IT,TOT,2,9,POP,2019,1041.116
2,A,FRG,99,EMP,Y_GE15,IT,TOT,9,9,POP,2019,2379.704
3,A,FRG,99,EMP,Y_GE15,ITC,TOT,1,9,POP,2019,469.214
4,A,FRG,99,EMP,Y_GE15,ITC,TOT,2,9,POP,2019,339.216
...,...,...,...,...,...,...,...,...,...,...,...,...
17384,Q,TOTAL,99,UNEM,Y70-74,ITE,TOT,1,9,POP,2018-Q4,0.198
17385,Q,TOTAL,99,UNEM,Y70-74,ITE,TOT,9,9,POP,2018-Q4,0.198
17386,Q,TOTAL,99,UNEM,Y70-74,ITFG,TOT,1,9,POP,2018-Q3,0.365
17387,Q,TOTAL,99,UNEM,Y70-74,ITFG,TOT,2,9,POP,2018-Q3,0.352


In [11]:
data_url = f"{url_base}data/532_930?startPeriod=2018&endPeriod=2019&format=jsondata"
response = requests.get(data_url)

content = response.content.decode(decoding)
data = json.loads(content)

In [12]:
series_data = data['data']['dataSets'][0]['series']
structure = data['data']['structure']
series_dimensions = structure['dimensions']['series']
observation_dimension = structure['dimensions']['observation'][0]

# Mappa degli indici temporali
time_map = {str(i): val['id'] for i, val in enumerate(observation_dimension['values'])}

# ID delle dimensioni (es. FREQ, CROP, ecc.)
dim_ids = [dim['id'] for dim in series_dimensions]

# Mappa di ciascun valore della dimensione: es. {'0': 'A', '1': 'M'} etc.
dim_value_maps = [
    {str(i): val['id'] for i, val in enumerate(dim['values'])}
    for dim in series_dimensions
]

In [38]:
data_dict = {value['id']: value['name']['en'] for dim in series_dimensions for value in dim['values']}
data_dict['A']

'annual'