# D3
## ISTAT SDMX - Resident population on 1st January
### [See on IstatData](https://esploradati.istat.it/databrowser/#/it/dw/categories/IT1,POP,1.0/POP_POPULATION/DCIS_POPRES1/IT1,22_289_DF_DCIS_POPRES1_1,1.0)

In [3]:
import pandas as pd
import json
import requests
import xmltodict
from datetime import datetime
import os

## 1 - Explore datastructure
`http://sdmx.istat.it/SDMXWS/rest/datastructure/IT1/DCIS_POPRES1/`

## 2 - The meaning of the dimensions of the dataset
`http://sdmx.istat.it/SDMXWS/rest/codelist/IT1/CL_ETA1`

## 3 - Explore values in dimensions
`http://sdmx.istat.it/SDMXWS/rest/availableconstraint/22_289`

## 4 - Explore meaning of the values of a dimension
`http://sdmx.istat.it/SDMXWS/rest/codelist/IT1/CL_STATCIV2/`

## 5 - Test query with filters
`https://esploradati.istat.it/SDMXWS/rest/data/22_289/A.IT.JAN.9.TOTAL.99`

In [41]:
# 5 - QUERY WITH FILTERS
# Query all residents, both sexes, total not divided by age and all marital status.
response = requests.get(
    'https://esploradati.istat.it/SDMXWS/rest/data/22_289/A.IT.JAN.9.TOTAL.99',
    timeout=60,  # do not let the notebook hang forever if the endpoint never answers
)
print(response.status_code)

# Stop right here on any failure: carrying on would either raise a NameError on
# json_string_data (fresh kernel) or silently reuse a stale value from an earlier run.
if response.status_code != 200:
    raise RuntimeError(f"Request failed with status code: {response.status_code}")

content = response.content
if len(content) == 0:
    raise ValueError("Empty content received.")

# Convert the XML payload into nested dictionaries. If the payload is not
# well-formed XML, xmltodict.parse raises and the error is left to propagate so the
# problem is visible immediately (the previous json.JSONDecodeError handler could
# never trigger, because nothing in this cell decodes JSON).
xml_data = xmltodict.parse(content)

# Serialise the parsed structure to a JSON string.
json_string_data = json.dumps(
    xml_data,
    allow_nan=True,  # this is already the json.dumps default; kept explicit
    indent=6,        # indentation is only used for pretty-printing
)

print(json_string_data)
type(json_string_data)

200
{
      "message:GenericData": {
            "@xmlns:footer": "http://www.sdmx.org/resources/sdmxml/schemas/v2_1/message/footer",
            "@xmlns:generic": "http://www.sdmx.org/resources/sdmxml/schemas/v2_1/data/generic",
            "@xmlns:message": "http://www.sdmx.org/resources/sdmxml/schemas/v2_1/message",
            "@xmlns:common": "http://www.sdmx.org/resources/sdmxml/schemas/v2_1/common",
            "@xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance",
            "@xmlns:xml": "http://www.w3.org/XML/1998/namespace",
            "message:Header": {
                  "message:ID": "IREF000015",
                  "message:Test": "true",
                  "message:Prepared": "2023-11-06T18:07:52",
                  "message:Sender": {
                        "@id": "NOT_CONFIGURED"
                  },
                  "message:Structure": {
                        "@structureID": "IT1_22_289_1_0",
                        "@dimensionAtObservation": "TIME_PERIOD",


str

In [42]:
# Parse the JSON string into a dictionary
data = json.loads(json_string_data)

# Extract the relevant data in the desired format.
# The query selects exactly one series, so `series` is a single dict here.
series = data['message:GenericData']['message:DataSet']['generic:Series']

# Territory code (REF_AREA) taken from the dimensions of the series key
territory = next(
    (
        key['@value']
        for key in series['generic:SeriesKey']['generic:Value']
        if key['@id'] == 'REF_AREA'
    ),
    None,
)
if not territory:
    # Without a territory there is no key to store the observations under;
    # fail with a clear message instead of an opaque KeyError later on.
    raise ValueError("REF_AREA not found in the series key.")

# xmltodict returns a dict (not a list) when an element has a single child,
# so normalise to a list before iterating over the observations.
observations = series['generic:Obs']
if isinstance(observations, dict):
    observations = [observations]

# One entry per observation; values are kept as the strings found in the payload.
resident_italian_population = {
    territory: [
        {
            "TIME_PERIOD": obs['generic:ObsDimension']['@value'],
            "ObsValue": obs['generic:ObsValue']['@value'],
        }
        for obs in observations
    ]
}

# Convert the result to JSON
resident_italian_population_json = json.dumps(resident_italian_population, indent=4)
print(resident_italian_population_json)
type(resident_italian_population_json)

{
    "IT": [
        {
            "TIME_PERIOD": "2019",
            "ObsValue": "59816673"
        },
        {
            "TIME_PERIOD": "2020",
            "ObsValue": "59641488"
        },
        {
            "TIME_PERIOD": "2021",
            "ObsValue": "59236213"
        },
        {
            "TIME_PERIOD": "2022",
            "ObsValue": "59030133"
        },
        {
            "TIME_PERIOD": "2023",
            "ObsValue": "58850717"
        }
    ]
}


str

In [45]:
# Persist the total resident population JSON to disk

# Destination folder and file name of the cleaned dataset
folder_path = "../_datasets/Clean/D3"
filename = "resident_italian_population.json"

# Make sure the destination folder exists (no error if it is already there)
os.makedirs(folder_path, exist_ok=True)

# Full path of the output file
file_path = os.path.join(folder_path, filename)

# Write the already-serialised JSON string as is
with open(file_path, "w") as out_file:
    out_file.write(resident_italian_population_json)

print(f"JSON data saved to: {file_path}")

JSON data saved to: ../_datasets/Clean/D3/resident_italian_population.json


# Query all ages in order to calculate the average age of the population

In [5]:
# QUERY WITH FILTERS
# Query all residents, both sexes, all marital status, with the AGE dimension left
# empty (wildcard) so that one series per age is returned.
response = requests.get(
    'https://esploradati.istat.it/SDMXWS/rest/data/22_289/A.IT.JAN.9..99',
    timeout=60,  # do not let the notebook hang forever if the endpoint never answers
)
print(response.status_code)

# Stop right here on any failure: carrying on would either raise a NameError on
# json_string_data_age (fresh kernel) or silently reuse a stale value from an earlier run.
if response.status_code != 200:
    raise RuntimeError(f"Request failed with status code: {response.status_code}")

content = response.content
if len(content) == 0:
    raise ValueError("Empty content received.")

# Convert the XML payload into nested dictionaries. If the payload is not
# well-formed XML, xmltodict.parse raises and the error is left to propagate so the
# problem is visible immediately (the previous json.JSONDecodeError handler could
# never trigger, because nothing in this cell decodes JSON).
xml_data = xmltodict.parse(content)

# Serialise the parsed structure to a JSON string.
json_string_data_age = json.dumps(
    xml_data,
    allow_nan=True,  # this is already the json.dumps default; kept explicit
    indent=6,        # indentation is only used for pretty-printing
)

type(json_string_data_age)

200


str

In [6]:
# Parse the JSON string into a dictionary
age_data_dict = json.loads(json_string_data_age)

# Extract the series data
series = age_data_dict["message:GenericData"]["message:DataSet"]["generic:Series"]
# xmltodict returns a dict (not a list) when an element has a single child,
# so normalise to a list before iterating.
if isinstance(series, dict):
    series = [series]

# Flat list of records, one per (age, year) pair
age_data = []

for series_data in series:
    key_values = series_data["generic:SeriesKey"]["generic:Value"]

    # Territory (REF_AREA) of this series; extracted but not stored in the records
    territory = next(
        (value["@value"] for value in key_values if value["@id"] == "REF_AREA"),
        None,
    )

    # The age code belongs to the series key, so it is looked up once per series
    # rather than once per observation.
    age_code = next(
        (value["@value"] for value in key_values if value["@id"] == "AGE"),
        None,
    )

    # Skip series without an AGE dimension and the aggregate age "TOTAL"
    if age_code is None or age_code == "TOTAL":
        continue

    if age_code == "Y_GE100":
        # "Y_GE100" has no numeric suffix; it is stored as 100
        age = 100
    else:
        # Drop the leading 'Y' and convert the remainder to an integer
        age = int(age_code[1:])

    # Same single-child normalisation as above, for the observations
    obs_list = series_data["generic:Obs"]
    if isinstance(obs_list, dict):
        obs_list = [obs_list]

    # Store the data by age, with year and population converted to integers
    for obs in obs_list:
        age_data.append({
            "AGE": age,
            "YEAR": int(obs["generic:ObsDimension"]["@value"]),
            "POPULATION": int(obs["generic:ObsValue"]["@value"]),
        })

# Convert the age data to JSON
resident_italian_population_age_json = json.dumps(age_data, indent=4)

print(resident_italian_population_age_json)
type(resident_italian_population_age_json)

[
    {
        "AGE": 100,
        "YEAR": 2019,
        "POPULATION": 14132
    },
    {
        "AGE": 100,
        "YEAR": 2020,
        "POPULATION": 14804
    },
    {
        "AGE": 100,
        "YEAR": 2021,
        "POPULATION": 17177
    },
    {
        "AGE": 100,
        "YEAR": 2022,
        "POPULATION": 19714
    },
    {
        "AGE": 100,
        "YEAR": 2023,
        "POPULATION": 21872
    },
    {
        "AGE": 0,
        "YEAR": 2019,
        "POPULATION": 431007
    },
    {
        "AGE": 0,
        "YEAR": 2020,
        "POPULATION": 414974
    },
    {
        "AGE": 0,
        "YEAR": 2021,
        "POPULATION": 404956
    },
    {
        "AGE": 0,
        "YEAR": 2022,
        "POPULATION": 400626
    },
    {
        "AGE": 0,
        "YEAR": 2023,
        "POPULATION": 395348
    },
    {
        "AGE": 1,
        "YEAR": 2019,
        "POPULATION": 455746
    },
    {
        "AGE": 1,
        "YEAR": 2020,
        "POPULATION": 436785
    },
    {
   

str

## ATTENTION: Istat doesn't distinguish between ages of 100 or more; they are all grouped under "Y_GE100".

In [17]:
# Persist the per-age resident population JSON to disk

# Destination folder and file name of the cleaned dataset
folder_path = "../_datasets/Clean/D3"
filename = "resident_italian_population_age.json"

# Make sure the destination folder exists (no error if it is already there)
os.makedirs(folder_path, exist_ok=True)

# Full path of the output file
file_path = os.path.join(folder_path, filename)

# Write the already-serialised JSON string as is
with open(file_path, "w") as out_file:
    out_file.write(resident_italian_population_age_json)

print(f"JSON data saved to: {file_path}")

JSON data saved to: ../_datasets/Clean/D3/resident_italian_population_age.json
