# D1(a)
## Resident foreigners on 1st January (2019-2023)
### [IstatData](https://esploradati.istat.it/databrowser/#/it/dw/categories/IT1,POP,1.0/POP_FOREIGNIM/DCIS_POPSTRRES1/IT1,29_7_DF_DCIS_POPSTRRES1_1,1.0)

In [1]:
#!pip install pandas requests requests_cache xmltodict

In [2]:
import json
import os
from datetime import datetime
from xml.parsers.expat import ExpatError

import pandas as pd
import requests
import xmltodict

In [4]:
# 1 -  Explore datastructure
# Fetch the data structure definition of the dataset and keep it as a
# pretty-printed JSON string in `json_string_data` (None when nothing usable
# was received).

response = requests.get('https://esploradati.istat.it/SDMXWS/rest/datastructure/IT1/DCIS_POPSTRRES1/')
print(response.status_code)

# Reset first so that a failed request or parse leaves an explicit None
# instead of an undefined name or a value from a previous run.
json_string_data = None

if response.status_code == 200:
    content = response.content

    if content:
        try:
            # The service answers with XML; xmltodict turns it into nested dicts
            xml_data = xmltodict.parse(content)
        except ExpatError as e:
            # xmltodict parses with expat, which reports malformed XML as ExpatError
            print("Error parsing XML:", e)
        else:
            json_string_data = json.dumps(
                xml_data,
                allow_nan=True,  # the json.dumps default, spelled out: NaN/Infinity
                                 # floats are serialized instead of raising ValueError
                indent=6,        # indentation is only for pretty-printing
            )
    else:
        print("Empty content received.")
else:
    print("Request failed with status code:", response.status_code)

# Uncomment the following line to see the resulting JSON string
# print(json_string_data)
type(json_string_data)

200


str

In [8]:
# 2 - Explore the meaning of the dimensions of the dataset
# Fetch the CL_ITTER107 code list and keep it as a pretty-printed JSON string
# in `json_string_data` (None when nothing usable was received).

response = requests.get('https://esploradati.istat.it/SDMXWS/rest/codelist/IT1/CL_ITTER107')
print(response.status_code)

# Reset first so that a failed request or parse cannot leave the result of a
# previously executed cell behind.
json_string_data = None

if response.status_code == 200:
    content = response.content

    if content:
        try:
            # The service answers with XML; xmltodict turns it into nested dicts
            xml_data = xmltodict.parse(content)
        except ExpatError as e:
            # xmltodict parses with expat, which reports malformed XML as ExpatError
            print("Error parsing XML:", e)
        else:
            json_string_data = json.dumps(
                xml_data,
                allow_nan=True,  # the json.dumps default, spelled out: NaN/Infinity
                                 # floats are serialized instead of raising ValueError
                indent=6,        # indentation is only for pretty-printing
            )
    else:
        print("Empty content received.")
else:
    print("Request failed with status code:", response.status_code)

# Uncomment the following line to see the resulting JSON string
# print(json_string_data)

200


In [9]:
# 3 -  Explore values in Dimensions
# Fetch the available constraints of the dataflow and keep them as a
# pretty-printed JSON string in `json_string_data` (None when nothing usable
# was received).

response = requests.get('https://esploradati.istat.it/SDMXWS/rest/availableconstraint/29_7_DF_DCIS_POPSTRRES1_1')
print(response.status_code)

# Reset first so that a failed request or parse cannot leave the result of a
# previously executed cell behind.
json_string_data = None

if response.status_code == 200:
    content = response.content

    if content:
        try:
            # The service answers with XML; xmltodict turns it into nested dicts
            xml_data = xmltodict.parse(content)
        except ExpatError as e:
            # xmltodict parses with expat, which reports malformed XML as ExpatError
            print("Error parsing XML:", e)
        else:
            json_string_data = json.dumps(
                xml_data,
                allow_nan=True,  # the json.dumps default, spelled out: NaN/Infinity
                                 # floats are serialized instead of raising ValueError
                indent=6,        # indentation is only for pretty-printing
            )
    else:
        print("Empty content received.")
else:
    print("Request failed with status code:", response.status_code)

# Printing is disabled in the documentation because the response is too long to be shown here. Uncomment to see it.
# print(json_string_data)

200


In [3]:
# 4 -  Final query with filters
# Fetch the filtered data and keep it as a pretty-printed JSON string in
# `json_string_data` (None when nothing usable was received).

response = requests.get('https://esploradati.istat.it/SDMXWS/rest/data/29_7_DF_DCIS_POPSTRRES1_1/A.IT+ITCD+ITC+ITD+ITE+ITF+ITG.JAN.9.TOTAL')
print(response.status_code)

# Reset first: without this, a failed request or parse would leave the output
# of an earlier exploration cell in place and it would be processed as if it
# were the data response.
json_string_data = None

if response.status_code == 200:
    content = response.content

    if content:
        try:
            # The service answers with XML; xmltodict turns it into nested dicts
            xml_data = xmltodict.parse(content)
        except ExpatError as e:
            # xmltodict parses with expat, which reports malformed XML as ExpatError
            print("Error parsing XML:", e)
        else:
            json_string_data = json.dumps(
                xml_data,
                allow_nan=True,  # the json.dumps default, spelled out: NaN/Infinity
                                 # floats are serialized instead of raising ValueError
                indent=6,        # indentation is only for pretty-printing
            )
    else:
        print("Empty content received.")
else:
    print("Request failed with status code:", response.status_code)

# Uncomment the following line to see the resulting JSON string
# print(json_string_data)

200


In [6]:
# Creating a nested dictionary from the response in order to create a clean JSON for our visualization.
# Every observation of every series becomes one flat record with the keys
# Territory, Year, Sex, Age and Quantity; the list of records is then
# serialized into the JSON string `immigrants_distribution`.

nested_dict = json.loads(json_string_data)

# Translation dictionary
sex_translation = {
    '1': 'Male',
    '2': 'Female',
    '9': 'TOTAL'
}


def _as_list(node):
    """Return *node* unchanged if it is a list, otherwise wrap it in a list.

    xmltodict represents an element that occurs only once as a single dict
    rather than a one-item list; iterating that dict would walk its keys.
    """
    return node if isinstance(node, list) else [node]


# Extracting information
result = []

series_data = _as_list(nested_dict['message:GenericData']['message:DataSet']['generic:Series'])
for series in series_data:
    series_key = series['generic:SeriesKey']
    key_values = _as_list(series_key['generic:Value'])
    territory = None
    sex = None

    for value in key_values:
        if value['@id'] == 'REF_AREA':
            territory = value['@value']
        elif value['@id'] == 'SEX':
            sex_value = value['@value']
            # Codes missing from the translation table yield None
            sex = sex_translation.get(sex_value)

    # Taken by position (fifth key value), not by id — presumably the age
    # dimension; confirm against the data structure. It is constant for the
    # whole series, so it is read once instead of once per observation.
    age = key_values[4]['@value']

    obs_values = _as_list(series['generic:Obs'])
    for obs in obs_values:
        year = obs['generic:ObsDimension']['@value']
        quantity = obs['generic:ObsValue']['@value']

        entry = {
            'Territory': territory,
            'Year': int(year),
            'Sex': sex,
            'Age': age,
            'Quantity': int(quantity)
        }
        result.append(entry)

# Convert result to JSON
immigrants_distribution = json.dumps(result)

# Uncomment the following line to see the resulting JSON string
# print(immigrants_distribution)
type(immigrants_distribution)

str

In [7]:
# Write the cleaned records to disk as an indented JSON file.

# Decode the JSON string back into Python objects so it can be re-serialized with indentation
immigrants_distribution_json = json.loads(immigrants_distribution)

# Destination folder and file name of the output
folder_path = "../_datasets/Clean/D1(a)"
filename = "immigrants_distribution.json"
file_path = os.path.join(folder_path, filename)

# Make sure the destination folder is there before writing into it
os.makedirs(folder_path, exist_ok=True)

# Serialize the immigrants_distribution records and store them in the file
with open(file_path, "w") as out_file:
    out_file.write(json.dumps(immigrants_distribution_json, indent=4))

print(f"JSON data saved to: {file_path}")

JSON data saved to: ../_datasets/Clean/D1(a)/immigrants_distribution.json
