# D2
## Country of Citizenship
### [See on IstatData](https://esploradati.istat.it/databrowser/#/en/dw/categories/IT1,POP,1.0/POP_FOREIGNIM/DCIS_POPSTRCIT1/IT1,29_317_DF_DCIS_POPSTRCIT1_1,1.0)

In [1]:
#!pip install pandas requests requests_cache xmltodict pycountry

In [2]:
import pandas as pd
import json
import requests
import xmltodict
from datetime import datetime
import os
import pycountry # converts ISO country code to country name

In [3]:
# 1 -  Explore datastructure

response = requests.get('https://esploradati.istat.it/SDMXWS/rest/datastructure/IT1/DCIS_POPSTRCIT1/')
print(response.status_code)

if response.status_code == 200:
    content = response.content
    
    if len(content) > 0:
        try:
            xml_data = xmltodict.parse(content)
            json_string_data = json.dumps(xml_data,
                                    allow_nan = True, # If we hadn't set allow_nan to
                                                      # true we would have got
                                                      # ValueError: Out of range float
                                                      # values are not JSON compliant
                                    indent = 6) # Indentation can be used for pretty-printing
            # Now you can work with the parsed JSON data
        except json.JSONDecodeError as e:
            print("Error decoding JSON:", e)
    else:
        print("Empty content received.")
else:
    print("Request failed with status code:", response.status_code)

# Uncomment the following line to see the resulting JSON string
# print(json_string_data)
type(json_string_data)

200


str

In [4]:
# 2 - Explore the meaning of the dimensions of the dataset

response = requests.get('https://esploradati.istat.it/SDMXWS/rest/codelist/IT1/CL_TIPO_DATO15')
print(response.status_code)

if response.status_code == 200:
    content = response.content
    
    if len(content) > 0:
        try:
            xml_data = xmltodict.parse(content)
            json_string_data = json.dumps(xml_data,
                                    allow_nan = True, # If we hadn't set allow_nan to
                                                      # true we would have got
                                                      # ValueError: Out of range float
                                                      # values are not JSON compliant
                                    indent = 6) # Indentation can be used for pretty-printing
            # Now you can work with the parsed JSON data
        except json.JSONDecodeError as e:
            print("Error decoding JSON:", e)
    else:
        print("Empty content received.")
else:
    print("Request failed with status code:", response.status_code)

# Uncomment the following line to see the resulting JSON string
#print(json_string_data)

200


In [5]:
# 3 -  Explore values in Dimensions

response = requests.get('https://esploradati.istat.it/SDMXWS/rest/availableconstraint/29_317_DF_DCIS_POPSTRCIT1_1')
print(response.status_code)

if response.status_code == 200:
    content = response.content
    
    if len(content) > 0:
        try:
            xml_data = xmltodict.parse(content)
            json_string_data = json.dumps(xml_data,
                                    allow_nan = True, # If we hadn't set allow_nan to
                                                      # true we would have got
                                                      # ValueError: Out of range float
                                                      # values are not JSON compliant
                                    indent = 6) # Indentation can be used for pretty-printing
            # Now you can work with the parsed JSON data
        except json.JSONDecodeError as e:
            print("Error decoding JSON:", e)
    else:
        print("Empty content received.")
else:
    print("Request failed with status code:", response.status_code)

# Print is disable in documentation since the response is too long to be shown here. Uncomment to see it.
# print(json_string_data)

200


In [6]:
# 4 -  Final query with filters

response = requests.get('https://esploradati.istat.it/SDMXWS/rest/data/29_317_DF_DCIS_POPSTRCIT1_1/A.IT.FJAN.9..')
print(response.status_code)

if response.status_code == 200:
    content = response.content
    
    if len(content) > 0:
        try:
            xml_data = xmltodict.parse(content)
            json_string_data = json.dumps(xml_data,
                                    allow_nan = True, # If we hadn't set allow_nan to
                                                      # true we would have got
                                                      # ValueError: Out of range float
                                                      # values are not JSON compliant
                                    indent = 6) # Indentation can be used for pretty-printing
            # Now you can work with the parsed JSON data
        except json.JSONDecodeError as e:
            print("Error decoding JSON:", e)
    else:
        print("Empty content received.")
else:
    print("Request failed with status code:", response.status_code)

# Uncomment the following line to see the resulting JSON string
#print(json_string_data)

200


In [7]:
import json

# Assuming json_string_data contains the JSON structure provided

data = json.loads(json_string_data)

result = []

for series in data['message:GenericData']['message:DataSet']['generic:Series']:
    series_key = series['generic:SeriesKey']['generic:Value']
    obs_list = series['generic:Obs']
    
    territory = None
    sex = None
    citizenship = None
    
    for item in series_key:
        if item['@id'] == 'REF_AREA':
            territory = item['@value']
        elif item['@id'] == 'SEX':
            sex = item['@value']
        elif item['@id'] == 'CITIZENSHIP':
            citizenship = item['@value']
    
    for obs in obs_list:
        if isinstance(obs, dict):
            obs_dimensions = obs.get('generic:ObsDimension')
            if isinstance(obs_dimensions, dict) and obs_dimensions.get('@id') == 'TIME_PERIOD':
                year = int(obs_dimensions.get('@value'))
                obs_value = obs['generic:ObsValue']['@value']
                population = int(obs_value)
                
                result.append({
                    'TERRITORY': territory,
                    'YEAR': year,
                    'SEX': sex,
                    'CITIZENSHIP': citizenship,
                    'POPULATION': population
                })

# Convert the result list to JSON
cleaned_json = json.dumps(result, indent=4)
#print(cleaned_json)

In [8]:
# Convert the cleaned JSON string back to a list of dictionaries
data = json.loads(cleaned_json)

# Filter out entries with specific citizenship values
filtered_data = [entry for entry in data if entry['CITIZENSHIP'] not in ('ASI_W', 'ASI_E', 'XASI_C_S', 'AME_N', 'AME_C_S', 'EU28', 'AFR_E', 'AFR_C_S', 'AFR_W', 'AFR_N')]

# Convert the filtered list back to JSON
cleaned_filtered_json = json.dumps(filtered_data, indent=4)
#print(cleaned_filtered_json)

In [10]:
import json
import pycountry
# Assuming 'cleaned_filtered_json' contains the JSON data as a string after filtering

# Convert the JSON string back to a list of dictionaries
data = json.loads(cleaned_filtered_json)

# Function to get country name from abbreviation
def get_country_name(abbreviation):
    try:
        return pycountry.countries.get(alpha_2=abbreviation).name
    except AttributeError:
        return None

# Iterate through the data and replace the abbreviations with country names
for entry in data:
    country_name = get_country_name(entry['CITIZENSHIP'])
    if country_name:
        entry['CITIZENSHIP'] = country_name

# Convert the updated list back to JSON
updated_json = json.dumps(data, indent=4)
#print(updated_json)

In [11]:
# Convert immigrants_distribution string to JSON object
immigrants_citizenship_json = json.loads(updated_json)

# Specify the folder path to save the JSON file
folder_path = "../_datasets/Clean/D2"

# Create the folder if it doesn't exist
os.makedirs(folder_path, exist_ok=True)

# Define the filename for the JSON file
filename = "immigrants_citizenship.json"

# Generate the file path
file_path = os.path.join(folder_path, filename)

# Save the immigrants_demographic JSON object to file
with open(file_path, "w") as file:
    json.dump(immigrants_citizenship_json, file, indent=4)

print(f"JSON data saved to: {file_path}")

JSON data saved to: ../_datasets/Clean/D2/immigrants_citizenship.json




## RETRIVING GEOGRAPHICAL ARRIVAL FOR TOTAL IMMIGRANTS.

We will use the same dataset for two different measures. We saw the one and we will also retrive the distribution of total immigrants divided by region.

In order to do so we will mantain the same filters besides for the 

- "CITIZENSHIP" whete the geopolitical origin is declared, we will select the total.
- "TERRITORY_NEXT_RESID" for the new residence's geographic area. In our case we decided to look at group of regions. So we filter for: ITC for Northwest Italy; ITD for Northeast Italy; ITE for Central Italy; ITF for South Italy; ITG Insular Italy (Sicily and Sardinia).

In [34]:
# 5 - QUERY WITH FILTERS
# Query all immigrants from the whole world, both sexes, every age, divided by region of arrival.
response = requests.get('https://esploradati.istat.it/SDMXWS/rest/data/28_185/A..TREG.FREIGN..9.TOTAL.ITC+ITD+ITE+ITF+ITG.X1033.X1033')
print(response.status_code)

if response.status_code == 200:
    content = response.content
    
    if len(content) > 0:
        try:
            xml_data = xmltodict.parse(content)
            json_string_data = json.dumps(xml_data,
                                    allow_nan = True, # If we hadn't set allow_nan to
                                                      # true we would have got
                                                      # ValueError: Out of range float
                                                      # values are not JSON compliant
                                    indent = 6) # Indentation can be used for pretty-printing
            # Now you can work with the parsed JSON data
        except json.JSONDecodeError as e:
            print("Error decoding JSON:", e)
    else:
        print("Empty content received.")
else:
    print("Request failed with status code:", response.status_code)

# print(json_string_data)
type(json_string_data)

200


str

In [1]:
region_data = {}

series = data['message:GenericData']['message:DataSet']['generic:Series']

for s in series:
    series_key = s['generic:SeriesKey']['generic:Value']
    obs = s['generic:Obs']

    region = None

    for key in series_key:
        if key['@id'] == 'TERRITORY_NEXT_RESID':
            region = key['@value']
            break

    if region:
        if region not in region_data:
            region_data[region] = []

        for o in obs:
            year = int(o['generic:ObsDimension']['@value'])
            value = o['generic:ObsValue']['@value']

            # Set the year as a date type with the last day of the year
            year_date = datetime(year=year, month=12, day=31)

            region_data[region].append({"date": year_date.strftime("%Y-%m-%d"), "tot_immigrants": int(value)})

# Convert the region data to JSON
region_json = json.dumps(region_data, indent=4)

# print(region_json)
type(region_json)

NameError: name 'data' is not defined

In [35]:
# Save the new file

# Specify the folder path to save the JSON file
folder_path = "../_datasets/Clean"

# Create the folder if it doesn't exist
os.makedirs(folder_path, exist_ok=True)

# Define the filename for the JSON file
filename = "regional_immigration_data.json"

# Generate the file path
file_path = os.path.join(folder_path, filename)

# Save the continent JSON to the file
with open(file_path, "w") as file:
    json.dump(region_data, file, indent=4)

print(f"JSON data saved to: {file_path}")

JSON data saved to: ../_datasets/Clean/regional_immigration_data.json
