# D6
## Presence of foreigners by industry sector
### [IstatData](https://esploradati.istat.it/databrowser/#/en/dw/categories/IT1,Z0500LAB,1.0/LAB_OFFER/LAB_OFF_EMPLOY/DCCV_OCCUPATIT1/DCCV_OCCUPATIT1_SECTECOACT/IT1,150_938_DF_DCCV_OCCUPATIT1_9,1.0)

In [None]:
#!pip install pandas requests requests_cache xmltodict numpy

In [14]:
import pandas as pd
import json
import requests
import xmltodict
from datetime import datetime
import os
import numpy as np

In [13]:
# 5 - QUERY WITH FILTERS
# Download the ISTAT SDMX series for this dataset and keep them as a JSON
# string. In the dot-separated key at the end of the URL, the sixth field
# (FRG+ITL) carries the citizenship codes and the eighth field
# (0020, 0025, 0026, A, F) the economic-sector codes; these are the same
# positions (5 and 7, zero-based) that are read back from each series key
# further down in this notebook.
from xml.parsers.expat import ExpatError  # raised by xmltodict on malformed XML

# Reset the result first, so that a failed run can never leave the value of a
# previous execution of this cell silently in place.
json_string_data = None

response = requests.get(
    'https://esploradati.istat.it/SDMXWS/rest/data/150_938_DF_DCCV_OCCUPATIT1_9/A.IT.9.Y15-89..FRG+ITL..+0020+0025+0026+A+F.9.....',
    timeout=60,  # without a timeout requests may wait forever on a stalled server
)
print(response.status_code)

if response.status_code == 200:
    content = response.content

    if len(content) > 0:
        try:
            # The service answers with SDMX-ML (XML); a broken payload makes
            # the underlying expat parser raise ExpatError.
            xml_data = xmltodict.parse(content)
        except ExpatError as e:
            print("Error parsing XML:", e)
        else:
            json_string_data = json.dumps(xml_data,
                                    allow_nan = True, # If we hadn't set allow_nan to
                                                      # true we would have got
                                                      # ValueError: Out of range float
                                                      # values are not JSON compliant
                                    indent = 6) # Indentation can be used for pretty-printing
            # Now you can work with the parsed JSON data
    else:
        print("Empty content received.")
else:
    print("Request failed with status code:", response.status_code)

#print(json_string_data)
# Shows `str` on success and `NoneType` when the download or parsing failed.
type(json_string_data)

200


str

In [18]:
# Reshape the SDMX payload into {"foreign": [...], "italian": [...]}, where each
# entry is {"OCCUPATION": <sector label>, "YEAR": <period>, "VALUE": <rounded int>}.


def _as_list(node):
    """Return *node* as a list: [] for None, [node] for a lone element."""
    if node is None:
        return []
    return node if isinstance(node, list) else [node]


# Load the JSON string
data = json.loads(json_string_data)

# Translation dictionary for the economic-sector codes used in the query
occupation_translation = {
    "A": "agriculture, forestry and fishing",
    "0020": "industry",
    "F": "construction",
    "0026": "trade, hotels and restaurants",
    "0025": "other services activity"
}

# Citizenship code found in the series key -> bucket of the result
citizenship_bucket = {
    "FRG": "foreign",
    "ITL": "italian"
}

# Zero-based positions of the two dimensions of interest inside the series key
CITIZENSHIP_POS = 5
SECTOR_POS = 7

# Extract information and structure it as needed
result = {
    "foreign": [],
    "italian": []
}

dataset = data["message:GenericData"]["message:DataSet"]

# xmltodict yields a plain dict (not a one-element list) when an element has a
# single child, so both the series and their observations are normalised to
# lists before iterating.
for series in _as_list(dataset.get("generic:Series")):
    key_values = series["generic:SeriesKey"]["generic:Value"]
    citizenship_code = key_values[CITIZENSHIP_POS]["@value"]
    sector_code = key_values[SECTOR_POS]["@value"]

    # Unknown sector codes are kept as-is instead of being dropped
    sector_label = occupation_translation.get(sector_code, sector_code)

    # Extract relevant values from the series
    obs_values_list = [
        {
            "OCCUPATION": sector_label,
            "YEAR": obs["generic:ObsDimension"]["@value"],
            # Round to the nearest integer (np.rint) before casting
            "VALUE": int(np.rint(float(obs["generic:ObsValue"]["@value"])))
        }
        for obs in _as_list(series.get("generic:Obs"))
    ]

    # Append the extracted values to the corresponding category; series with
    # any other citizenship code are ignored.
    bucket = citizenship_bucket.get(citizenship_code)
    if bucket is not None:
        result[bucket].extend(obs_values_list)

# Convert the resulting dictionary to a JSON string
result_json = json.dumps(result, indent=2)

# Print or use the resulting JSON string
#print(result_json)

In [20]:
# Persist the per-sector figures as a JSON file.

# Turn the serialized result back into plain Python objects
occupations_rate_json = json.loads(result_json)

# Destination folder and file name, combined into the full output path
folder_path = "../_datasets/Clean/D6"
filename = "occupations_rate.json"
file_path = os.path.join(folder_path, filename)

# Make sure the destination folder exists (no error if it is already there)
os.makedirs(folder_path, exist_ok=True)

# Write the data with 4-space indentation
with open(file_path, "w") as handle:
    handle.write(json.dumps(occupations_rate_json, indent=4))

print(f"JSON data saved to: {file_path}")

JSON data saved to: ../_datasets/Clean/D6/occupations_rate.json
