# Preparation
This section imports the libraries and modules used throughout the project. Additional libraries may be added as the project evolves.

In [1]:
# Uncomment the following lines to install the required packages
# !pip install pandas
# !pip install matplotlib
# !pip install seaborn
# !pip install pycountry

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pycountry

# Base folder for every input and output file used in this notebook.
# Keep the trailing slash: file paths are built by plain string interpolation
# (f'{datasets_path}...'), so no separator is inserted automatically.
datasets_path = 'datasets/'

# Questions

## What are the main countries of origin of immigrants in Italy, and how have these trends changed over the past decade?

## Generate mashed-up data

In [3]:
def convert_to_iso3(code):
    """Convert an ISO 3166-1 alpha-2 country code to its alpha-3 equivalent.

    Parameters
    ----------
    code : str or missing value
        Two-letter country code. Letter case and surrounding whitespace are
        ignored for the lookup.

    Returns
    -------
    str or None
        * ``None`` when ``code`` is missing (``pd.isna(code)`` is true).
        * ``"GBR"`` for the non-ISO code ``"UK"``.
        * The alpha-3 code when pycountry knows the alpha-2 code.
        * ``code`` itself, unchanged, when it is not a string or when the
          lookup finds nothing, so unknown codes stay visible downstream.
    """
    if pd.isna(code):
        return None

    # Non-string values cannot be alpha-2 codes; hand them back untouched
    # instead of letting the pycountry lookup raise on them.
    if not isinstance(code, str):
        return code

    normalized = code.strip().upper()

    # "UK" is not an ISO 3166-1 alpha-2 code (the ISO code is "GB"), so it has
    # to be mapped by hand.
    if normalized == "UK":
        return "GBR"

    try:
        country = pycountry.countries.get(alpha_2=normalized)
    except LookupError:
        # Depending on the pycountry version, a failed lookup either returns
        # None or raises KeyError (a LookupError subclass); treat both alike.
        country = None

    alpha_3 = getattr(country, "alpha_3", None)
    return alpha_3 if alpha_3 else code

In [60]:
# Read the three raw extracts (two from the IstatData folder, one from the OECD folder)
d4, d5, d15 = (
    pd.read_csv(csv_path)
    for csv_path in (
        f'{datasets_path}IstatData/Immigrants - citizenship (IT1,28_185_DF_DCIS_MIGRAZIONI_2,1.0).csv',
        f'{datasets_path}IstatData/Type of residence permit and citizenship (IT1,29_348_DF_DCIS_PERMSOGG1_1,1.0).csv',
        f'{datasets_path}OECD/OECD.ELS.IMD,DSD_MIG@DF_MIG,1.0+ITA..A.B11.csv',
    )
)

# Readable labels for the numeric sex codes
Sex_dictionary = dict(zip((1, 2, 9), ("Male", "Female", "Total")))

# Readable labels for the residence-permit type codes
Permit_Type_dictionary = dict(LONGT="Long-term", NLONGT="Short-term", TOTAL="Total")

# Build the D4 slice in one chain:
#   1. keep only the rows whose AGE is 'TOTAL' and the four columns of interest,
#   2. rename them to the shared column names,
#   3. keep only two-character country codes (longer codes are dropped;
#      presumably they are aggregates rather than countries - TODO confirm),
#   4. convert the codes to ISO 3166-1 alpha-3 and tag the rows with their source.
d4_filtered = (
    d4.loc[d4['AGE'] == 'TOTAL', ['CITIZENSHIP', 'SEX', 'TIME_PERIOD', 'OBS_VALUE']]
    .rename(columns={
        'CITIZENSHIP': 'Country_Code',
        'SEX': 'Sex',
        'TIME_PERIOD': 'Year',
        'OBS_VALUE': 'Value',
    })
    .loc[lambda frame: frame['Country_Code'].str.len() == 2]
    .assign(
        Country_Code=lambda frame: frame['Country_Code'].apply(convert_to_iso3),
        Dataset='Immigrants - citizenship',
        Dataset_Code='D4',
    )
)


# Build the D5 slice in one chain:
#   1. keep the five columns of interest,
#   2. rename them to the shared column names,
#   3. keep only two-character country codes,
#   4. convert the codes to ISO 3166-1 alpha-3 and tag the rows with their source.
d5_filtered = (
    d5[['MOSTREL_CCITENSHIP', 'SEX', 'TYPE_RES_PERMIT', 'TIME_PERIOD', 'OBS_VALUE']]
    .rename(columns={
        'MOSTREL_CCITENSHIP': 'Country_Code',
        'SEX': 'Sex',
        'TYPE_RES_PERMIT': 'Permit_Type',
        'TIME_PERIOD': 'Year',
        'OBS_VALUE': 'Value',
    })
    .loc[lambda frame: frame['Country_Code'].str.len() == 2]
    .assign(
        Country_Code=lambda frame: frame['Country_Code'].apply(convert_to_iso3),
        Dataset='Type of residence permit and citizenship',
        Dataset_Code='D5',
    )
)


# Select the OECD (d15) columns of interest and give them the shared column names
d15_filtered = d15[['CITIZENSHIP', 'Citizenship', 'Sex', 'TIME_PERIOD', 'OBS_VALUE']]
d15_filtered = d15_filtered.rename(columns={'CITIZENSHIP': 'Country_Code',
                                          'Citizenship': 'Country_Name',
                                          'TIME_PERIOD': 'Year',
                                          'OBS_VALUE': 'Value'})

# Country code -> country name lookup, one row per distinct pair.
# d15 presumably holds one row per observation (it also carries Sex, TIME_PERIOD
# and OBS_VALUE), so the same pair can repeat many times. Merging against the
# raw columns would multiply every d4/d5 row by that repeat count; removing the
# duplicates first keeps the left merge from inflating the row count.
country_names = d15_filtered[['Country_Code', 'Country_Name']].drop_duplicates()

# Add country name to d4 and d5 (codes missing from the lookup get a NaN name)
d4_filtered = d4_filtered.merge(country_names, on='Country_Code', how='left')
d5_filtered = d5_filtered.merge(country_names, on='Country_Code', how='left')

# Final column layout of the mashed-up table
output_columns = ['Year', 'Country_Code', 'Country_Name', 'Sex', 'Value',
                  'Permit_Type', 'Dataset_Code', 'Dataset']

# Stack D4 and D5, replace the coded Sex / Permit_Type values with readable
# labels (codes absent from the dictionaries become NaN), fix the column order,
# drop exact duplicate rows and sort by country name, dataset and year.
combined = (
    pd.concat([d4_filtered, d5_filtered], ignore_index=True)
    .assign(
        Sex=lambda frame: frame['Sex'].map(Sex_dictionary),
        Permit_Type=lambda frame: frame['Permit_Type'].map(Permit_Type_dictionary),
    )
    .loc[:, output_columns]
    .drop_duplicates()
    .sort_values(['Country_Name', 'Dataset_Code', 'Year'])
)

# Write the result to the mashup folder, without the index column
combined.to_csv(f'{datasets_path}mashup/italy_immigration_trends_by_country_and_permit.csv', index=False)


## What are the demographic profiles of immigrants in Italy (age, gender, education level)?

In [None]:
# python codes