In [2]:
import pandas as pd 
import plotly.express as px
import plotly.graph_objects as go

Cel: Profilowanie społeczno-demograficzne migrantów
* Zbudowanie profili migrantów (wiek, płeć, wykształcenie, status zawodowy) migrujących do krajów UE, jak i do poszczególnych krajów spośród tych wybrancyh do analizy - liberal = ["France", "Italy", "Germany", "Sweden", "Spain", "Austria"], conservs = ["Poland", "Bulgaria", "Romania", "Hungary",  "Greece"]
* kierunki i wzorce dotyczące mirgacji są już dostępne w migration_patterns - w tej częsci całkowicie to pominięto, zapobiegając redundancji
* wybrane zbiory danych:
    * Population on 1 January by age group, sex and citizenship
    * Population by educational attainment level, sex, age and citizenship (%)

# Population on 1 January by age group, sex and citizenship

In [3]:
df = pd.read_csv('../processed_data/population_foreigners.csv')

In [4]:
df.head()

Unnamed: 0,Sex,geo,Age class,TIME_PERIOD,Country of citizenship,Number
0,Females,Austria,Total,2012,Foreign country and stateless,470822
1,Females,Austria,Total,2013,Foreign country and stateless,496844
2,Females,Austria,Total,2014,Foreign country and stateless,526542
3,Females,Austria,Total,2015,Foreign country and stateless,562672
4,Females,Austria,Total,2016,Foreign country and stateless,611580


In [6]:
liberal = ["France", "Italy", "Germany", "Sweden", "Spain", "Austria"]
conservs = ["Poland", "Bulgaria", "Romania", "Hungary",  "Greece"]
selected = liberal + conservs
df['Group'] = df['geo'].apply(
    lambda x: 'Liberal' if x in liberal else ('Conservative' if x in conservs else 'Other')
)
only_foreigners = df[df['Country of citizenship'] == 'Foreign country and stateless']

### Struktura wiekowa migrantów wg płci i kraju

In [20]:
age_order = [
    'Less than 15 years', 'From 15 to 19 years', 'From 20 to 24 years',
    'From 25 to 29 years', 'From 30 to 34 years', 'From 35 to 39 years',
    'From 40 to 44 years', 'From 45 to 49 years', 'From 50 to 54 years',
    'From 55 to 59 years', 'From 60 to 64 years', '65 years or over'
]

In [23]:
def plot_age_structure(df, title, age_order):
    age_structure = df.groupby(['geo', 'Sex', 'Age class'], as_index=False)['Number'].mean()
    age_structure = age_structure[age_structure['Age class'].isin(age_order)]
    age_structure['Age class'] = pd.Categorical(age_structure['Age class'], categories=age_order, ordered=True)
    fig = px.bar(
    age_structure,
    x='Age class',
    y='Number',
    color='Sex',
    facet_col='geo',
    facet_col_wrap=3,
    category_orders={'Age class': age_order},
    title=title,
    labels={'Age class': 'Age class', 'Number': 'Foreigners number'}
)

    fig.update_layout(height=1000, width=1200, showlegend=True)
    fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
    fig.show()

In [28]:
plot_age_structure(only_foreigners[(only_foreigners['geo'].isin(liberal))
                                    & (only_foreigners['Sex'] != 'Total')
                                    & (only_foreigners['TIME_PERIOD'] >= 2014)],
                   'Age structure of foreigners in selected liberal countries', age_order)

In [29]:
plot_age_structure(only_foreigners[(only_foreigners['geo'].isin(conservs))
                                    & (only_foreigners['Sex'] != 'Total') &
                                    (only_foreigners['TIME_PERIOD'] >= 2014)],
                   'Age structure of foreigners in selected conservative countries', age_order)

In [31]:
age_merge = {
    'Less than 15 years': '0-14',
    'From 15 to 19 years': '15-24',
    'From 20 to 24 years': '15-24',
    'From 25 to 29 years': '25-34',
    'From 30 to 34 years': '25-34',
    'From 35 to 39 years': '35-49',
    'From 40 to 44 years': '35-49',
    'From 45 to 49 years': '35-49',
    'From 50 to 54 years': '50-64',
    'From 55 to 59 years': '50-64',
    'From 60 to 64 years': '50-64',
    '65 years or over': '65+'
}
new_age_order = ['0-14', '15-24', '25-34', '35-49', '50-64', '65+']

temp_df = only_foreigners.copy()
temp_df['Age class'] = temp_df['Age class'].map(age_merge)

plot_age_structure(temp_df[(temp_df['geo'].isin(liberal))
                            & (temp_df['Sex'] != 'Total')
                            & (temp_df['TIME_PERIOD'] >= 2014)],
                   'Age structure of foreigners in selected liberal countries',
                   new_age_order)


plot_age_structure(temp_df[(temp_df['geo'].isin(conservs)) &
                            (temp_df['Sex'] != 'Total') &
                            (only_foreigners['TIME_PERIOD'] >= 2014)],
                   'Age structure of foreigners in selected conservative countries',
                   new_age_order)

Wszystkie analizy dla ostatnich 10 lat (2014-2024). Jako średnia liczba na przestrzeni ostatnich lat.

In [34]:
# 2024 - latest data
latest_df = temp_df[temp_df['TIME_PERIOD'] == 2024]
plot_age_structure(latest_df[(latest_df['geo'].isin(liberal))
                            & (latest_df['Sex'] != 'Total')],
                   'Age structure of foreigners in selected liberal countries in 2024',
                   new_age_order)


plot_age_structure(latest_df[(latest_df['geo'].isin(conservs)) &
                            (latest_df['Sex'] != 'Total')],
                   'Age structure of foreigners in selected conservative countries in 2024',
                   new_age_order)

### Struktura płciowa migrantów 

In [35]:
import plotly.express as px

def plot_sex_distribution(df, title):
    sex_structure = df.groupby(['geo', 'Sex'], as_index=False)['Number'].mean()
    
    fig = px.bar(
        sex_structure,
        x='Sex',
        y='Number',
        color='Sex',
        facet_col='geo',
        facet_col_wrap=3,
        title=title,
        labels={'Sex': 'Płeć', 'Number': 'Liczba migrantów (średnia roczna)'}
    )

    fig.update_layout(
        height=1000,
        width=1200,
        showlegend=False
    )
    fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
    fig.show()

In [37]:
only_foreigners_liberal_f = only_foreigners[
    (only_foreigners['geo'].isin(liberal)) &  
    (only_foreigners['Sex'].isin(['Males', 'Females'])) &
    (only_foreigners['Age class'] == 'Total')
]

only_foreigners_conservs_f = only_foreigners[
    (only_foreigners['geo'].isin(conservs)) &  
    (only_foreigners['Sex'].isin(['Males', 'Females'])) &
    (only_foreigners['Age class'] == 'Total')
]

plot_sex_distribution(only_foreigners_liberal_f, title='Structure of foreigners in selected liberal countries (2014-2024, mean annual)')
plot_sex_distribution(only_foreigners_conservs_f, title='Structure of foreigners in selected conservative countries (2014-2024, mean annual)')



50 na 50