In [5]:
import pandas as pd 
import plotly.express as px
import plotly.graph_objects as go

Cel: Profilowanie społeczno-demograficzne migrantów
* Zbudowanie profili migrantów (wiek, płeć, wykształcenie, status zawodowy) migrujących do krajów UE, jak i do poszczególnych krajów spośród tych wybranych do analizy - liberal = ["France", "Italy", "Germany", "Sweden", "Spain", "Austria"], conservs = ["Poland", "Bulgaria", "Romania", "Hungary",  "Greece"]
* kierunki i wzorce dotyczące migracji są już dostępne w migration_patterns - w tej części całkowicie je pominięto, aby uniknąć redundancji
* wybrane zbiory danych:
    * Population on 1 January by age group, sex and citizenship
    * Population by educational attainment level, sex, age and citizenship (%)
    * Population by sex, age, citizenship and labour status (1 000)
    (jak wszystkie dane dotyczące zatrudnienia, również te obejmują wyłącznie osoby w wieku 15-64 lat)

# Population on 1 January by age group, sex and citizenship

In [20]:
# Load the pre-processed population table; the preview below shows the columns
# Sex, geo, Age class, year, Country of citizenship and Number.
df = pd.read_csv('../processed_data/population_foreigners.csv')

In [21]:
# Preview the first rows to check which columns were loaded.
df.head()

Unnamed: 0,Sex,geo,Age class,year,Country of citizenship,Number
0,Females,Austria,Total,2012,Foreign country and stateless,470822
1,Females,Austria,Total,2013,Foreign country and stateless,496844
2,Females,Austria,Total,2014,Foreign country and stateless,526542
3,Females,Austria,Total,2015,Foreign country and stateless,562672
4,Females,Austria,Total,2016,Foreign country and stateless,611580


In [22]:
# Country groups compared throughout the notebook.
liberal = ["France", "Italy", "Germany", "Sweden", "Spain", "Austria"]
conservs = ["Poland", "Bulgaria", "Romania", "Hungary", "Greece"]
selected = [*liberal, *conservs]

# Label every row with its country group. The liberal entries are merged last
# so that 'Liberal' takes precedence if a country were ever listed in both
# groups; anything not listed (including a missing geo) becomes 'Other'.
df['Group'] = (
    df['geo']
    .map({
        **dict.fromkeys(conservs, 'Conservative'),
        **dict.fromkeys(liberal, 'Liberal'),
    })
    .fillna('Other')
)

# Keep only the rows whose citizenship is 'Foreign country and stateless'.
only_foreigners = df.loc[
    df['Country of citizenship'].eq('Foreign country and stateless')
]

In [23]:
# Preview the filtered rows, now carrying the added 'Group' column.
only_foreigners.head()

Unnamed: 0,Sex,geo,Age class,year,Country of citizenship,Number,Group
0,Females,Austria,Total,2012,Foreign country and stateless,470822,Liberal
1,Females,Austria,Total,2013,Foreign country and stateless,496844,Liberal
2,Females,Austria,Total,2014,Foreign country and stateless,526542,Liberal
3,Females,Austria,Total,2015,Foreign country and stateless,562672,Liberal
4,Females,Austria,Total,2016,Foreign country and stateless,611580,Liberal


### Struktura wiekowa migrantów wg płci i kraju

In [24]:
# Raw age-class labels in ascending order: under 15, the five-year bands
# from 15-19 up to 60-64, then 65 and over.
age_order = (
    ['Less than 15 years']
    + [f'From {start} to {start + 4} years' for start in range(15, 65, 5)]
    + ['65 years or over']
)

In [25]:
# Map each raw age-class label to a broader band. The mapping is built from
# (band, raw labels) groups so that the grouping is visible at a glance.
age_merge = {
    raw_label: band
    for band, raw_labels in (
        ('0-14', ('Less than 15 years',)),
        ('15-24', ('From 15 to 19 years', 'From 20 to 24 years')),
        ('25-34', ('From 25 to 29 years', 'From 30 to 34 years')),
        ('35-49', ('From 35 to 39 years', 'From 40 to 44 years', 'From 45 to 49 years')),
        ('50-64', ('From 50 to 54 years', 'From 55 to 59 years', 'From 60 to 64 years')),
        ('65+', ('65 years or over',)),
    )
    for raw_label in raw_labels
}

# Distinct bands in first-seen (ascending age) order.
new_age_order = list(dict.fromkeys(age_merge.values()))

# Work on a new frame with the merged age band attached. Labels missing from
# age_merge map to NaN, and dropna() then removes every row that has a missing
# value in ANY column (not only in age_class).
temp_df = only_foreigners.assign(
    age_class=only_foreigners['Age class'].map(age_merge)
).dropna()

In [26]:
# One x-axis tick per year present in the data.
tick_vals = sorted(temp_df['year'].unique())

# Rows with Sex == 'Total' for the liberal countries, summed within each
# merged age band per country and year.
liberal_age_counts = (
    temp_df[temp_df['geo'].isin(liberal) & (temp_df['Sex'] == 'Total')]
    .groupby(['geo', 'year', 'age_class'], as_index=False)['Number']
    .sum()
)

axis_labels = {
    "year": "Year",
    "Number": "Number of Foreigners in Population",
    "age_class": "Age class",
}

fig = px.line(
    liberal_age_counts,
    x="year",
    y="Number",
    color="age_class",
    facet_col="geo",
    facet_col_wrap=3,
    markers=True,
    title="Trends in age groups of foreigners in selected liberal countries (2012-2024)",
    labels=axis_labels,
)


def show_every_year(axis):
    # Force an explicit tick (and label) for each year on this facet's x-axis.
    axis.update(
        tickmode='array',
        tickvals=tick_vals,
        ticktext=list(map(str, tick_vals)),
        ticks="outside",
    )


fig.for_each_xaxis(show_every_year)

fig.update_layout(
    height=1000,
    width=1200,
    showlegend=True,
    xaxis_title="Year",
    yaxis_title="Number of Foreigners in Population",
)
fig.show()



In [28]:
# Age trends for the conservative countries.
#
# Uses the merged `age_class` bands (not the 12 raw 'Age class' labels) so the
# chart is directly comparable with the liberal-country and Poland charts,
# which are built on the same bands. temp_df already had its NaN rows dropped
# when it was created, so no further dropna() is needed here.
conservative_age_counts = (
    temp_df[temp_df['geo'].isin(conservs) & (temp_df['Sex'] == 'Total')]
    .groupby(['geo', 'year', 'age_class'], as_index=False)['Number']
    .sum()
)

fig = px.line(
    conservative_age_counts,
    x="year",
    y="Number",
    color="age_class",
    facet_col="geo",
    facet_col_wrap=3,
    markers=True,
    # Title typo fixed: "convervative" -> "conservative".
    title="Trends in age class of foreigners in selected conservative countries (2012-2024)",
    labels={
        "year": "Year",
        "Number": "Number of Foreigners in Population",
        "age_class": "Age class",
    },
)
fig.update_layout(height=800, width=1200)
fig.show()

In [29]:
# Age-band trend for Poland only (rows with Sex == 'Total').
poland_total_rows = temp_df[
    (temp_df['Sex'] == 'Total') & (temp_df['geo'] == 'Poland')
].dropna()
poland_age_counts = poland_total_rows.groupby(
    ['geo', 'year', 'age_class'], as_index=False
)['Number'].sum()

poland_labels = {
    "year": "Year",
    "Number": "Number of Foreigners in Population",
    "age_class": "Age class",
}

fig = px.line(
    poland_age_counts,
    x="year",
    y="Number",
    color="age_class",
    facet_col="geo",
    facet_col_wrap=3,
    markers=True,
    title="Trends in age class of foreigners in Poland (2012-2024)",
    labels=poland_labels,
)
fig.update_layout(height=800, width=1200)
fig.show()

In [30]:
# Headline rows only: 'Total' in both the Sex and the Age class column.
totals = only_foreigners.loc[
    only_foreigners['Sex'].eq('Total') & only_foreigners['Age class'].eq('Total')
]

In [31]:
# Preview the headline rows (Sex and Age class both 'Total').
totals.head()

Unnamed: 0,Sex,geo,Age class,year,Country of citizenship,Number,Group
776,Total,Austria,Total,2012,Foreign country and stateless,946610,Liberal
777,Total,Austria,Total,2013,Foreign country and stateless,999095,Liberal
778,Total,Austria,Total,2014,Foreign country and stateless,1060446,Liberal
779,Total,Austria,Total,2015,Foreign country and stateless,1139736,Liberal
780,Total,Austria,Total,2016,Foreign country and stateless,1260188,Liberal


In [32]:
# Yearly share of the four listed countries in the sum over every `geo`
# present in `totals`.
selected_countries = ['Italy', 'Spain', 'Germany', 'France']

selected_sum = (
    totals.loc[totals['geo'].isin(selected_countries)]
    .groupby('year')['Number']
    .sum()
    .rename('Selected_Total')
    .reset_index()
)

total_all = (
    totals.groupby('year')['Number']
    .sum()
    .rename('All_Total')
    .reset_index()
)

result = pd.merge(selected_sum, total_all, on='year')
# Percentage share, rounded to two decimals.
result['Share (%)'] = (
    result['Selected_Total'] / result['All_Total'] * 100
).round(2)

print(result)

    year  Selected_Total  All_Total  Share (%)
0   2012        19842350   29679692      66.85
1   2013        20410882   30436019      67.06
2   2014        20705980   30946040      66.91
3   2015        21260380   31857029      66.74
4   2016        22447127   33540226      66.93
5   2017        23061944   34659446      66.54
6   2018        24090358   36065177      66.80
7   2019        24978383   37480707      66.64
8   2020        25852542   38980570      66.32
9   2021        26462159   39789520      66.51
10  2022        26890942   40170932      66.94
11  2023        28354819   43259203      65.55
12  2024        29893440   45814952      65.25


In [18]:
# Bar-chart variant of the liberal-country age-band view.
tick_vals = sorted(temp_df['year'].unique())
year_tick_style = dict(
    tickmode='array',
    tickvals=tick_vals,
    ticktext=[str(year) for year in tick_vals],
    ticks="outside",
)

# Rows with Sex == 'Total', summed per country, year and merged age band.
is_liberal_total = temp_df['geo'].isin(liberal) & (temp_df['Sex'] == 'Total')
liberal_band_counts = temp_df[is_liberal_total].groupby(
    ['geo', 'year', 'age_class'], as_index=False
)['Number'].sum()

fig = px.bar(
    liberal_band_counts,
    x="year",
    y="Number",
    color="age_class",
    facet_col="geo",
    facet_col_wrap=3,
    title="Trends in age groups of foreigners in selected liberal countries (2012-2024)",
    labels={
        "year": "Year",
        "Number": "Number of Foreigners in Population",
        "age_class": "Age class",
    },
)

# Show a tick for every year on each facet's x-axis.
fig.for_each_xaxis(lambda ax: ax.update(**year_tick_style))

fig.update_layout(
    height=1000,
    width=1200,
    showlegend=True,
    xaxis_title="Year",
    yaxis_title="Number of Foreigners in Population",
)
fig.show()



In [34]:
# Age-band trends for France, Germany, Italy and Spain, side by side in one
# row of facets (rows with Sex == 'Total').
four_country_totals = temp_df[
    temp_df['geo'].isin(['France', 'Germany', 'Italy', 'Spain'])
    & (temp_df['Sex'] == 'Total')
]
four_country_age_counts = four_country_totals.groupby(
    ['geo', 'year', 'age_class'], as_index=False
)['Number'].sum()

fig = px.line(
    four_country_age_counts,
    x="year",
    y="Number",
    color="age_class",
    facet_col="geo",
    facet_col_wrap=4,
    title="Age trends of foreigners in selected liberal countries (2012–2024)",
    labels={
        "year": "Year",
        "Number": "Number of Foreigners",
        "age_class": "Age group",
    },
)

fig.update_layout(
    showlegend=True,
    xaxis_title="Year",
    yaxis_title="Number of Foreigners in Population",
)
fig.show()

In [19]:
# Same four countries, now split by sex: colour encodes the age band and the
# dash pattern encodes Sex. Rows with Sex == 'Total' are excluded.
four_country_by_sex = temp_df[
    temp_df['geo'].isin(['France', 'Germany', 'Italy', 'Spain'])
    & (temp_df['Sex'] != 'Total')
]
age_sex_counts = four_country_by_sex.groupby(
    ['geo', 'year', 'age_class', 'Sex'], as_index=False
)['Number'].sum()

fig = px.line(
    age_sex_counts,
    x="year",
    y="Number",
    color="age_class",
    line_dash="Sex",
    facet_col="geo",
    facet_col_wrap=4,
    title="Age and sex trends of foreigners in selected liberal countries (2012–2024)",
    labels={
        "year": "Year",
        "Number": "Number of Foreigners",
        "age_class": "Age group",
        "Sex": "Sex",
    },
)

fig.update_layout(
    showlegend=True,
    xaxis_title="Year",
    yaxis_title="Number of Foreigners in Population",
)
fig.show()

### Struktura płciowa migrantów 

In [None]:
def plot_sex_distribution(df, title):
    """Display a faceted bar chart of the mean ``Number`` per sex.

    Parameters
    ----------
    df : DataFrame
        Must contain the columns ``geo``, ``Sex`` and ``Number``. Rows are
        averaged within each (geo, Sex) pair.
    title : str
        Figure title.

    Returns
    -------
    None
        The figure is shown via ``fig.show()``.
    """
    mean_by_sex = df.groupby(['geo', 'Sex'], as_index=False)['Number'].mean()

    def keep_facet_value(annotation):
        # Keep only the text after the last "=" in each facet title.
        annotation.update(text=annotation.text.rpartition("=")[2])

    fig = px.bar(
        mean_by_sex,
        x='Sex',
        y='Number',
        color='Sex',
        facet_col='geo',
        facet_col_wrap=3,
        title=title,
        labels={'Sex': 'Sex', 'Number': 'Number of foreigners'},
    )
    # The legend is hidden because the x-axis already names each sex.
    fig.update_layout(height=1000, width=1200, showlegend=False)
    fig.for_each_annotation(keep_facet_value)
    fig.show()

In [56]:
# Per-sex rows (Males / Females) with Age class == 'Total' from 2014 onwards;
# this part of the filter is common to both country groups.
sex_rows_since_2014 = only_foreigners[
    (only_foreigners['year'] >= 2014)
    & (only_foreigners['Age class'] == 'Total')
    & only_foreigners['Sex'].isin(['Males', 'Females'])
]

only_foreigners_liberal_f = sex_rows_since_2014[
    sex_rows_since_2014['geo'].isin(liberal)
]
only_foreigners_conservs_f = sex_rows_since_2014[
    sex_rows_since_2014['geo'].isin(conservs)
]

plot_sex_distribution(
    only_foreigners_liberal_f,
    title='Structure of foreigners in selected liberal countries (2014-2024, mean annual)',
)
plot_sex_distribution(
    only_foreigners_conservs_f,
    title='Structure of foreigners in selected conservative countries (2014-2024, mean annual)',
)



Wniosek: we wszystkich powyższych analizach udział kobiet i mężczyzn wśród cudzoziemców jest zbliżony do 50/50.

# Population by educational attainment level, sex, age and citizenship (%)

In [205]:
# Load the education-level table (columns shown below: ISCED, geo, year,
# Percentage). NOTE: this rebinds `df`, replacing the population table.
df = pd.read_csv('../processed_data/education_levels_foreigners.csv')

In [206]:
# Preview the first rows of the education-level table.
df.head()

Unnamed: 0,ISCED,geo,year,Percentage
0,levels 0-2,Austria,2014,29.6
1,levels 0-2,Austria,2015,28.1
2,levels 0-2,Austria,2016,28.6
3,levels 0-2,Austria,2017,28.9
4,levels 0-2,Austria,2018,28.6


In [100]:
# Percentage per ISCED level over time, one facet per liberal country.
liberal_education = df[df['geo'].isin(liberal)]
education_labels = {
    "year": "Year",
    "Percentage": "Percentage of foreigners",
    "ISCED": "ISCED Level",
}

fig = px.line(
    liberal_education,
    x="year",
    y="Percentage",
    color="ISCED",
    facet_col="geo",
    facet_col_wrap=3,
    markers=True,
    title="Trends in education levels of foreigners in selected liberal countries (2014-2024)",
    labels=education_labels,
)
fig.update_layout(height=800, width=1200)
fig.show()


In [101]:
# Percentage per ISCED level over time, one facet per conservative country.
conservative_education = df[df['geo'].isin(conservs)]
education_labels = {
    "year": "Year",
    "Percentage": "Percentage of foreigners",
    "ISCED": "ISCED Level",
}

fig = px.line(
    conservative_education,
    x="year",
    y="Percentage",
    color="ISCED",
    facet_col="geo",
    facet_col_wrap=3,
    markers=True,
    title="Trends in education levels of foreigners in selected conservative countries (2014-2024)",
    labels=education_labels,
)
fig.update_layout(height=800, width=1200)
fig.show()


In [102]:
# Snapshot of the most recent year in the table, for every `geo` it contains.
last_year = df['year'].max()
last_year_df = df.loc[df['year'].eq(last_year)]

fig = px.bar(
    last_year_df,
    x='geo',
    y='Percentage',
    color='ISCED',
    barmode='group',
    title=f"Percentage of educational levels among foreigners in {last_year}",
    labels={
        'geo': 'Country',
        'Percentage': 'foreigners (%)',
        'ISCED': 'ISCED',
    },
)
fig.update_layout(height=500, width=1000)
fig.show()


In [103]:
# Most-recent-year snapshot restricted to the countries in `selected`.
last_year = df['year'].max()
last_year_df = df.loc[df['year'].eq(last_year)]
last_year_selected = last_year_df[last_year_df['geo'].isin(selected)]

fig = px.bar(
    last_year_selected,
    x='geo',
    y='Percentage',
    color='ISCED',
    barmode='group',
    title=f"Percentage of educational levels among foreigners in {last_year} in selected countries",
    labels={
        'geo': 'Country',
        'Percentage': 'foreigners (%)',
        'ISCED': 'ISCED',
    },
)
fig.update_layout(height=500, width=1000)
fig.show()

In [104]:
# Liberal-country education trend, drawn with the same arguments as the
# earlier liberal-country line chart in this section.
liberal_rows = df.loc[df['geo'].isin(liberal)]

line_chart_options = dict(
    x="year",
    y="Percentage",
    color="ISCED",
    facet_col="geo",
    facet_col_wrap=3,
    markers=True,
    title="Trends in education levels of foreigners in selected liberal countries (2014-2024)",
    labels={
        "year": "Year",
        "Percentage": "Percentage of foreigners",
        "ISCED": "ISCED Level",
    },
)

fig = px.line(liberal_rows, **line_chart_options)
fig.update_layout(height=800, width=1200)
fig.show()


In [None]:
# Stacked yearly education structure, one facet per liberal country.
liberal_education = df[df['geo'].isin(liberal)]

fig = px.bar(
    liberal_education,
    x="year",
    y="Percentage",
    color="ISCED",
    barmode="stack",
    facet_col="geo",
    facet_col_wrap=3,
    title="Structure of migrants’ education levels in liberal countries (stacked by year)",
    labels={
        "year": "Year",
        "Percentage": "Percentage of migrants",
        "ISCED": "Education Level",
    },
)

fig.update_layout(height=800, width=1200)
# Keep only the text after the last "=" in each facet title.
fig.for_each_annotation(
    lambda note: note.update(text=note.text.rpartition("=")[2])
)
fig.show()


In [106]:
# Stacked yearly education structure, one facet per conservative country.
conservative_education = df[df['geo'].isin(conservs)]

fig = px.bar(
    conservative_education,
    x="year",
    y="Percentage",
    color="ISCED",
    barmode="stack",
    facet_col="geo",
    facet_col_wrap=3,
    title="Structure of migrants’ education levels in conservative countries (stacked by year)",
    labels={
        "year": "Year",
        "Percentage": "Percentage of migrants",
        "ISCED": "Education Level",
    },
)

fig.update_layout(height=800, width=1200)
# Keep only the text after the last "=" in each facet title.
fig.for_each_annotation(
    lambda note: note.update(text=note.text.rpartition("=")[2])
)
fig.show()


In [208]:
# Stacked yearly education structure for Poland alone.
poland_education = df.loc[df['geo'].eq('Poland')]

fig = px.bar(
    poland_education,
    x="year",
    y="Percentage",
    color="ISCED",
    barmode="stack",
    facet_col="geo",
    facet_col_wrap=3,
    title="Structure of migrants’ education levels in Poland (stacked by year)",
    labels={
        "year": "Year",
        "Percentage": "Percentage of migrants",
        "ISCED": "Education Level",
    },
)

fig.update_layout(height=800, width=1200)
# Keep only the text after the last "=" in the facet title.
fig.for_each_annotation(
    lambda note: note.update(text=note.text.rpartition("=")[2])
)
fig.show()

Braki danych

# Population by sex, age, citizenship and labour status (1 000)

In [4]:
# Load the labour-status table (columns shown below: Sex, Employment status,
# geo, year, In thousands). NOTE: this rebinds `df`, replacing the education table.
df = pd.read_csv('../processed_data/employment_status_foreigners.csv')

In [5]:
# Preview the first rows of the labour-status table.
df.head()

Unnamed: 0,Sex,Employment status,geo,year,In thousands
0,Females,Employed persons,Austria,2014,244.8
1,Females,Employed persons,Austria,2015,253.2
2,Females,Employed persons,Austria,2016,282.5
3,Females,Employed persons,Austria,2017,293.7
4,Females,Employed persons,Austria,2018,311.2


In [6]:
# Wide view: one row per (geo, year, Sex) and one column per employment
# status. pivot_table aggregates duplicate cells with its default (mean).
pivot_df = pd.pivot_table(
    df,
    values='In thousands',
    index=['geo', 'year', 'Sex'],
    columns='Employment status',
).reset_index()


In [7]:
# Preview the wide table: one column per employment status.
pivot_df.head()

Employment status,geo,year,Sex,Employed persons,Persons outside the labour force,Unemployed persons
0,Austria,2014,Females,244.8,146.5,27.9
1,Austria,2014,Males,272.2,80.6,36.6
2,Austria,2014,Total,517.0,227.1,64.5
3,Austria,2015,Females,253.2,161.7,29.5
4,Austria,2015,Males,297.6,85.7,39.9


In [8]:
# Country groups, restated here so this section does not depend on the
# definitions made in the population section.
liberal = [
    "France", "Italy", "Germany",
    "Sweden", "Spain", "Austria",
]
conservs = [
    "Poland", "Bulgaria", "Romania",
    "Hungary", "Greece",
]
selected = [*liberal, *conservs]

In [9]:
# Display order of the employment statuses on the x-axis.
categories = ['Employed persons', 'Persons outside the labour force', 'Unemployed persons']


def plot_employment_structure(df, title):
    """Display a faceted bar chart of mean ``In thousands`` per employment status.

    Parameters
    ----------
    df : DataFrame
        Must contain the columns ``geo``, ``Sex``, ``Employment status`` and
        ``In thousands``. Rows are averaged within each
        (geo, Sex, Employment status) combination.
    title : str
        Figure title.

    Returns
    -------
    None
        The figure is shown via ``fig.show()``.
    """
    status_means = df.groupby(
        ['geo', 'Sex', 'Employment status'], as_index=False
    )['In thousands'].mean()
    # Ordered categorical restricted to the module-level `categories` list.
    status_means['Employment status'] = pd.Categorical(
        status_means['Employment status'],
        categories=categories,
        ordered=True,
    )

    fig = px.bar(
        status_means,
        x='Employment status',
        y='In thousands',
        color='Sex',
        facet_col='geo',
        facet_col_wrap=3,
        category_orders={'Employment status': categories},
        title=title,
        labels={
            'Employment status': 'Employment status',
            'In thousands': 'Foreigners in thousands',
        },
    )

    fig.update_layout(height=1000, width=1200, showlegend=True)
    # Keep only the text after the last "=" in each facet title.
    fig.for_each_annotation(
        lambda note: note.update(text=note.text.rpartition("=")[2])
    )
    fig.show()

In [10]:
# Per-sex rows only: drop the rows where Sex == 'Total', then split by group.
per_sex_rows = df[df['Sex'] != 'Total']

liberals_filtered = per_sex_rows[per_sex_rows['geo'].isin(liberal)]
conserve_filtered = per_sex_rows[per_sex_rows['geo'].isin(conservs)]

plot_employment_structure(
    liberals_filtered,
    "Employment structure of foreigners in selected liberal countries (2014-2024, mean)",
)
plot_employment_structure(
    conserve_filtered,
    "Employment structure of foreigners in selected conservative countries(2014-2024, mean)",
)

Dla krajów konserwatywnych w kontekście migracji bardzo mało danych - brak możliwości analizy.

In [145]:

# Mean yearly head-count per employment status, stacked per liberal country.
#
# The mean is taken over the Sex == 'Total' rows. `liberals_filtered` holds
# separate Males and Females rows, so averaging it without grouping by Sex
# blends the two sexes into one mean and shows roughly half of the real
# number of foreigners on an axis labelled "Foreigners in thousands".
liberal_total_rows = df[df['geo'].isin(liberal) & (df['Sex'] == 'Total')]
employment_structure = liberal_total_rows.groupby(
    ['geo', 'Employment status'], as_index=False
)['In thousands'].mean()

fig = px.bar(
    employment_structure,
    x='geo',
    y='In thousands',
    color='Employment status',
    title="Employment structure of foreigners in selected liberal countries (2014-2024, mean)",
    labels={'geo': 'Country', 'In thousands': 'Foreigners in thousands'},
)
# No facets are used, so there are no facet annotations to clean up.
fig.update_layout(showlegend=True)
fig.show()