In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read the cleaned CSV into the notebook-wide DataFrame `df`.
# The path is relative, so it resolves against the kernel's working directory.
data_path = '../data/cleaned_albania_youth_data.csv'
df = pd.read_csv(data_path)

In [3]:
# Summary statistics
# describe() returns a DataFrame of per-column statistics, so print it explicitly.
print(df.describe())
# info() writes its report (columns, non-null counts, dtypes) to stdout itself
# and returns None; wrapping it in print() would emit an extra "None" line.
df.info()

             year  youth_emigration  neet_rate  tertiary_enrollment_total  \
count     8.00000          8.000000   8.000000                   8.000000   
mean   2019.50000      16173.750000  27.387500              128928.375000   
std       2.44949       5449.100575   1.951876                8061.619918   
min    2016.00000      11765.000000  25.000000              120063.000000   
25%    2017.75000      13277.750000  25.875000              123185.750000   
50%    2019.50000      14012.500000  27.250000              127072.000000   
75%    2021.25000      16793.750000  28.875000              133474.250000   
max    2023.00000      27790.000000  30.000000              141410.000000   

       youth_population  tertiary_enrollment_rate  avg_gross_wage  asylum_pct  \
count          8.000000                  8.000000        8.000000    8.000000   
mean      229988.000000                 56.287500    55343.375000   30.397500   
std        22814.057465                  2.965091     7684.1665

In [4]:
# Per-year aggregates: mean NEET rate first, then total youth emigration.
# Both series are derived from the same grouping on `year`.
by_year = df.groupby('year')
print(by_year['neet_rate'].mean())
print(by_year['youth_emigration'].sum())

year
2016    30.0
2017    29.7
2018    28.6
2019    26.6
2020    27.9
2021    26.1
2022    25.2
2023    25.0
Name: neet_rate, dtype: float64
year
2016    27790
2017    20624
2018    11765
2019    13904
2020    14121
2021    15517
2022    13721
2023    11948
Name: youth_emigration, dtype: int64


In [5]:
# Row-over-row relative change for the NEET rate and the average gross wage.
# pct_change() yields fractions (0.05 means +5%), and the first row is NaN
# because it has no preceding row to compare against.
neet_series = df['neet_rate']
wage_series = df['avg_gross_wage']
df['neet_rate_change'] = neet_series.pct_change()
df['wage_change'] = wage_series.pct_change()

In [6]:
# Pairwise Pearson correlation matrix over the numeric columns of `df`
# (this includes any derived columns already added to the frame).
corr = df.corr(method='pearson', numeric_only=True)
print(corr)

                               year  youth_emigration  neet_rate  \
year                       1.000000         -0.716752  -0.951664   
youth_emigration          -0.716752          1.000000   0.702849   
neet_rate                 -0.951664          0.702849   1.000000   
tertiary_enrollment_total -0.914124          0.582860   0.832066   
youth_population          -0.990292          0.633848   0.940441   
tertiary_enrollment_rate   0.817455         -0.504542  -0.813633   
avg_gross_wage             0.936804         -0.578622  -0.880721   
asylum_pct                -0.718481          0.117143   0.704582   
emigration_rate_per_1000  -0.517186          0.967128   0.515870   
neet_rate_change           0.031924          0.065164   0.289011   
wage_change                0.848834         -0.395359  -0.791884   

                           tertiary_enrollment_total  youth_population  \
year                                       -0.914124         -0.990292   
youth_emigration                   

In [8]:
# Query: rows whose NEET rate is at or above 30.
neet_mask = df['neet_rate'] >= 30
high_neet = df.loc[neet_mask]
print("Years with high NEET:", high_neet)


Years with high NEET:    year  youth_emigration  neet_rate  tertiary_enrollment_total  \
0  2016             27790       30.0                     141410   

   youth_population  tertiary_enrollment_rate  avg_gross_wage  asylum_pct  \
0            256899                     55.06         47522.0       30.24   

   emigration_rate_per_1000  neet_rate_change  wage_change  
0                108.174808               NaN          NaN  


In [9]:
# Query: the five rows with the largest youth emigration counts,
# ordered from highest to lowest.
top_emigration = (
    df
    .sort_values(by='youth_emigration', ascending=False)
    .head(5)
)
print("Top emigration years:", top_emigration)

Top emigration years:    year  youth_emigration  neet_rate  tertiary_enrollment_total  \
0  2016             27790       30.0                     141410   
1  2017             20624       29.7                     131618   
5  2021             15517       26.1                     123880   
4  2020             14121       27.9                     123797   
3  2019             13904       26.6                     130264   

   youth_population  tertiary_enrollment_rate  avg_gross_wage  asylum_pct  \
0            256899                     55.06         47522.0       30.24   
1            254172                     51.81         48967.0       32.62   
5            218787                     56.63         57190.0       29.54   
4            226425                     54.69         53662.0       32.83   
3            236981                     54.97         52380.0       32.62   

   emigration_rate_per_1000  neet_rate_change  wage_change  
0                108.174808               NaN      