In [1]:
import sys
import os
sys.path.append(os.path.abspath("../src"))

import datetime as dt
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 100)

from etl import quarter_to_dt

## Methodology:

I will clean and transform the dataframes question by question: for the first question I will check the related dataframes and apply the transformations they need, and then repeat the same process for each of the remaining questions.  

**Question 1.** Which economic sectors have experienced the highest growth since 2010, and how did the COVID-19 pandemic shift their trajectories?  
**Related dataframes**: QLF03, QLF05, QLF07, QLF32, QES01, QES03

In [2]:
# Quick preview (first and last rows) of every candidate table for question 1.
#
# Selection notes:
# - QES01, QES03 and QLF32 were removed from the candidates because they carry
#   less information than the other tables.
# - QLF05 and QLF07 will be removed as well, because they are inconsistent about
#   what counts as an "adult".
# - The overall totals (all regions, sexes and sectors) of QLF03 and of
#   QLF05 + QLF07 were compared and did not match (a difference of 27.1).
# - QLF03 is the table kept, since it provides exactly the data needed.
q1_related = ['QLF03', 'QLF05', 'QLF07']

for table_code in q1_related:
    raw_csv_path = f'../data/raw/{table_code}.csv'
    df_v = pd.read_csv(raw_csv_path)

    print(f'{table_code} Dataframe')
    # display() renders each argument in turn: head first, then tail.
    display(df_v.head(), df_v.tail())
    print('-------------------')

QLF03 Dataframe


Unnamed: 0,Statistic,Quarter,Sex,NACE Rev 2 Economic Sector,value
0,Persons aged 15-89 years in Employment,1998Q1,Both sexes,All NACE economic sectors,1550.3
1,Persons aged 15-89 years in Employment,1998Q1,Both sexes,"Agriculture, forestry and fishing (A)",135.6
2,Persons aged 15-89 years in Employment,1998Q1,Both sexes,Construction (F),103.7
3,Persons aged 15-89 years in Employment,1998Q1,Both sexes,"Wholesale and retail trade, repair of motor ve...",219.5
4,Persons aged 15-89 years in Employment,1998Q1,Both sexes,Transportation and storage (H),65.6


Unnamed: 0,Statistic,Quarter,Sex,NACE Rev 2 Economic Sector,value
11767,Persons aged 15-89 years in Employment (Season...,2025Q1,Female,Industry and Construction (B to F),
11768,Persons aged 15-89 years in Employment (Season...,2025Q1,Female,Services (G to U),
11769,Persons aged 15-89 years in Employment (Season...,2025Q1,Female,"Financial, insurance and real estate activitie...",72.4
11770,Persons aged 15-89 years in Employment (Season...,2025Q1,Female,Other NACE activities (R to U),71.8
11771,Persons aged 15-89 years in Employment (Season...,2025Q1,Female,Not stated,


-------------------
QLF05 Dataframe


Unnamed: 0,Statistic,Sex,NACE Rev 2 Economic Sector,Region,Quarter,value
0,Person aged 15 years and over in Employment,Both sexes,All NACE economic sectors,State,1998Q1,1550.4
1,Person aged 15 years and over in Employment,Both sexes,All NACE economic sectors,State,1998Q2,1572.1
2,Person aged 15 years and over in Employment,Both sexes,All NACE economic sectors,State,1998Q3,1636.3
3,Person aged 15 years and over in Employment,Both sexes,All NACE economic sectors,State,1998Q4,1615.9
4,Person aged 15 years and over in Employment,Both sexes,All NACE economic sectors,State,1999Q1,1644.5


Unnamed: 0,Statistic,Sex,NACE Rev 2 Economic Sector,Region,Quarter,value
27211,Person aged 15 years and over in Employment,Female,Not stated,South-West,2010Q4,
27212,Person aged 15 years and over in Employment,Female,Not stated,South-West,2011Q1,
27213,Person aged 15 years and over in Employment,Female,Not stated,South-West,2011Q2,
27214,Person aged 15 years and over in Employment,Female,Not stated,South-West,2011Q3,
27215,Person aged 15 years and over in Employment,Female,Not stated,South-West,2011Q4,


-------------------
QLF07 Dataframe


Unnamed: 0,STATISTIC,Quarter,Sex,NACE Rev 2 Economic Sector,Region,value
0,Persons aged 15-89 years in Employment,2012Q1,Both sexes,All NACE economic sectors,State,1862.6
1,Persons aged 15-89 years in Employment,2012Q1,Both sexes,All NACE economic sectors,Border,145.1
2,Persons aged 15-89 years in Employment,2012Q1,Both sexes,All NACE economic sectors,West,175.8
3,Persons aged 15-89 years in Employment,2012Q1,Both sexes,All NACE economic sectors,Mid-West,188.8
4,Persons aged 15-89 years in Employment,2012Q1,Both sexes,All NACE economic sectors,South-East,153.3


Unnamed: 0,STATISTIC,Quarter,Sex,NACE Rev 2 Economic Sector,Region,value
25753,Persons aged 15-89 years in Employment,2025Q1,Female,Not stated,South-East,
25754,Persons aged 15-89 years in Employment,2025Q1,Female,Not stated,South-West,
25755,Persons aged 15-89 years in Employment,2025Q1,Female,Not stated,Dublin,
25756,Persons aged 15-89 years in Employment,2025Q1,Female,Not stated,Mid-East,
25757,Persons aged 15-89 years in Employment,2025Q1,Female,Not stated,Midland,


-------------------


In [3]:
# Load the raw QLF03 table and convert its string Quarter column to datetime.
# quarter_to_dt comes from the project's etl module; its return value is not
# used here, so it is expected to modify the frame in place (TODO confirm).
qlf03_csv_path = '../data/raw/QLF03.csv'
df_qlf03 = pd.read_csv(qlf03_csv_path)
quarter_to_dt(df_qlf03)

**Question 2.** How has foreign workforce participation evolved in Ireland since the early 2000s?

In [4]:
# Quick preview (first and last rows) of every candidate table for question 2.
#
# All four tables can be used:
# - QLF34 / QLF47 break employment down by NACE economic sector, so they show
#   which sectors employ the most foreign workers.
# - QLF35 / QLF48 break the population down by ILO economic status, so they show
#   how the number of people from abroad who work has changed.
q2_related = [
    'QLF34',
    'QLF35',
    'QLF47',
    'QLF48',
]

for table_code in q2_related:
    raw_csv_path = f'../data/raw/{table_code}.csv'
    df_v = pd.read_csv(raw_csv_path)

    print(f'{table_code} Dataframe')
    # display() renders each argument in turn: head first, then tail.
    display(df_v.head(), df_v.tail())
    print('-------------------')

QLF34 Dataframe


Unnamed: 0,STATISTIC,Quarter,Nationality,NACE Rev 2 Economic Sector,value
0,Persons aged 15 years and over in Employment,1998Q1,All nationalities,All NACE economic sectors,1550.3
1,Persons aged 15 years and over in Employment,1998Q1,All nationalities,"Agriculture, forestry and fishing (A)",135.6
2,Persons aged 15 years and over in Employment,1998Q1,All nationalities,Construction (F),103.7
3,Persons aged 15 years and over in Employment,1998Q1,All nationalities,Wholesale and retail trade; repair of motor ve...,219.5
4,Persons aged 15 years and over in Employment,1998Q1,All nationalities,Transportation and storage (H),65.6


Unnamed: 0,STATISTIC,Quarter,Nationality,NACE Rev 2 Economic Sector,value
10299,Persons aged 15 years and over in Employment,2020Q4,Other nationalities (7),Human health and social work activities (Q),21.7
10300,Persons aged 15 years and over in Employment,2020Q4,Other nationalities (7),Industry (B to E),
10301,Persons aged 15 years and over in Employment,2020Q4,Other nationalities (7),"Financial, insurance and real estate activitie...",
10302,Persons aged 15 years and over in Employment,2020Q4,Other nationalities (7),Other NACE activities (R to U),
10303,Persons aged 15 years and over in Employment,2020Q4,Other nationalities (7),Not stated,


-------------------
QLF35 Dataframe


Unnamed: 0,STATISTIC,Quarter,Nationality,ILO Economic Status,value
0,Persons aged 15 years and over,1998Q1,All nationalities,All ILO economic status,2857.2
1,Persons aged 15 years and over,1998Q1,All nationalities,In labour force,1699.0
2,Persons aged 15 years and over,1998Q1,All nationalities,In employment,1550.3
3,Persons aged 15 years and over,1998Q1,All nationalities,Unemployed,148.7
4,Persons aged 15 years and over,1998Q1,All nationalities,Not in labour force,1158.2


Unnamed: 0,STATISTIC,Quarter,Nationality,ILO Economic Status,value
3215,Persons aged 15 years and over,2020Q4,Other nationalities (7),All ILO economic status,196.2
3216,Persons aged 15 years and over,2020Q4,Other nationalities (7),In labour force,133.7
3217,Persons aged 15 years and over,2020Q4,Other nationalities (7),In employment,121.0
3218,Persons aged 15 years and over,2020Q4,Other nationalities (7),Unemployed,
3219,Persons aged 15 years and over,2020Q4,Other nationalities (7),Not in labour force,62.5


-------------------
QLF47 Dataframe


Unnamed: 0,Statistic,Quarter,NACE Rev 2 Economic Sector,Citizenship,value
0,Persons aged 15 years and over in Employment,2021Q1,All NACE economic sectors,All Countries,2259.9
1,Persons aged 15 years and over in Employment,2021Q1,All NACE economic sectors,EU14 excl Irl (countries in the EU pre 2004 ex...,60.7
2,Persons aged 15 years and over in Employment,2021Q1,All NACE economic sectors,EU15 to EU27 (accession countries joined post ...,137.1
3,Persons aged 15 years and over in Employment,2021Q1,All NACE economic sectors,United Kingdom,46.4
4,Persons aged 15 years and over in Employment,2021Q1,All NACE economic sectors,Ireland,1898.0


Unnamed: 0,Statistic,Quarter,NACE Rev 2 Economic Sector,Citizenship,value
2137,Persons aged 15 years and over in Employment,2025Q1,Not stated,EU15 to EU27 (accession countries joined post ...,
2138,Persons aged 15 years and over in Employment,2025Q1,Not stated,United Kingdom,
2139,Persons aged 15 years and over in Employment,2025Q1,Not stated,Ireland,
2140,Persons aged 15 years and over in Employment,2025Q1,Not stated,All countries excluding Ireland,
2141,Persons aged 15 years and over in Employment,2025Q1,Not stated,"All countries excluding Ireland,United Kingdom...",


-------------------
QLF48 Dataframe


Unnamed: 0,Statistic,Quarter,ILO Economic Status,Citizenship,value
0,Persons aged 15 years and over,2021Q1,All ILO economic status,All Countries,4056.3
1,Persons aged 15 years and over,2021Q1,All ILO economic status,EU14 excl Irl (countries in the EU pre 2004 ex...,80.9
2,Persons aged 15 years and over,2021Q1,All ILO economic status,EU15 to EU27 (accession countries joined post ...,207.1
3,Persons aged 15 years and over,2021Q1,All ILO economic status,United Kingdom,91.3
4,Persons aged 15 years and over,2021Q1,All ILO economic status,Ireland,3478.3


Unnamed: 0,Statistic,Quarter,ILO Economic Status,Citizenship,value
590,Persons aged 15 years and over,2025Q1,Not in labour force,EU15 to EU27 (accession countries joined post ...,38.4
591,Persons aged 15 years and over,2025Q1,Not in labour force,United Kingdom,34.1
592,Persons aged 15 years and over,2025Q1,Not in labour force,Ireland,1358.5
593,Persons aged 15 years and over,2025Q1,Not in labour force,All countries excluding Ireland,157.9
594,Persons aged 15 years and over,2025Q1,Not in labour force,"All countries excluding Ireland,United Kingdom...",71.2


-------------------


In [5]:
def load_q2_table(table_code):
    """Read one raw question-2 table and run the quarter conversion on it.

    Parameters
    ----------
    table_code : str
        Table identifier; the file read is ``../data/raw/<table_code>.csv``.

    Returns
    -------
    pandas.DataFrame
        The frame that was read and then passed to ``quarter_to_dt``.
        The return value of ``quarter_to_dt`` is ignored, so it is expected
        to modify the frame in place (TODO confirm against etl).
    """
    table = pd.read_csv(f'../data/raw/{table_code}.csv')
    quarter_to_dt(table)
    return table


# Each table is read and converted before the next one is touched.
df_qlf34 = load_q2_table('QLF34')
df_qlf35 = load_q2_table('QLF35')
df_qlf47 = load_q2_table('QLF47')
df_qlf48 = load_q2_table('QLF48')

In [6]:
# Print how often each category occurs in the nationality / citizenship column
# of every question-2 table. QLF34 and QLF35 label it 'Nationality', while
# QLF47 and QLF48 label it 'Citizenship'.
category_columns = (
    ('QLF34', df_qlf34, 'Nationality'),
    ('QLF35', df_qlf35, 'Nationality'),
    ('QLF47', df_qlf47, 'Citizenship'),
    ('QLF48', df_qlf48, 'Citizenship'),
)

for position, (title, table, column) in enumerate(category_columns):
    # A separator goes between tables only, not before the first or after the last.
    if position > 0:
        print('------------------')
    print(title)
    print(table[column].value_counts())

QLF34
Nationality
All nationalities                                                      1472
EU14 excl Irl (countries in the EU pre 2004 excluding UK & Ireland)    1472
EU15 to EU27 (accession countries joined post 2004)                    1472
UK                                                                     1472
Irish                                                                  1472
Non-Irish                                                              1472
Other nationalities (7)                                                1472
Name: count, dtype: int64
------------------
QLF35
Nationality
All nationalities                                                      460
EU14 excl Irl (countries in the EU pre 2004 excluding UK & Ireland)    460
EU15 to EU27 (accession countries joined post 2004)                    460
UK                                                                     460
Irish                                                                  460
Non-Irish   

In [7]:
# Sanity check on QLF48: summed over the 'All ILO economic status' rows, the
# 'Ireland' and 'All countries excluding Ireland' totals are printed next to
# the 'All Countries' total so they can be compared.
# This cell reads the 'Citizenship' column, so it has to run before that
# column is renamed to 'Nationality'.
all_status_rows = df_qlf48['ILO Economic Status'] == 'All ILO economic status'

for citizenship_label in ('Ireland', 'All countries excluding Ireland', 'All Countries'):
    selected_rows = (df_qlf48['Citizenship'] == citizenship_label) & all_status_rows
    print(df_qlf48.loc[selected_rows, 'value'].sum())

60831.1
11309.9
72141.0


In [8]:
# Short labels for the two EU groups; both source vocabularies spell these
# categories identically.
shared_eu_labels = {
    'EU14 excl Irl (countries in the EU pre 2004 excluding UK & Ireland)': 'EU14',
    'EU15 to EU27 (accession countries joined post 2004)': 'EU15-27',
}

# Nationality categories of QLF34 and QLF35 -> short label.
# Only the mutually exclusive groups are listed; the aggregate categories
# ('All nationalities', 'Non-Irish') are deliberately left out.
natio_dict = {
    **shared_eu_labels,
    'UK': 'UK',
    'Irish': 'Irish',
    'Other nationalities (7)': 'Non-EU',
}

# Citizenship categories of QLF47 and QLF48 -> the same short labels.
# The aggregate categories ('All Countries', 'All countries excluding Ireland')
# are left out here as well.
citi_dict = {
    **shared_eu_labels,
    'United Kingdom': 'UK',
    'Ireland': 'Irish',
    'All countries excluding Ireland,United Kingdom and EU272020': 'Non-EU',
}

In [9]:
# QLF47 and QLF48 call the column 'Citizenship'; rename it to 'Nationality' so
# all four question-2 tables share the same column name. The rename is done in
# place, on the existing frame objects.
citizenship_to_nationality = {'Citizenship': 'Nationality'}
for citizenship_table in (df_qlf47, df_qlf48):
    citizenship_table.rename(columns=citizenship_to_nationality, inplace=True)

df_qlf48

Unnamed: 0,Statistic,Quarter,ILO Economic Status,Nationality,value,period,date
0,Persons aged 15 years and over,2021Q1,All ILO economic status,All Countries,4056.3,2021Q1,2021-01-01
1,Persons aged 15 years and over,2021Q1,All ILO economic status,EU14 excl Irl (countries in the EU pre 2004 ex...,80.9,2021Q1,2021-01-01
2,Persons aged 15 years and over,2021Q1,All ILO economic status,EU15 to EU27 (accession countries joined post ...,207.1,2021Q1,2021-01-01
3,Persons aged 15 years and over,2021Q1,All ILO economic status,United Kingdom,91.3,2021Q1,2021-01-01
4,Persons aged 15 years and over,2021Q1,All ILO economic status,Ireland,3478.3,2021Q1,2021-01-01
...,...,...,...,...,...,...,...
590,Persons aged 15 years and over,2025Q1,Not in labour force,EU15 to EU27 (accession countries joined post ...,38.4,2025Q1,2025-01-01
591,Persons aged 15 years and over,2025Q1,Not in labour force,United Kingdom,34.1,2025Q1,2025-01-01
592,Persons aged 15 years and over,2025Q1,Not in labour force,Ireland,1358.5,2025Q1,2025-01-01
593,Persons aged 15 years and over,2025Q1,Not in labour force,All countries excluding Ireland,157.9,2025Q1,2025-01-01


In [10]:
# The four question-2 tables, collected so they can be processed in one loop.
# The list holds the frame objects themselves, not copies.
q2_dataframes = [
    df_qlf34,
    df_qlf35,
    df_qlf47,
    df_qlf48,
]

In [11]:
# renaming the nationality and citizenship columns in data to merge them later
for df in q2_dataframes:
    if df is df_qlf34 or df is df_qlf35:
        df['Nationality'] = df['Nationality'].map(natio_dict)
    else:
        df['Nationality'] = df['Nationality'].map(citi_dict)

    df.drop(df[df['Nationality'].isnull()].index, inplace=True)
    df.reset_index(drop=True, inplace=True)



#df_qlf34['Nationality'] = df_qlf34['Nationality'].map(natio_dict)
#df_qlf35['Nationality'] = df_qlf35['Nationality'].map(natio_dict)
#df_qlf47['Nationality'] = df_qlf47['Nationality'].map(citi_dict)
#df_qlf48['Nationality'] = df_qlf48['Nationality'].map(citi_dict)

#df.drop(df[df['Nationality'].isnull()].index, inplace=True)
#df.reset_index(drop=True,
#                     inplace=True)
df_qlf34

Unnamed: 0,STATISTIC,Quarter,Nationality,NACE Rev 2 Economic Sector,value,period,date
0,Persons aged 15 years and over in Employment,1998Q1,EU14,All NACE economic sectors,7.0,1998Q1,1998-01-01
1,Persons aged 15 years and over in Employment,1998Q1,EU14,"Agriculture, forestry and fishing (A)",,1998Q1,1998-01-01
2,Persons aged 15 years and over in Employment,1998Q1,EU14,Construction (F),,1998Q1,1998-01-01
3,Persons aged 15 years and over in Employment,1998Q1,EU14,Wholesale and retail trade; repair of motor ve...,,1998Q1,1998-01-01
4,Persons aged 15 years and over in Employment,1998Q1,EU14,Transportation and storage (H),,1998Q1,1998-01-01
...,...,...,...,...,...,...,...
7355,Persons aged 15 years and over in Employment,2020Q4,Non-EU,Human health and social work activities (Q),21.7,2020Q4,2020-10-01
7356,Persons aged 15 years and over in Employment,2020Q4,Non-EU,Industry (B to E),,2020Q4,2020-10-01
7357,Persons aged 15 years and over in Employment,2020Q4,Non-EU,"Financial, insurance and real estate activitie...",,2020Q4,2020-10-01
7358,Persons aged 15 years and over in Employment,2020Q4,Non-EU,Other NACE activities (R to U),,2020Q4,2020-10-01
