In [1]:
import os
import sys
import pandas as pd

# Resolve the directory two levels above the working directory and make sure it
# is importable (presumably so the `scripts` package imported in the next cell
# resolves -- confirm against the repository layout).
TOP_DIR = os.path.abspath("../../")
if TOP_DIR not in sys.path:
    sys.path.append(TOP_DIR)

In [2]:
from prepare import LMS_EXTRACT, DASHBOARD_DIR, create_table, save_files, summarise, labour_market_status_variables, long_term_unemployed_variables
from scripts.util.metadata import read_meta, filter_for_dataset, extract_dates

Load in the LMS data

In [3]:
# Read the LMS extract, parsing the period column as datetimes.
lms_extract = pd.read_csv(LMS_EXTRACT, parse_dates=['lms_period'])

Extract relevant metrics for labour market status, pivot and save file

In [4]:
# Build the labour market status table from the extract and pass it through
# save_files; the name is bound to whatever save_files returns.
labour_market_status = (
    lms_extract
    .pipe(create_table, labour_market_status_variables)
    .pipe(save_files, 'labour_market_status')
)
labour_market_status

variable,age_16_to_17_not_in_ft_education_in_employment_sa,age_18_to_24_not_in_ft_education_in_employment_sa,age_16_to_24_not_in_ft_education_in_employment_sa,men_16_to_24_not_in_ft_education_in_employment_sa,women_16_to_24_not_in_ft_education_in_employment_sa,age_16_to_24_in_ft_education_in_employment_sa,age_16_to_17_not_in_ft_education_unemployed_sa,age_18_to_24_not_in_ft_education_unemployed_sa,age_16_to_24_not_in_ft_education_unemployed_sa,men_16_to_24_not_in_ft_education_unemployed_sa,...,men_16_to_24_not_in_ft_education_total_sa,women_16_to_24_not_in_ft_education_total_sa,unemployment_rate_sa,economic_inactivity_rate_sa,age_16_to_24_in_employment_sa,age_16_to_24_unemployed_sa,age_16_to_24_economically_inactive_sa,age_16_to_24_unemployment_rate_sa,quarter_label,quarter_axis_label
lms_period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000-01-01,249.0,2765.0,3013.0,1632.0,1381.0,920.0,79.0,329.0,408.0,243.0,...,2017.0,1966.0,5.9,23.3,3933.0,570.0,1756.0,12.7,Dec-Feb 2000,Dec-Feb\n2000
2000-04-01,246.0,2748.0,2994.0,1625.0,1369.0,946.0,85.0,337.0,422.0,255.0,...,2031.0,1942.0,5.7,23.2,3940.0,579.0,1744.0,12.8,Mar-May 2000,Mar-May\n2000
2000-07-01,240.0,2750.0,2989.0,1627.0,1362.0,951.0,77.0,316.0,393.0,230.0,...,2011.0,1934.0,5.3,23.3,3941.0,534.0,1801.0,11.9,Jun-Aug 2000,Jun-Aug\n2000
2000-10-01,226.0,2749.0,2976.0,1614.0,1361.0,934.0,85.0,317.0,402.0,237.0,...,2013.0,1939.0,5.3,23.5,3910.0,553.0,1842.0,12.4,Sep-Nov 2000,Sep-Nov\n2000
2001-01-01,241.0,2735.0,2976.0,1622.0,1354.0,952.0,82.0,324.0,406.0,251.0,...,2030.0,1928.0,5.2,23.3,3928.0,550.0,1856.0,12.3,Dec-Feb 2001,Dec-Feb\n2001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-04-01,76.0,2741.0,2817.0,1463.0,1354.0,925.0,25.0,255.0,281.0,182.0,...,1948.0,1774.0,3.8,21.1,3742.0,434.0,2645.0,10.4,Mar-May 2022,Mar-May\n2022
2022-07-01,74.0,2716.0,2789.0,1448.0,1342.0,952.0,22.0,203.0,225.0,143.0,...,1874.0,1736.0,3.6,21.7,3741.0,372.0,2724.0,9.0,Jun-Aug 2022,Jun-Aug\n2022
2022-10-01,66.0,2661.0,2727.0,1397.0,1330.0,965.0,22.0,282.0,303.0,191.0,...,1897.0,1763.0,3.7,21.5,3692.0,461.0,2686.0,11.1,Sep-Nov 2022,Sep-Nov\n2022
2023-01-01,74.0,2746.0,2821.0,1492.0,1328.0,955.0,25.0,298.0,323.0,203.0,...,2011.0,1824.0,3.9,21.1,3776.0,459.0,2604.0,10.8,Dec-Feb 2023,Dec-Feb\n2023


Extract relevant metrics for long-term unemployment, pivot and save file

In [5]:
def construct_16_24_stats(data):
    """Add combined 16-24 unemployment columns built from the 16-17 and 18-24 columns.

    The input frame is not modified; all new columns are written to a copy,
    so the function is safe to use in a ``.pipe`` chain and to re-run.

    Columns added (in this order):

    * ``age_16_to_24_unemployed_sa``,
      ``age_16_to_24_unemployed_6_to_12_months_sa``,
      ``age_16_to_24_unemployed_over_12_months_sa`` -- the 18-24 column plus
      the matching 16-17 column, rounded to 0 decimal places.
    * ``age_16_to_24_unemployed_6_to_12_months_rate_sa``,
      ``age_16_to_24_unemployed_over_12_months_rate_sa`` -- the corresponding
      16-24 level as a percentage of ``age_16_to_24_unemployed_sa``, rounded
      to 1 decimal place.
    * ``missing_over_12_months_data`` -- True where the summed over-12-months
      level is NaN (i.e. at least one of its two inputs is missing).

    Where ``missing_over_12_months_data`` is True, the 16-24 over-12-months
    level and rate are replaced by the 18-24 level and the 18-24 rate
    (rounded to 1 decimal place) respectively.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the ``age_16_to_17_*`` and ``age_18_to_24_*`` level
        columns read below, plus
        ``age_18_to_24_unemployed_over_12_months_rate_sa``.

    Returns
    -------
    pandas.DataFrame
        A copy of ``data`` with the columns listed above added.
    """
    # Work on a copy so the caller's frame is never mutated.
    result = data.copy()

    # Combined 16-24 levels: sum of the two age bands.
    result['age_16_to_24_unemployed_sa'] = (
        result['age_18_to_24_unemployed_sa'] + result['age_16_to_17_unemployed_sa']
    ).round(0)
    result['age_16_to_24_unemployed_6_to_12_months_sa'] = (
        result['age_18_to_24_unemployed_6_to_12_months_sa']
        + result['age_16_to_17_unemployed_6_to_12_months_sa']
    ).round(0)
    result['age_16_to_24_unemployed_over_12_months_sa'] = (
        result['age_18_to_24_unemployed_over_12_months_sa']
        + result['age_16_to_17_unemployed_over_12_months_sa']
    ).round(0)

    # Duration bands as a percentage of all unemployed 16-24 year olds.
    result['age_16_to_24_unemployed_6_to_12_months_rate_sa'] = (
        result['age_16_to_24_unemployed_6_to_12_months_sa']
        / result['age_16_to_24_unemployed_sa'] * 100
    ).round(1)
    result['age_16_to_24_unemployed_over_12_months_rate_sa'] = (
        result['age_16_to_24_unemployed_over_12_months_sa']
        / result['age_16_to_24_unemployed_sa'] * 100
    ).round(1)

    # Handle missing data by using the 18-24 figure instead. The flag is kept
    # as a column so downstream consumers can tell which rows were substituted.
    missing = result['age_16_to_24_unemployed_over_12_months_sa'].isna()
    result['missing_over_12_months_data'] = missing
    result['age_16_to_24_unemployed_over_12_months_sa'] = (
        result['age_16_to_24_unemployed_over_12_months_sa'].mask(
            missing,
            result['age_18_to_24_unemployed_over_12_months_sa'],
        )
    )
    result['age_16_to_24_unemployed_over_12_months_rate_sa'] = (
        result['age_16_to_24_unemployed_over_12_months_rate_sa'].mask(
            missing,
            result['age_18_to_24_unemployed_over_12_months_rate_sa'].round(1),
        )
    )
    return result

def add_axis_labels(data):
    """Return a copy of ``data`` with a ``quarter_axis_label`` column.

    The label is ``quarter_label`` with every space replaced by the
    two-character sequence backslash + ``n`` (not an actual newline
    character). The input frame is not modified. If ``quarter_axis_label``
    already exists it is overwritten in its current position; otherwise it
    is appended.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a string column ``quarter_label``.

    Returns
    -------
    pandas.DataFrame
        New frame including ``quarter_axis_label``.
    """
    # regex=False pins literal replacement: the default value of `regex`
    # differs between pandas major versions, and in regex mode a backslash
    # in the replacement would be treated as an escape sequence.
    return data.assign(
        quarter_axis_label=data['quarter_label'].str.replace(' ', '\\n', regex=False)
    )

In [6]:
# Build the long-term unemployment table: select and pivot the variables,
# derive the combined 16-24 columns, then add the axis labels.
long_term_unemployed = (
    lms_extract
    .pipe(create_table, long_term_unemployed_variables)
    .pipe(construct_16_24_stats)
    .pipe(add_axis_labels)
)
# Kept as the cell's last expression, so whatever save_files returns is displayed.
long_term_unemployed.pipe(save_files, 'long_term_unemployed')

variable,age_16_to_17_unemployed_sa,age_18_to_24_unemployed_sa,age_16_to_17_unemployed_6_to_12_months_sa,age_16_to_17_unemployed_over_12_months_sa,age_16_to_17_unemployed_over_12_months_rate_sa,age_18_to_24_unemployed_6_to_12_months_sa,age_18_to_24_unemployed_over_12_months_sa,age_18_to_24_unemployed_over_12_months_rate_sa,quarter_label,quarter_axis_label,age_16_to_24_unemployed_sa,age_16_to_24_unemployed_6_to_12_months_sa,age_16_to_24_unemployed_over_12_months_sa,age_16_to_24_unemployed_6_to_12_months_rate_sa,age_16_to_24_unemployed_over_12_months_rate_sa,missing_over_12_months_data
lms_period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2000-01-01,172.0,398.0,23.0,10.0,5.6,63.0,56.0,14.2,Dec-Feb 2000,Dec-Feb\n2000,570.0,86.0,66.0,15.1,11.6,False
2000-04-01,177.0,402.0,24.0,9.0,5.2,53.0,65.0,16.2,Mar-May 2000,Mar-May\n2000,579.0,77.0,74.0,13.3,12.8,False
2000-07-01,163.0,371.0,25.0,12.0,7.1,59.0,62.0,16.8,Jun-Aug 2000,Jun-Aug\n2000,534.0,84.0,74.0,15.7,13.9,False
2000-10-01,171.0,382.0,20.0,11.0,6.5,56.0,52.0,13.7,Sep-Nov 2000,Sep-Nov\n2000,553.0,76.0,63.0,13.7,11.4,False
2001-01-01,160.0,390.0,22.0,9.0,5.8,61.0,53.0,13.5,Dec-Feb 2001,Dec-Feb\n2001,550.0,83.0,62.0,15.1,11.3,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-04-01,96.0,338.0,15.0,5.0,4.9,21.0,59.0,17.4,Mar-May 2022,Mar-May\n2022,434.0,36.0,64.0,8.3,14.7,False
2022-07-01,95.0,277.0,10.0,6.0,6.5,33.0,55.0,19.8,Jun-Aug 2022,Jun-Aug\n2022,372.0,43.0,61.0,11.6,16.4,False
2022-10-01,103.0,358.0,9.0,,,51.0,52.0,14.5,Sep-Nov 2022,Sep-Nov\n2022,461.0,60.0,52.0,13.0,14.5,True
2023-01-01,82.0,377.0,9.0,5.0,5.7,36.0,52.0,13.7,Dec-Feb 2023,Dec-Feb\n2023,459.0,45.0,57.0,9.8,12.4,False


Create a summary file

In [7]:
# Stack the output of summarise() for both tables on top of the dates
# extracted from the 'LMS' metadata entry, then write the combined result
# to latest.json in the dashboard directory.
summary = pd.concat(
    [
        summarise(
            long_term_unemployed=long_term_unemployed,
            labour_market_status=labour_market_status,
        ),
        (
            read_meta()
            .pipe(filter_for_dataset, 'LMS')
            .pipe(extract_dates)
        ),
    ]
)

summary.to_json(
    os.path.join(DASHBOARD_DIR, 'latest.json'),
    indent=2,
    date_format='iso',
)

In [8]:
# Display the summary object built in the previous cell for a visual check.
summary

unemployment_rate_all_working_age                                      4.1
economic_inactivity_rate_all_working_age                              20.8
unemployment_rate_young_people                                        11.4
economic_inactivity_rate_young_people                                 38.0
unemployment_rate_not_in_full_time_education                          10.6
economic_inactivity_rate_not_in_full_time_education                   17.8
long_term_unemployment_rate_6_to_12_months                            19.5
long_term_unemployment_rate_over_12_months                            53.0
most_recent_lms_period                                        Mar-May 2023
last_update                                            2023-07-11 00:00:00
next_update                                            2023-08-15 00:00:00
dtype: object