In [1]:
import os
import sys
import pandas as pd

# Resolve the directory two levels above the working directory and make it
# importable, so that the `scripts.*` imports further down can be resolved.
TOP_DIR = os.path.abspath("../../")
if TOP_DIR not in sys.path:
    # Guarded append keeps sys.path free of duplicates when the cell is re-run.
    sys.path.append(TOP_DIR)

In [2]:
from prepare import LMS_EXTRACT, DASHBOARD_DIR, create_table, save_files, summarise, labour_market_status_variables, long_term_unemployed_variables
from scripts.util.metadata import read_meta, filter_for_dataset, extract_dates

Load in the LMS data

In [3]:
# Read the LMS extract CSV, asking pandas to parse `lms_period` as dates.
lms_extract = pd.read_csv(LMS_EXTRACT, parse_dates=['lms_period'])

Extract the relevant labour market status metrics, pivot them and save the resulting file

In [4]:
# Build the labour market status table from the extract and hand it to
# save_files; whatever save_files returns is kept as `labour_market_status`.
labour_market_status = (
    lms_extract
    .pipe(create_table, labour_market_status_variables)
    .pipe(save_files, 'labour_market_status')
)
labour_market_status

variable,age_16_to_17_not_in_ft_education_in_employment_sa,age_18_to_24_not_in_ft_education_in_employment_sa,age_16_to_24_not_in_ft_education_in_employment_sa,men_16_to_24_not_in_ft_education_in_employment_sa,women_16_to_24_not_in_ft_education_in_employment_sa,age_16_to_24_in_ft_education_in_employment_sa,age_16_to_17_not_in_ft_education_unemployed_sa,age_18_to_24_not_in_ft_education_unemployed_sa,age_16_to_24_not_in_ft_education_unemployed_sa,men_16_to_24_not_in_ft_education_unemployed_sa,...,men_16_to_24_not_in_ft_education_total_sa,women_16_to_24_not_in_ft_education_total_sa,unemployment_rate_sa,economic_inactivity_rate_sa,age_16_to_24_in_employment_sa,age_16_to_24_unemployed_sa,age_16_to_24_economically_inactive_sa,age_16_to_24_unemployment_rate_sa,quarter_label,quarter_axis_label
lms_period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000-02-01,251.0,2756.0,3006.0,1638.0,1369.0,925.0,78.0,335.0,413.0,241.0,...,2025.0,1959.0,5.9,23.2,3931.0,581.0,1748.0,12.9,Jan-Mar 2000,Jan-Mar\n2000
2000-05-01,242.0,2723.0,2965.0,1612.0,1353.0,955.0,76.0,330.0,406.0,249.0,...,2016.0,1918.0,5.6,23.3,3921.0,552.0,1792.0,12.3,Apr-Jun 2000,Apr-Jun\n2000
2000-08-01,234.0,2755.0,2989.0,1626.0,1363.0,956.0,85.0,316.0,401.0,231.0,...,2023.0,1933.0,5.4,23.2,3945.0,545.0,1795.0,12.1,Jul-Sep 2000,Jul-Sep\n2000
2000-11-01,233.0,2744.0,2977.0,1625.0,1352.0,927.0,77.0,318.0,395.0,236.0,...,2013.0,1933.0,5.3,23.5,3903.0,548.0,1863.0,12.3,Oct-Dec 2000,Oct-Dec\n2000
2001-02-01,245.0,2744.0,2989.0,1628.0,1361.0,953.0,80.0,322.0,402.0,248.0,...,2035.0,1936.0,5.1,23.4,3942.0,536.0,1866.0,12.0,Jan-Mar 2001,Jan-Mar\n2001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-05-01,75.0,2708.0,2782.0,1442.0,1340.0,925.0,23.0,246.0,269.0,182.0,...,1908.0,1762.0,3.9,21.4,3708.0,429.0,2687.0,10.4,Apr-Jun 2022,Apr-Jun\n2022
2022-08-01,69.0,2683.0,2752.0,1424.0,1328.0,942.0,19.0,234.0,253.0,164.0,...,1899.0,1739.0,3.7,21.6,3694.0,399.0,2746.0,9.8,Jul-Sep 2022,Jul-Sep\n2022
2022-11-01,61.0,2668.0,2729.0,1406.0,1323.0,971.0,26.0,295.0,321.0,203.0,...,1930.0,1799.0,3.8,21.4,3700.0,471.0,2667.0,11.3,Oct-Dec 2022,Oct-Dec\n2022
2023-02-01,84.0,2750.0,2834.0,1507.0,1327.0,964.0,20.0,311.0,331.0,199.0,...,1998.0,1818.0,4.0,21.0,3798.0,482.0,2561.0,11.3,Jan-Mar 2023,Jan-Mar\n2023


Extract relevant metrics for long-term unemployment, pivot and save file

In [5]:
def construct_16_24_stats(data):
    """Derive combined 16-24 unemployment measures from the 16-17 and 18-24 columns.

    The combined counts are the sum of the two age bands, rounded to whole
    units. The 6-to-12-month and over-12-month rates are each count expressed
    as a percentage of the combined unemployed count, rounded to one decimal.

    Where the combined over-12-months count cannot be computed (the sum is
    NaN), the 18-24 count and the 18-24 rate are used instead, and the row is
    flagged in ``missing_over_12_months_data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the ``age_16_to_17_*`` and ``age_18_to_24_*`` unemployed,
        6-to-12-months and over-12-months columns, plus
        ``age_18_to_24_unemployed_over_12_months_rate_sa``.

    Returns
    -------
    pandas.DataFrame
        A copy of ``data`` with the derived ``age_16_to_24_*`` columns and the
        ``missing_over_12_months_data`` flag added. The input is not modified.
    """
    # Work on a copy: the caller's frame stays untouched, so re-running the
    # cell (or reusing the input elsewhere) cannot see half-derived columns.
    result = data.copy()

    unemployed = (
        result['age_18_to_24_unemployed_sa'] + result['age_16_to_17_unemployed_sa']
    ).round(0)
    six_to_twelve = (
        result['age_18_to_24_unemployed_6_to_12_months_sa']
        + result['age_16_to_17_unemployed_6_to_12_months_sa']
    ).round(0)
    over_twelve = (
        result['age_18_to_24_unemployed_over_12_months_sa']
        + result['age_16_to_17_unemployed_over_12_months_sa']
    ).round(0)

    # Bracket assignment is used throughout; attribute-style assignment only
    # works for columns that already exist and is easy to get silently wrong.
    result['age_16_to_24_unemployed_sa'] = unemployed
    result['age_16_to_24_unemployed_6_to_12_months_sa'] = six_to_twelve
    result['age_16_to_24_unemployed_over_12_months_sa'] = over_twelve
    result['age_16_to_24_unemployed_6_to_12_months_rate_sa'] = (
        six_to_twelve / unemployed * 100
    ).round(1)
    result['age_16_to_24_unemployed_over_12_months_rate_sa'] = (
        over_twelve / unemployed * 100
    ).round(1)

    # Handle missing data by using the 18-24 figure instead
    missing = over_twelve.isna()
    result['missing_over_12_months_data'] = missing
    result['age_16_to_24_unemployed_over_12_months_sa'] = over_twelve.mask(
        missing,
        result['age_18_to_24_unemployed_over_12_months_sa'],
    )
    result['age_16_to_24_unemployed_over_12_months_rate_sa'] = (
        result['age_16_to_24_unemployed_over_12_months_rate_sa'].mask(
            missing,
            result['age_18_to_24_unemployed_over_12_months_rate_sa'].round(1),
        )
    )
    return result

def add_axis_labels(data):
    """Return a copy of ``data`` with a ``quarter_axis_label`` column.

    The label is ``quarter_label`` with every space replaced by the
    two-character sequence backslash + ``n`` (not an actual newline).
    NOTE(review): presumably the consumer of the saved file interprets that
    sequence as a line break on chart axes — confirm.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a string column ``quarter_label``.

    Returns
    -------
    pandas.DataFrame
        New frame; an existing ``quarter_axis_label`` column is overwritten in
        place, otherwise the column is appended. The input is not modified.
    """
    # regex=False makes the replacement literal on every pandas version; as a
    # regex template the replacement would be expanded to a real newline.
    axis_labels = data['quarter_label'].str.replace(' ', '\\n', regex=False)
    return data.assign(quarter_axis_label=axis_labels)

In [6]:
# Build the long-term unemployment table step by step: pivot the extract,
# derive the combined 16-24 measures, then (re)build the axis labels.
long_term_unemployed = (
    lms_extract
    .pipe(create_table, long_term_unemployed_variables)
    .pipe(construct_16_24_stats)
    .pipe(add_axis_labels)
)
# Last expression of the cell: the return value of save_files is displayed.
save_files(long_term_unemployed, 'long_term_unemployed')

variable,age_16_to_17_unemployed_sa,age_18_to_24_unemployed_sa,age_16_to_17_unemployed_6_to_12_months_sa,age_16_to_17_unemployed_over_12_months_sa,age_16_to_17_unemployed_over_12_months_rate_sa,age_18_to_24_unemployed_6_to_12_months_sa,age_18_to_24_unemployed_over_12_months_sa,age_18_to_24_unemployed_over_12_months_rate_sa,quarter_label,quarter_axis_label,age_16_to_24_unemployed_sa,age_16_to_24_unemployed_6_to_12_months_sa,age_16_to_24_unemployed_over_12_months_sa,age_16_to_24_unemployed_6_to_12_months_rate_sa,age_16_to_24_unemployed_over_12_months_rate_sa,missing_over_12_months_data
lms_period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2000-02-01,175.0,407.0,23.0,8.0,4.8,60.0,57.0,13.9,Jan-Mar 2000,Jan-Mar\n2000,582.0,83.0,65.0,14.3,11.2,False
2000-05-01,165.0,386.0,26.0,10.0,6.2,56.0,64.0,16.5,Apr-Jun 2000,Apr-Jun\n2000,551.0,82.0,74.0,14.9,13.4,False
2000-08-01,173.0,373.0,23.0,13.0,7.3,53.0,61.0,16.4,Jul-Sep 2000,Jul-Sep\n2000,546.0,76.0,74.0,13.9,13.6,False
2000-11-01,161.0,387.0,21.0,11.0,6.6,60.0,50.0,12.8,Oct-Dec 2000,Oct-Dec\n2000,548.0,81.0,61.0,14.8,11.1,False
2001-02-01,153.0,383.0,16.0,10.0,6.5,56.0,55.0,14.4,Jan-Mar 2001,Jan-Mar\n2001,536.0,72.0,65.0,13.4,12.1,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-05-01,103.0,326.0,13.0,2.0,2.0,26.0,59.0,18.1,Apr-Jun 2022,Apr-Jun\n2022,429.0,39.0,61.0,9.1,14.2,False
2022-08-01,90.0,309.0,11.0,7.0,7.3,39.0,55.0,17.6,Jul-Sep 2022,Jul-Sep\n2022,399.0,50.0,62.0,12.5,15.5,False
2022-11-01,101.0,370.0,8.0,4.0,4.3,44.0,54.0,14.7,Oct-Dec 2022,Oct-Dec\n2022,471.0,52.0,58.0,11.0,12.3,False
2023-02-01,87.0,395.0,13.0,,,53.0,58.0,14.8,Jan-Mar 2023,Jan-Mar\n2023,482.0,66.0,58.0,13.7,14.8,True


Create a summary file

In [7]:
# First part: the result of summarise() over the two tables built above.
lms_summary = summarise(
    long_term_unemployed=long_term_unemployed,
    labour_market_status=labour_market_status,
)
# Second part: metadata restricted to the 'LMS' dataset, passed through extract_dates.
lms_meta_dates = read_meta().pipe(filter_for_dataset, 'LMS').pipe(extract_dates)

summary = pd.concat([lms_summary, lms_meta_dates])

# Write the combined summary as indented JSON with ISO-formatted dates.
latest_json_path = os.path.join(DASHBOARD_DIR, 'latest.json')
summary.to_json(latest_json_path, indent=2, date_format='iso')

In [8]:
# Display the combined summary that was written to latest.json in the previous cell.
summary

unemployment_rate_all_working_age                                      4.3
economic_inactivity_rate_all_working_age                              20.9
unemployment_rate_young_people                                        12.3
economic_inactivity_rate_young_people                                 38.0
unemployment_rate_not_in_full_time_education                          11.6
economic_inactivity_rate_not_in_full_time_education                   17.3
long_term_unemployment_rate_6_to_12_months                            19.3
long_term_unemployment_rate_over_12_months                            65.0
most_recent_lms_period                                        Apr-Jun 2023
last_update                                            2023-08-15 00:00:00
next_update                                            2023-09-12 00:00:00
dtype: object