In [2]:
# LABOUR FLOW CHARTS
# To contribute to a descriptive paper on UK business dynamics since the pandemic
# Will Shepherd, Nov 2025

import pandas as pd
import geopandas as gpd
import numpy as np
import altair as alt
from pandas.api.types import CategoricalDtype
import os
import eco_style 
# Make "light" the active Altair theme for every chart built in this notebook.
# NOTE(review): the "light" theme is presumably registered by the eco_style import above — confirm.
alt.themes.enable("light")

ThemeRegistry.enable('light')

In [31]:
# Read in data
#
# Load the seasonally adjusted labour-market-flows sheet, keep the columns used
# downstream, and derive a proper timestamp from the quarter label.

flows_raw = pd.read_excel(
    "labourmarketflows_nov2025.xls",
    sheet_name='Labour market flows SA',
    skiprows=6,
    skipfooter=9,
)

# Name of the column that carries the quarter label (e.g. 'Oct-Dec 2001').
col_name = 'DATE'

flows_df = (
    flows_raw[['Unnamed: 0', 'Still in Employment', 'Levels', 'Rates']]
    .rename(columns={'Unnamed: 0': 'DATE'})
    # Strip the '-Dec ' style suffix so that only the quarter's first month
    # and the year remain, e.g. 'Oct-Dec 2001' -> 'Oct 2001'.
    .assign(Temp_Date=lambda d: d[col_name].str.replace(r'-\w{3}\s', ' ', regex=True))
    # Parse 'Mon YYYY' ('%b' = abbreviated month name, '%Y' = 4-digit year)
    # into a Timestamp on the first day of that month.
    .assign(Datetime=lambda d: pd.to_datetime(d['Temp_Date'], format='%b %Y'))
)

flows_df

Unnamed: 0,DATE,Still in Employment,Levels,Rates,Temp_Date,Datetime
0,Oct-Dec 2001,2.637576e+07,821003.280549,3.080762,Oct 2001,2001-10-01
1,Jan-Mar 2002,2.637546e+07,813983.073192,3.054621,Jan 2002,2002-01-01
2,Apr-Jun 2002,2.642387e+07,781956.599578,2.929035,Apr 2002,2002-04-01
3,Jul-Sep 2002,2.652558e+07,763238.371703,2.847630,Jul 2002,2002-07-01
4,Oct-Dec 2002,2.661772e+07,760258.705350,2.825146,Oct 2002,2002-10-01
...,...,...,...,...,...,...
91,Jul-Sep 2024,3.117688e+07,566755.472198,1.770062,Jul 2024,2024-07-01
92,Oct-Dec 2024,3.132441e+07,653938.611286,2.031127,Oct 2024,2024-10-01
93,Jan-Mar 2025,3.137927e+07,704609.928793,2.183493,Jan 2025,2025-01-01
94,Apr-Jun 2025,3.148927e+07,749666.431001,2.309961,Apr 2025,2025-04-01


In [25]:
# Quick-look line chart: the 'Rates' column of the flows table against time.
rate_x = alt.X('Datetime:T')
rate_y = alt.Y('Rates:Q')

chart = alt.Chart(flows_df).mark_line().encode(x=rate_x, y=rate_y)

chart

In [41]:
# Introduce industry moves as well
#
# Join the 'Industry moves' sheet onto the flows table by quarter label and
# express both series as proportions of workers still in employment:
#   'New job'              - the 'Rates' column, converted from percent to a proportion
#   'New job and industry' - moves to a different section / still in employment

# Raw sheet; its first (unnamed) column holds the quarter label.
industry_moves_raw = pd.read_excel(
    "labourmarketflows_nov2025.xls",
    sheet_name='Industry moves',
    skiprows=5,
    skipfooter=12,
).rename(columns={'Unnamed: 0': 'DATE'})

# The inner merge below discards any quarter whose label is missing from either
# sheet, so report those labels instead of losing them silently.
unmatched_quarters = set(flows_df['DATE'].dropna()).symmetric_difference(
    industry_moves_raw['DATE'].dropna()
)
if unmatched_quarters:
    print(f"Quarters dropped by the merge on DATE: {sorted(unmatched_quarters, key=str)}")

industry_flows_df = (
    flows_df
    .merge(industry_moves_raw, on='DATE')
    .assign(**{
        'Different section rate': lambda d: d['Moves to a different section'] / d['Still in Employment'],
        # 'Rates' is divided by 100 so that both series are on the same proportion scale.
        'Rates': lambda d: d['Rates'] / 100,
    })
    .rename(columns={'Rates': 'New job',
                     'Different section rate': 'New job and industry'})
)

industry_flows_df

Unnamed: 0,DATE,Still in Employment,Levels,New job,Temp_Date,Datetime,Total moves12,Moves to same section,Moves to a different section,New job and industry
0,Oct-Dec 2001,2.637576e+07,821003.280549,0.030808,Oct 2001,2001-10-01,940588.057073,421608.611182,514071.630661,0.019490
1,Jan-Mar 2002,2.637546e+07,813983.073192,0.030546,Jan 2002,2002-01-01,709158.921748,316498.635507,390531.093988,0.014807
2,Apr-Jun 2002,2.642387e+07,781956.599578,0.029290,Apr 2002,2002-04-01,737276.445725,332493.623099,400673.852559,0.015163
3,Jul-Sep 2002,2.652558e+07,763238.371703,0.028476,Jul 2002,2002-07-01,793098.286426,349677.593949,440645.769725,0.016612
4,Oct-Dec 2002,2.661772e+07,760258.705350,0.028251,Oct 2002,2002-10-01,872448.594061,399469.069877,467938.774418,0.017580
...,...,...,...,...,...,...,...,...,...,...
90,Jul-Sep 2024,3.117688e+07,566755.472198,0.017701,Jul 2024,2024-07-01,578882.000000,307213.000000,266171.000000,0.008537
91,Oct-Dec 2024,3.132441e+07,653938.611286,0.020311,Oct 2024,2024-10-01,758927.000000,427075.000000,326471.000000,0.010422
92,Jan-Mar 2025,3.137927e+07,704609.928793,0.021835,Jan 2025,2025-01-01,633624.000000,357737.000000,275887.000000,0.008792
93,Apr-Jun 2025,3.148927e+07,749666.431001,0.023100,Apr 2025,2025-04-01,708565.000000,385416.000000,314347.000000,0.009983


In [51]:
# Plot job-to-job moves AND sector moves
#
# Two lines (one per series) with a text label at the end of each line that
# takes the place of a legend. The result is exported as PNG and as JSON.

# Long format: one row per (quarter, series).
industry_flows_to_plot = industry_flows_df.melt(
    id_vars='Datetime',
    value_vars=['New job', 'New job and industry'],
)

# One colour encoding shared by both layers. The series names have no inherent
# order, so they are encoded as nominal, and the legend is switched off on
# every layer so that the end labels are the only series key.
series_color = alt.Color('variable:N', legend=None)

chart = alt.Chart(industry_flows_to_plot).mark_line().encode(
    x=alt.X('Datetime:T'),
    y=alt.Y(
        'value:Q',
        title='Proportion of workers moving to a new job per quarter: UK',
        axis=alt.Axis(format='%'),
    ),
    color=series_color,
)

# Add end labels: pick, for each series, the row with the latest Datetime.
end_point = industry_flows_to_plot.groupby('variable')['Datetime'].idxmax()
end_point_data = industry_flows_to_plot.loc[end_point]

text_labels = alt.Chart(end_point_data).mark_text(
    align='left',
    dx=5,  # nudge the label to the right of the final data point
    baseline='middle',
).encode(
    x=alt.X('Datetime:T'),
    y=alt.Y('value:Q', axis=alt.Axis(format='%')),
    text='variable:N',
    color=series_color,
)

final_chart = chart + text_labels

# save() does not create missing folders, so make sure the output folder exists.
os.makedirs('Charts', exist_ok=True)
final_chart.save('Charts/job_to_job_moves.png', scale_factor=2)
final_chart.save('Charts/job_to_job_moves.json')
