In [16]:
# HEADLINE DYNAMISM CHARTS 1997-2023
# To contribute to a descriptive paper on UK business dynamics since the pandemic
# Will Shepherd, Nov 2025

import pandas as pd
import geopandas as gpd
import numpy as np
import altair as alt
from pandas.api.types import CategoricalDtype
import os
import eco_style 
alt.themes.enable("light")

ThemeRegistry.enable('light')

In [3]:
# Import data
whole_economy_df = pd.read_excel('business_dynamism_BSD_1997_2023.xlsx', sheet_name='whole_economy')
firm_size_df = pd.read_excel('business_dynamism_BSD_1997_2023.xlsx', sheet_name='firm_size')
firm_age_df = pd.read_excel('business_dynamism_BSD_1997_2023.xlsx', sheet_name='firm_age')
industry_df = pd.read_excel('business_dynamism_BSD_1997_2023.xlsx', sheet_name='industry')
region_df = pd.read_excel('business_dynamism_BSD_1997_2023.xlsx', sheet_name='region')

In [10]:
# Define order for categorical variables

# 1. Employment sizeband
size_order = [
    'Micro (0-9)'
    'Small (10-49)'
    'Medium (50-249)'
    'Large (250+)'
]

# 2. Age group
age_order = [
    'New (0-2 years)',
    'Young (3-5 years)',
    'Old (5-10 years)',
    'Mature (10+ years)'
]

#Â What are our measures of dynamism?

The data output from BSD is in counts (of firms). We want to calculate rates to measure dynamism.

Entry rate = number of entering firms in t / number of total firms in t
Exit rate = number of exiting firms in t/ number of total firms in t
Job creation rate = number of jobs created by firms in t/ number of total jobs in t-1
Job destruction rate = number of jobs destroyed by firms in t / number of total jobs in t-1
Reallocation rate = job creation rate + job destruction rate

In [8]:
# Write function to calculate rates for dynamism measures, apply this across dataframes
def calculate_dynamism_rates(df, group_by_cols=None):
    # Make a copy to avoid modifying the original
    df = df.copy()
    
    # Sort data
    sort_cols = group_by_cols + ['year'] if group_by_cols else ['year']
    df = df.sort_values(sort_cols)
    
    # Create lagged employment (with or without grouping)
    if group_by_cols is None:
        df['total_employment_lagged'] = df['employment'].shift(1)
    else:
        df['total_employment_lagged'] = df.groupby(group_by_cols)['employment'].shift(1)
    
    # Calculate rates (same regardless of grouping)
    df['Entry rate'] = (df['n_entrants'] + df['n_entry_and_exit']) / df['n_firms']
    df['Exit rate'] = (df['n_exiters'] + df['n_entry_and_exit']) / df['n_firms']
    df['Job creation rate'] = (df['jc_incumbents'] + df['jc_entrants']) / df['total_employment_lagged']
    df['Job destruction rate'] = (df['jd_incumbents'] + df['jd_exiters']) / df['total_employment_lagged']
    df['Entry job creation rate'] = (df['jc_entrants']) / df['total_employment_lagged']
    df['Incumbent job creation rate'] = (df['jc_incumbents']) / df['total_employment_lagged']
    df['Exit job destruction rate'] = (df['jd_exiters']) / df['total_employment_lagged']
    df['Incumbent job destruction rate'] = (df['jd_incumbents']) / df['total_employment_lagged']


    # We can't use the first/last year for dynamic variables due to no backward/forward looking observatinons
    years = df['year'].unique()
    df = df[~df['year'].isin([years.min(), years.max()])]

    return df

# Apply function to dataframes
whole_economy_dynamism = calculate_dynamism_rates(whole_economy_df)
firm_size_dynamism = calculate_dynamism_rates(firm_size_df, group_by_cols=['emp_sizeband'])
firm_age_dynamism = calculate_dynamism_rates(firm_age_df, group_by_cols=['age_group'])
industry_dynamism = calculate_dynamism_rates(industry_df, group_by_cols=['industry_name'])
region_dynamism = calculate_dynamism_rates(region_df, group_by_cols=['region'])

In [6]:
# How many firms do we see in the BSD each year?

chart = alt.Chart(whole_economy_df).mark_line().encode(
    x=alt.X('year:O', axis=alt.Axis(
        labelExpr="datum.value % 2 == 0 ? datum.label : ''",
        labelAngle=0)),
    y=alt.Y('n_firms:Q',
          title='Number of firms in market economy',
          # 1. Set the lower y-axis limit
          scale=alt.Scale(domainMin=1700000),
          # 2. Format the axis labels to millions (e.g., "1.5M")
          axis=alt.Axis(format=".2s")
         )
)

chart

#chart.save('Charts/total_annual_firm_count.json')
#chart.save('Charts/total_annual_firm_count.png', scale_factor=2)

In [24]:
# What is the total employment of firms on the BSD each year?


chart = alt.Chart(whole_economy_df).mark_line().encode(
    x=alt.X('year:O', axis=alt.Axis(
        labelExpr="datum.value % 2 == 0 ? datum.label : ''",
        labelAngle=0)),
    y=alt.Y('employment:Q',
          title='Total employment in market economy firms',
          # 1. Set the lower y-axis limit
          scale=alt.Scale(domainMin=17000000),
          # 2. Format the axis labels to millions (e.g., "1.5M")
          axis=alt.Axis(format=".2s")
         )
)

chart.save('Charts/total_firm_employment.png', scale_factor=2)
chart.save('Charts/total_firm_employment.json')

# What do we anticipate has happened to firm entry and exit?



In [40]:
# ENTRY AND EXIT HEADLINE
entry_exit_df = whole_economy_dynamism.melt(id_vars=['year'],
                                                 value_vars=['Entry rate','Exit rate'])

chart = alt.Chart(entry_exit_df).mark_line().encode(
    x=alt.X('year:O', axis=alt.Axis(
                labelExpr="datum.value % 2 == 0 ? datum.label : ''",  # Show every 2nd year
            labelAngle=0)),
    y=alt.Y('value:Q', axis=alt.Axis(format='%'), title='Market Sector Firm Entry and Exit Rate'),
    color=alt.Color('variable:N', title=None)

)

chart.save('Charts/entry_exit_we.png', scale_factor=2)
chart.save('Charts/entry_exit_we.json')

In [7]:
# Plot just entry rate
entry_df = whole_economy_dynamism.melt(id_vars=['year'],
                                                 value_vars=['Entry rate'])

chart = alt.Chart(entry_df).mark_line().encode(
    x=alt.X('year:O', axis=alt.Axis(
                labelExpr="datum.value % 2 == 0 ? datum.label : ''",  # Show every 2nd year
            labelAngle=0)),
    y=alt.Y('value:Q', axis=alt.Axis(format='%'), title='Market Sector Firm Entry and Exit Rate'),
    color=alt.Color('variable:N', title=None)

)


chart.save('Charts/entry_we.png', scale_factor=2)
chart.save('Charts/entry_we.json')

In [8]:
entry_df

Unnamed: 0,year,variable,value
0,1998,Entry rate,0.153417
1,1999,Entry rate,0.125709
2,2000,Entry rate,0.131868
3,2001,Entry rate,0.128969
4,2002,Entry rate,0.129715
5,2003,Entry rate,0.134013
6,2004,Entry rate,0.158925
7,2005,Entry rate,0.150314
8,2006,Entry rate,0.14375
9,2007,Entry rate,0.150122


In [18]:
# ENTRY RATES FOR ALL SIZE FIRMS

entry_firmsize_chart = alt.Chart(firm_size_dynamism).mark_line().encode(
    x=alt.X('year:O', axis=alt.Axis(
                labelExpr="datum.value % 2 == 0 ? datum.label : ''",  # Show every 2nd year
            labelAngle=0)),
    y=alt.Y('Entry rate:Q', axis=alt.Axis(format='%')),
    color=alt.Color('emp_sizeband:O', sort=size_order, legend=alt.Legend(orient="top"))
)

exit_firmsize_chart = alt.Chart(firm_size_dynamism).mark_line().encode(
    x=alt.X('year:O', axis=alt.Axis(
                labelExpr="datum.value % 2 == 0 ? datum.label : ''",  # Show every 2nd year
            labelAngle=0)),
    y=alt.Y('Exit rate:Q', axis=alt.Axis(format='%')),
    color=alt.Color('emp_sizeband:O', title='Firm size (employment)', sort=size_order, legend=alt.Legend(orient="top"))
)

combined_chart = alt.hconcat(entry_firmsize_chart, exit_firmsize_chart)

combined_chart
#combined_chart.save('Charts/entry_exit_firmsize.png', scale_factor=2)
##combined_chart.save('Charts/entry_exit_firmsize.json')

In [15]:
# ENTRY AND EXIT RATES ON ONE PLOT FACETED BY FIRM SIZE

# First 
entry_exit_firmsize_df = firm_size_dynamism.melt(id_vars=['year','emp_sizeband'],
                                                 value_vars=['Entry rate','Exit rate'])



chart = alt.Chart(entry_exit_firmsize_df).mark_line().encode(
    x=alt.X('year:O', axis=alt.Axis(
                labelExpr="datum.value % 2 == 0 ? datum.label : ''",  # Show every 2nd year
            labelAngle=0)),
    y=alt.Y('value:Q', axis=alt.Axis(format='%'), title=None),
    color=alt.Color('variable:O', title=None, legend=alt.Legend(orient="right", titleFontSize=13,   
                labelFontSize=13,    
                symbolStrokeWidth=3)),
    facet=alt.Facet('emp_sizeband:O', columns=2, sort=size_order, header=alt.Header(
            title='Entry and exit rates by firm size (employment)',
            titleFontSize=16,
            labelFontSize=12
        ))
)
chart
#chart.save('Charts/entry_exit_firmsize_faceted.png', scale_factor=2)
#chart.save('Charts/entry_exit_firmsize_faceted.json')


In [56]:
# ENTRY RATES FOR ALL AGE FIRMS

entry_age_chart = alt.Chart(firm_age_dynamism).mark_line().encode(
    x=alt.X('year:O', axis=alt.Axis(
                labelExpr="datum.value % 2 == 0 ? datum.label : ''",  # Show every 2nd year
            labelAngle=0)),
    y=alt.Y('Entry rate:Q', axis=alt.Axis(format='%')),
    color=alt.Color('age_group:O', title='Age of firm', sort=age_order, legend=alt.Legend(orient="top"))
)

exit_age_chart = alt.Chart(firm_age_dynamism).mark_line().encode(
    x=alt.X('year:O', axis=alt.Axis(
                labelExpr="datum.value % 2 == 0 ? datum.label : ''",  # Show every 2nd year
            labelAngle=0)),
    y=alt.Y('Exit rate:Q', axis=alt.Axis(format='%')),
    color=alt.Color('age_group:O', title='Age of firm', sort=age_order, legend=alt.Legend(orient="top"))
)

combined_chart = alt.hconcat(entry_age_chart, exit_age_chart)

combined_chart.save('Charts/entry_exit_firmage.json')
combined_chart.save('Charts/entry_exit_firmage.png', scale_factor=2)

In [60]:
# ENTRY AND EXIT RATES ON ONE PLOT FACETED BY FIRM SIZE

# First 
entry_exit_age_df = firm_age_dynamism.melt(id_vars=['year','age_group'],
                                                 value_vars=['Entry rate','Exit rate'])

chart = alt.Chart(entry_exit_age_df).mark_line().encode(
    x=alt.X('year:O', axis=alt.Axis(
                labelExpr="datum.value % 2 == 0 ? datum.label : ''",  # Show every 2nd year
            labelAngle=0)),
    y=alt.Y('value:Q', axis=alt.Axis(format='%'), title=None),
    color=alt.Color('variable:O', title=None, legend=alt.Legend(orient="right", titleFontSize=13,   
                labelFontSize=13,    
                symbolStrokeWidth=3)),
    facet=alt.Facet('age_group:N', columns=2, sort=age_order,header=alt.Header(
            title='Entry and exit by firm age',
            titleFontSize=16,
            labelFontSize=12
        )),
)
chart
chart.save('Charts/entry_exit_firmage_faceted.json')
chart.save('Charts/entry_exit_firmage_faceted.png', scale_factor=2)


In [42]:
industry_dynamism.head()

Unnamed: 0,year,industry_name,n_firms,employment,n_entrants,n_exiters,n_entry_and_exit,n_incumbents,jc_entrants,jc_incumbents,...,jd_incumbents,site_exp_entrants,site_exp_incumbents,site_closure_exit,site_closure_incumbents,total_employment_lagged,Entry rate,Exit rate,Job creation rate,Job destruction rate
12,1998,Automotives,79929,600582,7677,6250,1648,64354,28745,53493,...,22317,390,2280,2524,767,621861.0,0.116666,0.098813,0.132245,0.101185
24,1999,Automotives,79415,601788,5718,8792,1647,63258,27378,39655,...,17367,339,1589,2535,277,600582.0,0.092741,0.131449,0.111613,0.105408
36,2000,Automotives,76664,588527,6067,7046,1580,61971,21904,43127,...,28683,287,1896,2797,797,601788.0,0.099747,0.112517,0.108063,0.114869
48,2001,Automotives,75088,590836,5808,6767,1280,61233,18366,43389,...,23586,185,2755,2951,507,588527.0,0.094396,0.107168,0.104931,0.111191
60,2002,Automotives,74424,599606,6059,6666,1255,60444,25596,61739,...,37005,281,2816,2820,570,590836.0,0.098275,0.106431,0.147816,0.135244


In [61]:
# ENTRY RATES FOR ALL INDUSTRY FIRMS

entry_industry_chart = alt.Chart(industry_dynamism).mark_line().encode(
    x=alt.X('year:O', axis=alt.Axis(
                labelExpr="datum.value % 2 == 0 ? datum.label : ''",  # Show every 2nd year
            labelAngle=0)),
    y=alt.Y('Entry rate:Q', axis=alt.Axis(format='%')),
    color=alt.Color('industry_name:N', legend=alt.Legend(orient="top"))
)

exit_industry_chart = alt.Chart(industry_dynamism).mark_line().encode(
    x=alt.X('year:O', axis=alt.Axis(
                labelExpr="datum.value % 2 == 0 ? datum.label : ''",  # Show every 2nd year
            labelAngle=0)),
    y=alt.Y('Exit rate:Q', axis=alt.Axis(format='%')),
    color=alt.Color('industry_name:N', legend=alt.Legend(orient="top"))
)

combined_chart = alt.hconcat(entry_industry_chart, exit_industry_chart)

combined_chart
#combined_chart.save('Charts/entry_exit_firmage.json')
#combined_chart.save('Charts/entry_exit_firmage.png', scale_factor=2)

In [68]:
# ENTRY AND EXIT RATES ON ONE PLOT FACETED BY FIRM INDUSTRY

# First 
entry_exit_industry_df = industry_dynamism.melt(id_vars=['year','industry_name'],
                                                 value_vars=['Entry rate','Exit rate'])

chart = alt.Chart(entry_exit_industry_df).mark_line().encode(
    x=alt.X('year:O', axis=alt.Axis(
                labelExpr="datum.value % 2 == 0 ? datum.label : ''",  # Show every 2nd year
            labelAngle=0)),
    y=alt.Y('value:Q', axis=alt.Axis(format='%')),
    color=alt.Color('variable:O', title=None, legend=alt.Legend(orient="right", titleFontSize=18,   
                labelFontSize=18,    
                symbolStrokeWidth=3)),
    facet=alt.Facet('industry_name:N', columns=3, header=alt.Header(
            title='Entry and exit rates by industry',
            titleFontSize=18,
            labelFontSize=14
        )),
)


chart = chart.resolve_axis(
    x='independent'
)

chart
chart.save('Charts/entry_exit_industry_faceted.png', scale_factor=2)
chart.save('Charts/entry_exit_industry_faceted.json')


In [86]:
# ENTRY RATES FOR ALL REGIONS

entry_region_chart = alt.Chart(region_dynamism).mark_line().encode(
    x=alt.X('year:O', axis=alt.Axis(
                labelExpr="datum.value % 2 == 0 ? datum.label : ''",  # Show every 2nd year
            labelAngle=0)),
    y=alt.Y('Entry rate:Q', axis=alt.Axis(format='%')),
    color=alt.Color('region:N', title=None, legend=alt.Legend(columns=6, orient="top"),
                sort=alt.EncodingSortField(
                field="Entry rate", 
                op="mean", 
                order="descending"
            ))
)

exit_region_chart = alt.Chart(region_dynamism).mark_line().encode(
    x=alt.X('year:O', axis=alt.Axis(
                labelExpr="datum.value % 2 == 0 ? datum.label : ''",  # Show every 2nd year
            labelAngle=0)),
    y=alt.Y('Exit rate:Q', axis=alt.Axis(format='%')),
    color=alt.Color('region:N', title=None, legend=alt.Legend(columns=6, orient="top"),
                sort=alt.EncodingSortField(
                field="Entry rate", 
                op="mean", 
                order="descending"
            ))
)

combined_chart = alt.hconcat(entry_region_chart, exit_region_chart)

combined_chart
combined_chart.save('Charts/entry_exit_region.json')
combined_chart.save('Charts/entry_exit_region.png', scale_factor=2)

In [55]:
# ENTRY AND EXIT RATES ON ONE PLOT FACETED BY FIRM REGION

# First 
entry_exit_region_df = region_dynamism.melt(id_vars=['year','region'],
                                                 value_vars=['Entry rate','Exit rate'])

chart = alt.Chart(entry_exit_region_df).mark_line().encode(
    x=alt.X('year:O', axis=alt.Axis(
                labelExpr="datum.value % 2 == 0 ? datum.label : ''",  # Show every 2nd year
            labelAngle=0)),
    y=alt.Y('value:Q', axis=alt.Axis(format='%')),
    color=alt.Color('variable:O', title=None, legend=alt.Legend(orient="top", titleFontSize=14,   
                labelFontSize=12,    
                symbolStrokeWidth=3)),
    facet=alt.Facet('region:N', columns=3, header=alt.Header(
            title='Region'
        )),
)

chart = chart.resolve_axis(
    x='independent',
)

chart
#chart.save('Charts/entry_exit_region_faceted.png', scale_factor=2)
#chart.save('Charts/entry_exit_region_faceted.json')


# What do we anticipate has happened to job creation and destruction?

The previous report found the reallocation of labour declined after the financial crisis and remained below 20%. We now add snapshots 2020, 2021 and 2022. 

The Coronavirus Job Retention Scheme ran from 1 March 2020 and ended on 30 September 2021. BSD snapshots are taken in March each year so both our 2020 and 2021 annual data will cover a period in which firms had access to this support. 

    - In an
    - The furlough scheme would have more impact on employment margins, rather than entry/exit, as firms may have reduced to a skeleton workforce whilst remaining open in absence of a furlough scheme.

Increases in energy costs have been another pressure. 

In [59]:
whole_economy_dynamism.head()

Unnamed: 0,year,n_firms,employment,n_entrants,n_exiters,n_entry_and_exit,n_incumbents,jc_entrants,jc_incumbents,jd_exiters,...,site_closure_incumbents,total_employment_lagged,Entry rate,Exit rate,Job creation rate,Job destruction rate,Entry job creation rate,Incumbent job creation rate,Exit job destruction rate,Incumbent job destruction rate
1,1998,1847011,17709729,234733,160782,48630,1402866,848394,1620884,1074088,...,21672,17612823.0,0.153417,0.113379,0.140198,0.118257,0.048169,0.092029,0.060983,0.057273
2,1999,1873060,17936146,187154,212458,48307,1425141,718589,1291775,1287866,...,6184,17709729.0,0.125709,0.139219,0.113517,0.111397,0.040576,0.072942,0.072721,0.038677
3,2000,1857200,18029246,199710,176270,45195,1436025,739561,1501027,1117071,...,21806,17936146.0,0.131868,0.119247,0.12492,0.111356,0.041233,0.083687,0.06228,0.049075
4,2001,1877929,18416010,196912,197356,45282,1438379,908243,1599402,1090562,...,17641,18029246.0,0.128969,0.129205,0.139088,0.11858,0.050376,0.088712,0.060488,0.058091
5,2002,1879029,18872566,200563,208480,43175,1426811,829886,2097098,1241772,...,23526,18416010.0,0.129715,0.133928,0.158937,0.141893,0.045063,0.113874,0.067429,0.074464


In [88]:
# JOB CREATION AND DESTRUCTION
jc_jd_df = whole_economy_dynamism.melt(id_vars='year',
                                       value_vars=['Entry job creation rate','Incumbent job creation rate','Incumbent job destruction rate','Exit job destruction rate'])


# Define colours for variables and order for plotting
domain_vars = [
    'Entry job creation rate',
    'Incumbent job creation rate',
    'Incumbent job destruction rate',
    'Exit job destruction rate'
]

variable_order_map = {
    'Entry job creation rate': 1,
    'Incumbent job creation rate': 3,
    'Incumbent job destruction rate': 3,
    'Exit job destruction rate': 4
}

# 2. Add the new column to the DataFrame
jc_jd_df['sort_order'] = jc_jd_df['variable'].map(variable_order_map)

# Example colors: two blues for creation, two reds/oranges for destruction
range_colors = [
    "#179FDB", 
    "#0063AF",  
    '#E54753',
    '#ff7f0e',  
]
chart = alt.Chart(jc_jd_df).mark_bar().encode(
    x=alt.X('year:O', axis=alt.Axis(
                labelExpr="datum.value % 2 == 0 ? datum.label : ''",  # Show every 2nd year
            labelAngle=0)),
    y=alt.Y('value:Q', title='Job creation and destruction rates', axis=alt.Axis(format='%')),
    color=alt.Color('variable:N', title=None, scale=alt.Scale(domain=domain_vars, range=range_colors)),
    order=alt.Order('sort_order:Q')
)

chart
chart.save('Charts/job_creation_destruction_total.png', scale_factor=2)
chart.save('Charts/job_creation_destruction_total.json')

In [100]:
# JOB CREATION AND DESTRUCTION FACETED BY FIRM SIZE
jc_jd_firmsize_df = firm_size_dynamism.melt(id_vars=['year','emp_sizeband'],
                                       value_vars=['Entry job creation rate','Incumbent job creation rate','Incumbent job destruction rate','Exit job destruction rate'])

# Order columns for plotting
jc_jd_firmsize_df['sort_order'] = jc_jd_firmsize_df['variable'].map(variable_order_map)

size_order_map = [
    'Micro (0-9)'
    'Small (10-49)'
    'Medium (50-249)'
    'Large (250+)'
]

chart = alt.Chart(jc_jd_firmsize_df).mark_bar().encode(
    x=alt.X('year:O', axis=alt.Axis(
                labelExpr="datum.value % 2 == 0 ? datum.label : ''",  # Show every 2nd year
            labelAngle=0)),
    y=alt.Y('value:Q', title=None, axis=alt.Axis(format='%')),
    color=alt.Color('variable:N', title=None, scale=alt.Scale(domain=domain_vars, range=range_colors),
                    legend=alt.Legend(orient="right", titleFontSize=14,   
                        labelFontSize=12,    
                        symbolStrokeWidth=3,
                        labelLimit=0)),
    order=alt.Order('sort_order:Q'),
    facet=alt.Facet('emp_sizeband:N', columns=2, sort=size_order_map, header=alt.Header(
            title='Job creation and destruction by firm size (employment)',
            titleFontSize=16,
            labelFontSize=12
        ))
)

chart
chart.save('Charts/job_creation_destruction_firmsize.png', scale_factor=2)
chart.save('Charts/job_creation_destruction_firmsize.json')

In [103]:
# JOB CREATION AND DESTRUCTION FACETED BY FIRM AGE
jc_jd_firmage_df = firm_age_dynamism.melt(id_vars=['year','age_group'],
                                       value_vars=['Entry job creation rate','Incumbent job creation rate','Incumbent job destruction rate','Exit job destruction rate'])


# Order columns for plotting
jc_jd_firmage_df['sort_order'] = jc_jd_firmsize_df['variable'].map(order_map)

age_order_map = [
    'Micro (0-9)'
    'Small (10-49)'
    'Medium (50-249)'
    'Large (250+)'
]

chart = alt.Chart(jc_jd_firmage_df).mark_bar().encode(
    x=alt.X('year:O', axis=alt.Axis(
                labelExpr="datum.value % 2 == 0 ? datum.label : ''",  # Show every 2nd year
            labelAngle=0)),
    y=alt.Y('value:Q', title=None, axis=alt.Axis(format='%')),
    color=alt.Color('variable:N', title=None, scale=alt.Scale(domain=domain_vars, range=range_colors),
                    legend=alt.Legend(orient="right", titleFontSize=14,   
                        labelFontSize=12,    
                        symbolStrokeWidth=3,
                        labelLimit=0)),
    order=alt.Order('sort_order:Q'),
    facet=alt.Facet('age_group:N', columns=2, sort=age_order, header=alt.Header(
            title='Job creation and destruction by firm age',
            titleFontSize=16,
            labelFontSize=12
        ))
)

chart
chart.save('Charts/job_creation_destruction_firmage.png', scale_factor=2)
chart.save('Charts/job_creation_destruction_firmage.json')

In [113]:
# ASIDE - HOW MANY FIRMS DO WE SEE OF EACH AGE EACH YEAR?
# I suspect we see a growing number of firms join the Old category which may influence total job reallocation

firm_count_age = firm_age_df.melt(id_vars=['year','age_group'],
                                       value_vars='n_firms')

firm_count_age.head()

Unnamed: 0,year,age_group,variable,value
0,1997,New (0-2 years),n_firms,412053
1,1997,Young (3-5 years),n_firms,387626
2,1997,Mature (6-10 years),n_firms,305141
3,1997,Old (11+ years),n_firms,676471
4,1998,New (0-2 years),n_firms,458590


In [114]:
chart = alt.Chart(firm_count_age).mark_bar().encode(
    x=alt.X('year:O'),
    y=alt.Y('value:Q'),
    color=alt.Color('age_group:N')
)
chart

In [107]:
# JOB CREATION AND DESTRUCTION FACETED BY FIRM INDUSTRY
jc_jd_industry_df = industry_dynamism.melt(id_vars=['year','industry_name'],
                                       value_vars=['Entry job creation rate','Incumbent job creation rate','Incumbent job destruction rate','Exit job destruction rate'])


jc_jd_df.head()



chart = alt.Chart(jc_jd_industry_df).mark_bar().encode(
    x=alt.X('year:O', axis=alt.Axis(
                labelExpr="datum.value % 2 == 0 ? datum.label : ''",  # Show every 2nd year
            labelAngle=0)),
    y=alt.Y('value:Q', title=None, axis=alt.Axis(format='%')),
    color=alt.Color('variable:N', title=None, scale=alt.Scale(domain=domain_vars, range=range_colors),
                    legend=alt.Legend(orient="right", titleFontSize=14,   
                        labelFontSize=12,    
                        symbolStrokeWidth=3,
                        labelLimit=0)),
    order=alt.Order('sort_order:Q'),
    facet=alt.Facet('industry_name:N', columns=3, header=alt.Header(
            title='Job creation and destruction by industry',
            titleFontSize=16,
            labelFontSize=12
        ))
)

chart
chart.save('Charts/job_creation_destruction_industry.png', scale_factor=2)
chart.save('Charts/job_creation_destruction_industry.json')

In [110]:
# JOB CREATION AND DESTRUCTION FACETED BY FIRM REGION
jc_jd_region_df = region_dynamism.melt(id_vars=['year','region'],
                                       value_vars=['Entry job creation rate','Incumbent job creation rate','Incumbent job destruction rate','Exit job destruction rate'])


jc_jd_df.head()

# For now just plot from 2001 onwards - North West has extremely abnormal values for 1998, 1999, 2000
exclude_years = [1998, 1999, 2000]
filtered_jc_jd_region_df = jc_jd_region_df[~jc_jd_region_df['year'].isin(exclude_years)]

chart = alt.Chart(filtered_jc_jd_region_df).mark_bar().encode(
    x=alt.X('year:O', axis=alt.Axis(
                labelExpr="datum.value % 2 == 0 ? datum.label : ''",  # Show every 2nd year
            labelAngle=0)),
    y=alt.Y('value:Q', title=None, axis=alt.Axis(format='%')),
    color=alt.Color('variable:N', title=None, scale=alt.Scale(domain=domain_vars, range=range_colors),
                    legend=alt.Legend(orient="right", titleFontSize=14,   
                        labelFontSize=12,    
                        symbolStrokeWidth=3,
                        labelLimit=0)),
    facet=alt.Facet('region:N', columns=3, header=alt.Header(
            title='Job creation and destruction by region',
            titleFontSize=16,
            labelFontSize=12
        )
        )
)


chart
chart.save('Charts/job_creation_destruction_region.png', scale_factor=2)
chart.save('Charts/job_creation_destruction_region.json')

In [6]:
whole_economy_dynamism.columns

Index(['year', 'n_firms', 'employment', 'n_entrants', 'n_exiters',
       'n_entry_and_exit', 'n_incumbents', 'jc_entrants', 'jc_incumbents',
       'jd_exiters', 'jd_incumbents', 'site_exp_entrants',
       'site_exp_incumbents', 'site_closure_exit', 'site_closure_incumbents',
       'total_employment_lagged', 'Entry rate', 'Exit rate',
       'Job creation rate', 'Job destruction rate', 'Entry job creation rate',
       'Incumbent job creation rate', 'Exit job destruction rate',
       'Incumbent job destruction rate'],
      dtype='object')

In [10]:
# SITE EXPANSION

# Write function with input df and group_by parameters
site_df = whole_economy_dynamism.melt(id_vars='year',
                                      value_vars=['site_exp_entrants','site_exp_incumbents','site_closure_incumbents','site_closure_exit'])


chart = alt.Chart(site_df).mark_bar().encode(
    x=alt.X('year:O', axis=alt.Axis(
                labelExpr="datum.value % 2 == 0 ? datum.label : ''",  # Show every 2nd year
            labelAngle=0)),
    y=alt.Y('value:Q'),
    color=alt.Color('variable:N')
)

chart

In [None]:
chart = alt.Chart(whole_economy_dynamism).mark_line()

In [None]:
# ZOOM IN ON 2020, 2021, 2022

In [None]:
def plot_entry_exit_rate(
    df: pd.DataFrame, 
    group_col: str, 
    metric_col: str = 'Entry rate', 
    independent_y: bool = False
) -> alt.Chart:
    """
    Creates a faceted line chart of entry or exit rates 
    over time, grouped by a specified column.

    Args:
        df: The pandas DataFrame. Must be in long format and contain 
            'year', the column specified in 'metric_col', 
            and the column specified in 'group_col'.
        group_col: The string name of the column to facet by 
                   (e.g., 'firm_size', 'firm_age').
        metric_col: The string name of the metric to plot on the y-axis
                    (e.g., 'Entry rate', 'Exit rate'). Default is 'Entry rate'.
        independent_y: If True, each facet will have its own y-axis scale.
                       If False (default), all facets share one y-axis.

    Returns:
        An Altair chart object.
    """
    
    # Clean up titles for the chart
    metric_title = metric_col.replace('_', ' ').title()
    group_title = group_col.replace('_', ' ').title()
    
    # Create the base chart
    chart = alt.Chart(df).mark_line().encode(
        x=alt.X('year:O', axis=alt.Axis(
                    labelExpr="datum.value % 2 == 0 ? datum.label : ''",  # Show every 2nd year
                labelAngle=0)),
        
        # Use the dynamic metric_col for the y-axis
        y=alt.Y(f'{metric_col}:Q', title=metric_title, axis=alt.Axis(format='%')),
        
        # Facet by the provided group_col
        facet=alt.Facet(f'{group_col}:O', columns=2, header=alt.Header(
            title=group_title
        )),

        # Add the dynamic metric_col to the tooltip
        tooltip=['year:O', f'{metric_col}:Q', f'{group_col}:N']

    ).properties(
        # Use the dynamic titles
        title=f'{metric_title} over Time by {group_title}'
    ).interactive()

    # Resolve the y-axis if specified
    if independent_y:
        chart = chart.resolve_scale(y='independent')

    return chart

In [None]:
plot_entry_exit_rate(firm_size_dynamism, 'emp_sizeband', 'Exit rate')
plot_entry_exit_rate(firm_size_dynamism, 'emp_sizeband', 'Exit rate')