In [3]:
# BSD validation against other sources

import io
import json
import os
import warnings

import altair as alt
import geopandas as gpd
import numpy as np
import pandas as pd
import requests
from pandas.api.types import CategoricalDtype

import eco_style 
alt.themes.enable("light")

ThemeRegistry.enable('light')

In [4]:
# Import BSD statistics: one dataframe per sheet of the BSD workbook
whole_economy_bsd, firmsize_bsd = (
    pd.read_excel('business_dynamism_BSD_1997_2023.xlsx', sheet_name=bsd_sheet)
    for bsd_sheet in ('whole_economy', 'firm_size')
)

In [6]:
# Write function to calculate rates for dynamism measures, apply this across dataframes
def calculate_dynamism_rates(df, group_by_cols=None):
    """Add entry/exit and job creation/destruction rate columns to a BSD table.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'year', 'n_firms', 'employment', 'n_entrants',
        'n_exiters', 'n_entry_and_exit', 'jc_incumbents', 'jc_entrants',
        'jd_incumbents' and 'jd_exiters'. The input is not modified.
    group_by_cols : str, list of str or None, default None
        Column(s) identifying separate series within ``df``. When given, the
        employment lag is taken within each group; when None (or empty) the
        whole frame is treated as a single series.

    Returns
    -------
    pandas.DataFrame
        A sorted copy of ``df`` with 'total_employment_lagged' and the rate
        columns added, excluding rows for the smallest and largest 'year'
        found in the frame (taken over the whole frame, not per group).

    Notes
    -----
    The lag is the previous *row* after sorting, so it equals the previous
    year's employment only when each series has exactly one row per
    consecutive year. A UserWarning is emitted when a (group, year)
    combination is repeated, because the lag is then taken from the wrong row.
    """
    # Make a copy to avoid modifying the original
    df = df.copy()

    # Accept a single column name as well as a list; None/empty means ungrouped
    if group_by_cols is None:
        group_cols = []
    elif isinstance(group_by_cols, str):
        group_cols = [group_by_cols]
    else:
        group_cols = list(group_by_cols)

    # Sort so that shift(1) walks each series in year order
    sort_cols = group_cols + ['year']
    df = df.sort_values(sort_cols)

    # Several rows for the same (group, year) mean the previous row is not the
    # previous year of the same series, so flag it instead of failing silently.
    if df.duplicated(subset=sort_cols).any():
        warnings.warn(
            "calculate_dynamism_rates: repeated rows for the same "
            f"{sort_cols} combination; lagged employment will be taken from "
            "the wrong row. Pass group_by_cols to identify each series.",
            UserWarning,
            stacklevel=2,
        )

    # Create lagged employment (with or without grouping)
    if group_cols:
        df['total_employment_lagged'] = df.groupby(group_cols)['employment'].shift(1)
    else:
        df['total_employment_lagged'] = df['employment'].shift(1)

    # Calculate rates (same regardless of grouping)
    df['Entry rate'] = (df['n_entrants'] + df['n_entry_and_exit']) / df['n_firms']
    df['Exit rate'] = (df['n_exiters'] + df['n_entry_and_exit']) / df['n_firms']
    df['Job creation rate'] = (df['jc_incumbents'] + df['jc_entrants']) / df['total_employment_lagged']
    df['Job destruction rate'] = (df['jd_incumbents'] + df['jd_exiters']) / df['total_employment_lagged']
    df['Entry job creation rate'] = df['jc_entrants'] / df['total_employment_lagged']
    df['Incumbent job creation rate'] = df['jc_incumbents'] / df['total_employment_lagged']
    df['Exit job destruction rate'] = df['jd_exiters'] / df['total_employment_lagged']
    df['Incumbent job destruction rate'] = df['jd_incumbents'] / df['total_employment_lagged']

    # Nothing to trim (and no min/max year to compute) on an empty frame
    if df.empty:
        return df

    # We can't use the first/last year for dynamic variables due to no
    # backward/forward looking observations
    first_year, last_year = df['year'].min(), df['year'].max()

    # Return an independent copy so callers can add columns without warnings
    return df[~df['year'].isin([first_year, last_year])].copy()

# Apply function to dataframes
whole_economy_dynamism = calculate_dynamism_rates(whole_economy_bsd)
# NOTE(review): the 'firm_size' sheet presumably holds one row per size band per
# year. Called without group_by_cols, the employment lag is taken from the
# previous row of the year-sorted frame, not from the same band's previous
# year -- confirm the band column name and pass it as group_by_cols.
firmsize_dynamism = calculate_dynamism_rates(firmsize_bsd)

In [4]:
whole_economy_bsd

Unnamed: 0,year,n_firms,employment,n_entrants,n_exiters,n_entry_and_exit,n_incumbents,jc_entrants,jc_incumbents,jd_exiters,jd_incumbents,site_exp_entrants,site_exp_incumbents,site_closure_exit,site_closure_incumbents
0,1997,1781291,17612823,0,0,0,0,0,0,0,0,0,0,0,0
1,1998,1847011,17709729,234733,160782,48630,1402866,848394,1620884,1074088,1008743,11592,51295,52885,21672
2,1999,1873060,17936146,187154,212458,48307,1425141,718589,1291775,1287866,684953,10563,27226,58067,6184
3,2000,1857200,18029246,199710,176270,45195,1436025,739561,1501027,1117071,880223,7234,48872,49257,21806
4,2001,1877929,18416010,196912,197356,45282,1438379,908243,1599402,1090562,1047337,10241,82199,53575,17641
5,2002,1879029,18872566,200563,208480,43175,1426811,829886,2097098,1241772,1371331,9411,73550,55704,23526
6,2003,1879213,18804479,203886,214205,47953,1413169,812769,1508126,1253052,1166258,7846,69552,62201,25063
7,2004,1922606,18661685,247150,202611,58401,1414444,798028,1283703,1280367,989326,6544,93578,59600,16919
8,2005,1955539,18761138,237378,194044,56567,1467550,792623,1673070,1027365,1019602,7905,64746,56026,17555
9,2006,1991156,18906423,235783,187612,50445,1517316,718538,1506004,1047028,1038480,8198,61198,54176,18572


In [29]:
# Preview the raw ONS job creation/destruction sheet before any renaming
ONS_jc_jd = pd.read_excel(
    'ONS_annualbusinessdynamism20012023.xlsx',
    sheet_name='Business Dynamism by transition',
    skiprows=7,
)
ONS_jc_jd

Unnamed: 0,year,Total job creation,Entrants,Incumbents,Total job destruction,Closing businesses,Incumbents.1,Net flow,Entrants and Closing,Incumbents.2,Total job reallocation,Entrants and Closing.1,Incumbents.3
0,2001,0.16222,0.046658,0.115561,0.144781,0.047534,0.097247,0.017439,-0.000876,0.018315,0.307001,0.094193,0.212808
1,2002,0.130082,0.030802,0.099279,0.127294,0.038375,0.088919,0.002788,-0.007572,0.01036,0.257376,0.069177,0.188199
2,2003,0.138038,0.059178,0.078861,0.120616,0.044062,0.076554,0.017422,0.015116,0.002306,0.258655,0.10324,0.155415
3,2004,0.142282,0.041762,0.10052,0.126663,0.040199,0.086464,0.015619,0.001563,0.014056,0.268945,0.081961,0.186984
4,2005,0.12958,0.040115,0.089465,0.122937,0.050145,0.072792,0.006643,-0.01003,0.016672,0.252517,0.09026,0.162257
5,2006,0.116602,0.037535,0.079068,0.115939,0.036946,0.078993,0.000663,0.000589,7.5e-05,0.232541,0.074481,0.158061
6,2007,0.118408,0.039486,0.078922,0.103669,0.032062,0.071607,0.01474,0.007424,0.007316,0.222077,0.071548,0.150529
7,2008,0.10749,0.033707,0.073783,0.107535,0.035832,0.071703,-4.4e-05,-0.002125,0.00208,0.215025,0.069538,0.145487
8,2009,0.102713,0.028983,0.07373,0.103555,0.035431,0.068124,-0.000842,-0.006448,0.005606,0.206269,0.064414,0.141855
9,2010,0.101202,0.035109,0.066093,0.117435,0.036743,0.080693,-0.016233,-0.001634,-0.0146,0.218637,0.071851,0.146786


In [30]:
# Import ONS dynamism statistics

# Entry/exit sheet: tag every measure with an "ONS" prefix
ONS_entry_exit = (
    pd.read_excel(
        'ONS_annualbusinessdynamism20012023.xlsx',
        sheet_name='Entry Exit rates',
        skiprows=5,
    )
    .rename(columns={
        'Entrants': 'ONS entrants',
        'Closing businesses': 'ONS closing businesses',
        'Total number of firms': 'ONS total number of firms',
        'Entry rate': 'ONS entry rate',
        'Exit rate': 'ONS exit rate',
    })
)

# Job creation/destruction sheet: rename the six rate columns that are used
# and keep only those plus 'year'
ONS_jc_jd = (
    pd.read_excel(
        'ONS_annualbusinessdynamism20012023.xlsx',
        sheet_name='Business Dynamism by transition',
        skiprows=7,
    )
    .rename(columns={
        'Total job creation': 'ONS Job creation rate',
        'Entrants': 'ONS Entry job creation rate',
        'Incumbents': 'ONS Incumbent job creation rate',
        'Total job destruction': 'ONS Job destruction rate',
        'Closing businesses': 'ONS Exit job destruction rate',
        'Incumbents.1': 'ONS Incumbent job destruction rate',
    })
    .loc[:, [
        'year',
        'ONS Job creation rate',
        'ONS Entry job creation rate',
        'ONS Incumbent job creation rate',
        'ONS Job destruction rate',
        'ONS Exit job destruction rate',
        'ONS Incumbent job destruction rate',
    ]]
)

ONS_jc_jd

Unnamed: 0,year,ONS Job creation rate,ONS Entry job creation rate,ONS Incumbent job creation rate,ONS Job destruction rate,ONS Exit job destruction rate,ONS Incumbent job destruction rate
0,2001,0.16222,0.046658,0.115561,0.144781,0.047534,0.097247
1,2002,0.130082,0.030802,0.099279,0.127294,0.038375,0.088919
2,2003,0.138038,0.059178,0.078861,0.120616,0.044062,0.076554
3,2004,0.142282,0.041762,0.10052,0.126663,0.040199,0.086464
4,2005,0.12958,0.040115,0.089465,0.122937,0.050145,0.072792
5,2006,0.116602,0.037535,0.079068,0.115939,0.036946,0.078993
6,2007,0.118408,0.039486,0.078922,0.103669,0.032062,0.071607
7,2008,0.10749,0.033707,0.073783,0.107535,0.035832,0.071703
8,2009,0.102713,0.028983,0.07373,0.103555,0.035431,0.068124
9,2010,0.101202,0.035109,0.066093,0.117435,0.036743,0.080693


In [None]:
# Import Workforce Jobs from NOMIS API?
# Equivalent request URL, kept for reference:
#https://www.nomisweb.co.uk/api/v01/dataset/NM_130_1.data.csv?geography=2092957697&date=latestMINUS113,latestMINUS109,latestMINUS105,latestMINUS101,latestMINUS97,latestMINUS93,latestMINUS89,latestMINUS85,latestMINUS81,latestMINUS77,latestMINUS73,latestMINUS69,latestMINUS65,latestMINUS61,latestMINUS57,latestMINUS53,latestMINUS49,latestMINUS45,latestMINUS41,latestMINUS37,latestMINUS33,latestMINUS29,latestMINUS25,latestMINUS21,latestMINUS17,latestMINUS13,latestMINUS9,latestMINUS5,latestMINUS1&industry=150994945...150994954,150994957,150994958,150994962...150994964&item=2&measures=20100


base_url = "https://www.nomisweb.co.uk/api/v01/dataset/NM_130_1.data.csv"

# NOTE: GEO_LIST_STR is not used by the request below (the 'geography'
# parameter is hard-coded); it is kept so the name stays defined.
GEO_LIST_STR = "1778384897...1778384901,1778384941,1778384950,1778385143...1778385146,1778385159,1778384902...1778384905,1778384942,1778384943,1778384956,1778384957,1778385033...1778385044,1778385124...1778385138,1778384906...1778384910,1778384958,1778385139...1778385142,1778385154...1778385158,1778384911...1778384914,1778384954,1778384955,1778384965...1778384972,1778385045...1778385058,1778385066...1778385072,1778384915...1778384917,1778384944,1778385078...1778385085,1778385100...1778385104,1778385112...1778385117,1778385147...1778385153,1778384925...1778384928,1778384948,1778384949,1778384960...1778384964,1778384986...1778384997,1778385015...1778385020,1778385059...1778385065,1778385086...1778385088,1778385118...1778385123,1778385160...1778385192,1778384929...1778384940,1778384953,1778384981...1778384985,1778385004...1778385014,1778385021...1778385032,1778385073...1778385077,1778385089...1778385099,1778385105...1778385111,1778384918...1778384924,1778384945...1778384947,1778384951,1778384952,1778384973...1778384980,1778384998...1778385003,1778384959,1778385193...1778385246"
# Every fourth period counting back from the latest release
# (presumably one quarter per year -- confirm against the returned DATE values)
DATE_STR = "latestMINUS113,latestMINUS109,latestMINUS105,latestMINUS101,latestMINUS97,latestMINUS93,latestMINUS89,latestMINUS85,latestMINUS81,latestMINUS77,latestMINUS73,latestMINUS69,latestMINUS65,latestMINUS61,latestMINUS57,latestMINUS53,latestMINUS49,latestMINUS45,latestMINUS41,latestMINUS37,latestMINUS33,latestMINUS29,latestMINUS25,latestMINUS21,latestMINUS17,latestMINUS13,latestMINUS9,latestMINUS5,latestMINUS1"
INDUSTRY_STR = "150994945...150994954,150994957,150994958,150994962...150994964"
params = {
    'geography': '2092957697',
    'date': DATE_STR,
    'industry': INDUSTRY_STR,
    'item': '2',
    'measures': '20100',
}

# Without a timeout requests waits indefinitely on a stalled connection,
# which would hang the notebook on Run All.
response = requests.get(base_url, params=params, timeout=60)

# Raise an error if the API returns 4xx or 5xx status code
response.raise_for_status()

# Use io.StringIO to read the text content into a dataframe
chunk_df = pd.read_csv(io.StringIO(response.text))

# Keep a local copy of the downloaded table
chunk_df.to_csv('workforce_jobs_market_sector_nomis.csv', index=False)


Unnamed: 0,DATE,INDUSTRY_NAME,OBS_VALUE
0,1997-03,"A : Agriculture, forestry and fishing",273077
1,1997-03,B : Mining and quarrying,74820
2,1997-03,C : Manufacturing,4004297
3,1997-03,"D : Electricity, gas, steam and air conditioni...",105268
4,1997-03,"E : Water supply; sewerage, waste management a...",154201
...,...,...,...
430,2025-03,"M : Professional, scientific and technical act...",2976505
431,2025-03,N : Administrative and support service activities,2665280
432,2025-03,"R : Arts, entertainment and recreation",854937
433,2025-03,S : Other service activities,639042


In [33]:
# Aggregate to annual employment
# NOTE: chunk_df is reassigned by the UK Business Counts download cell further
# down; re-run the Workforce Jobs download before re-running this cell.
#
# Select OBS_VALUE before summing. `.sum('OBS_VALUE')` does not pick a column:
# the string is bound to `numeric_only`, so every numeric column gets summed.
annual_workforce_jobs = (
    chunk_df.groupby('DATE', as_index=False)['OBS_VALUE'].sum()
    .rename(columns={'OBS_VALUE': 'Workforce Jobs Employment'})
)
annual_workforce_jobs['year'] = pd.to_datetime(annual_workforce_jobs['DATE']).dt.year
annual_workforce_jobs = annual_workforce_jobs[['year', 'Workforce Jobs Employment']]

# Join on to BSD whole economy df
# - drop any column left by a previous run so re-running the cell does not
#   create '_x'/'_y' duplicates
# - left join (as for the UK Business Counts merge) so BSD years without a
#   Workforce Jobs figure are kept rather than silently dropped
whole_economy_bsd = (
    whole_economy_bsd
    .drop(columns=['Workforce Jobs Employment'], errors='ignore')
    .merge(annual_workforce_jobs, on='year', how='left')
)
whole_economy_bsd



Unnamed: 0,year,n_firms,employment,n_entrants,n_exiters,n_entry_and_exit,n_incumbents,jc_entrants,jc_incumbents,jd_exiters,jd_incumbents,site_exp_entrants,site_exp_incumbents,site_closure_exit,site_closure_incumbents,WORKFORCE_JOBS_x,WORKFORCE_JOBS_y,WORKFORCE_JOBS,Workforce Jobs Employment
0,1997,1781291,17612823,0,0,0,0,0,0,0,0,0,0,0,0,17447898,17447898,17447898,17447898
1,1998,1847011,17709729,234733,160782,48630,1402866,848394,1620884,1074088,1008743,11592,51295,52885,21672,18046111,18046111,18046111,18046111
2,1999,1873060,17936146,187154,212458,48307,1425141,718589,1291775,1287866,684953,10563,27226,58067,6184,18228149,18228149,18228149,18228149
3,2000,1857200,18029246,199710,176270,45195,1436025,739561,1501027,1117071,880223,7234,48872,49257,21806,18487816,18487816,18487816,18487816
4,2001,1877929,18416010,196912,197356,45282,1438379,908243,1599402,1090562,1047337,10241,82199,53575,17641,18759162,18759162,18759162,18759162
5,2002,1879029,18872566,200563,208480,43175,1426811,829886,2097098,1241772,1371331,9411,73550,55704,23526,18861631,18861631,18861631,18861631
6,2003,1879213,18804479,203886,214205,47953,1413169,812769,1508126,1253052,1166258,7846,69552,62201,25063,18681187,18681187,18681187,18681187
7,2004,1922606,18661685,247150,202611,58401,1414444,798028,1283703,1280367,989326,6544,93578,59600,16919,18749241,18749241,18749241,18749241
8,2005,1955539,18761138,237378,194044,56567,1467550,792623,1673070,1027365,1019602,7905,64746,56026,17555,18949420,18949420,18949420,18949420
9,2006,1991156,18906423,235783,187612,50445,1517316,718538,1506004,1047028,1038480,8198,61198,54176,18572,19030742,19030742,19030742,19030742


In [43]:
# Chart BSD vs WFJ employment
# Rename only on the frame used for plotting: renaming whole_economy_bsd itself
# would make calculate_dynamism_rates(whole_economy_bsd) fail on a re-run,
# because that function reads the 'employment' column.
BSD_vs_WFJ = (
    whole_economy_bsd
    .rename(columns={'employment': 'BSD Employment'})
    .melt(id_vars='year',
          value_vars=['BSD Employment', 'Workforce Jobs Employment'],
          var_name='source',
          value_name='employment')
)

chart = alt.Chart(BSD_vs_WFJ).mark_line().encode(
    # Label every second year to keep the axis readable
    x=alt.X('year:O', axis=alt.Axis(
        labelExpr="datum.value % 2 == 0 ? datum.label : ''",
        labelAngle=0)),
    y=alt.Y('employment:Q',
            title='BSD vs Workforce Jobs Employment',
            scale=alt.Scale(domainMin=17000000),
            axis=alt.Axis(format=".2s")),
    color=alt.Color('source', title=None)
)

# chart.save does not create missing folders
os.makedirs('Charts', exist_ok=True)
chart.save('Charts/BSD_vs_workforcejobs.png', scale_factor=2)
chart.save('Charts/BSD_vs_workforcejobs.json')

In [62]:
# Import UK Business Counts from NOMIS API

# Equivalent request URL, kept for reference:
#https://www.nomisweb.co.uk/api/v01/dataset/NM_142_1.data.csv?geography=2092957697&industry=150994945...150994954,150994957,150994958,150994962...150994965&employment_sizeband=0&legal_status=0&measures=20100
base_url = "https://www.nomisweb.co.uk/api/v01/dataset/NM_142_1.data.csv"

# NOTE: GEO_LIST_STR is not used by the request below (the 'geography'
# parameter is hard-coded); it is kept so the name stays defined.
GEO_LIST_STR = "1778384897...1778384901,1778384941,1778384950,1778385143...1778385146,1778385159,1778384902...1778384905,1778384942,1778384943,1778384956,1778384957,1778385033...1778385044,1778385124...1778385138,1778384906...1778384910,1778384958,1778385139...1778385142,1778385154...1778385158,1778384911...1778384914,1778384954,1778384955,1778384965...1778384972,1778385045...1778385058,1778385066...1778385072,1778384915...1778384917,1778384944,1778385078...1778385085,1778385100...1778385104,1778385112...1778385117,1778385147...1778385153,1778384925...1778384928,1778384948,1778384949,1778384960...1778384964,1778384986...1778384997,1778385015...1778385020,1778385059...1778385065,1778385086...1778385088,1778385118...1778385123,1778385160...1778385192,1778384929...1778384940,1778384953,1778384981...1778384985,1778385004...1778385014,1778385021...1778385032,1778385073...1778385077,1778385089...1778385099,1778385105...1778385111,1778384918...1778384924,1778384945...1778384947,1778384951,1778384952,1778384973...1778384980,1778384998...1778385003,1778384959,1778385193...1778385246"

# Select industries excluding K, L, O, P and Q
INDUSTRY_STR = "150994945...150994954,150994957,150994958,150994962...150994965"

params = {
    'geography': '2092957697',
    'industry': INDUSTRY_STR,
    'employment_sizeband': '0',
    'legal_status': '0',
    'measures': '20100',
}

# Without a timeout requests waits indefinitely on a stalled connection,
# which would hang the notebook on Run All.
response = requests.get(base_url, params=params, timeout=60)

# Raise an error if the API returns 4xx or 5xx status code
response.raise_for_status()

# Use io.StringIO to read the text content into a dataframe
chunk_df = pd.read_csv(io.StringIO(response.text))

# Keep a local copy of the downloaded table
chunk_df.to_csv('uk_business_counts.csv', index=False)

chunk_df

Unnamed: 0,DATE,DATE_NAME,DATE_CODE,DATE_TYPE,DATE_TYPECODE,DATE_SORTORDER,GEOGRAPHY,GEOGRAPHY_NAME,GEOGRAPHY_CODE,GEOGRAPHY_TYPE,...,MEASURES,MEASURES_NAME,OBS_VALUE,OBS_STATUS,OBS_STATUS_NAME,OBS_CONF,OBS_CONF_NAME,URN,RECORD_OFFSET,RECORD_COUNT
0,2010,2010,2010,date,0,0,2092957697,United Kingdom,K02000001,countries,...,20100,Value,137135,A,Normal Value,F,Free (free for publication),Nm-142d1d32160e0d2092957697d150994945d0d0d20100,0,256
1,2010,2010,2010,date,0,0,2092957697,United Kingdom,K02000001,countries,...,20100,Value,1195,A,Normal Value,F,Free (free for publication),Nm-142d1d32160e0d2092957697d150994946d0d0d20100,1,256
2,2010,2010,2010,date,0,0,2092957697,United Kingdom,K02000001,countries,...,20100,Value,127370,A,Normal Value,F,Free (free for publication),Nm-142d1d32160e0d2092957697d150994947d0d0d20100,2,256
3,2010,2010,2010,date,0,0,2092957697,United Kingdom,K02000001,countries,...,20100,Value,550,A,Normal Value,F,Free (free for publication),Nm-142d1d32160e0d2092957697d150994948d0d0d20100,3,256
4,2010,2010,2010,date,0,0,2092957697,United Kingdom,K02000001,countries,...,20100,Value,5865,A,Normal Value,F,Free (free for publication),Nm-142d1d32160e0d2092957697d150994949d0d0d20100,4,256
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
251,2025,2025,2025,date,0,15,2092957697,United Kingdom,K02000001,countries,...,20100,Value,222055,A,Normal Value,F,Free (free for publication),Nm-142d1d32400e0d2092957697d150994958d0d0d20100,251,256
252,2025,2025,2025,date,0,15,2092957697,United Kingdom,K02000001,countries,...,20100,Value,75855,A,Normal Value,F,Free (free for publication),Nm-142d1d32400e0d2092957697d150994962d0d0d20100,252,256
253,2025,2025,2025,date,0,15,2092957697,United Kingdom,K02000001,countries,...,20100,Value,116095,A,Normal Value,F,Free (free for publication),Nm-142d1d32400e0d2092957697d150994963d0d0d20100,253,256
254,2025,2025,2025,date,0,15,2092957697,United Kingdom,K02000001,countries,...,20100,Value,0,A,Normal Value,F,Free (free for publication),Nm-142d1d32400e0d2092957697d150994964d0d0d20100,254,256


In [64]:
# Aggregate to annual firm counts
# Select OBS_VALUE before summing. `.sum('OBS_VALUE')` does not pick a column:
# the string is bound to `numeric_only`, so every numeric column gets summed.
annual_uk_business_counts = (
    chunk_df.groupby('DATE', as_index=False)['OBS_VALUE'].sum()
    .rename(columns={'OBS_VALUE': 'UK Business Counts',
                     'DATE': 'year'})
)
annual_uk_business_counts = annual_uk_business_counts[['year', 'UK Business Counts']]

# Join on to BSD whole economy df
# Drop any column left by a previous run first: merging twice would create
# 'UK Business Counts_x'/'_y' and break the chart cell that melts on
# 'UK Business Counts'.
whole_economy_bsd = (
    whole_economy_bsd
    .drop(columns=['UK Business Counts'], errors='ignore')
    .merge(annual_uk_business_counts, on='year', how='left')
)
whole_economy_bsd

Unnamed: 0,year,n_firms,employment,n_entrants,n_exiters,n_entry_and_exit,n_incumbents,jc_entrants,jc_incumbents,jd_exiters,jd_incumbents,site_exp_entrants,site_exp_incumbents,site_closure_exit,site_closure_incumbents,UK Business Counts
0,1997,1781291,17612823,0,0,0,0,0,0,0,0,0,0,0,0,
1,1998,1847011,17709729,234733,160782,48630,1402866,848394,1620884,1074088,1008743,11592,51295,52885,21672,
2,1999,1873060,17936146,187154,212458,48307,1425141,718589,1291775,1287866,684953,10563,27226,58067,6184,
3,2000,1857200,18029246,199710,176270,45195,1436025,739561,1501027,1117071,880223,7234,48872,49257,21806,
4,2001,1877929,18416010,196912,197356,45282,1438379,908243,1599402,1090562,1047337,10241,82199,53575,17641,
5,2002,1879029,18872566,200563,208480,43175,1426811,829886,2097098,1241772,1371331,9411,73550,55704,23526,
6,2003,1879213,18804479,203886,214205,47953,1413169,812769,1508126,1253052,1166258,7846,69552,62201,25063,
7,2004,1922606,18661685,247150,202611,58401,1414444,798028,1283703,1280367,989326,6544,93578,59600,16919,
8,2005,1955539,18761138,237378,194044,56567,1467550,792623,1673070,1027365,1019602,7905,64746,56026,17555,
9,2006,1991156,18906423,235783,187612,50445,1517316,718538,1506004,1047028,1038480,8198,61198,54176,18572,


In [73]:
# Chart BSD firm count vs UK business count
# Rename only on the frame used for plotting: renaming whole_economy_bsd itself
# would make calculate_dynamism_rates(whole_economy_bsd) fail on a re-run,
# because that function reads the 'n_firms' column.
BSD_vs_UK_businesscounts = (
    whole_economy_bsd
    .rename(columns={'n_firms': 'BSD Firm Count'})
    .melt(id_vars='year',
          value_vars=['BSD Firm Count', 'UK Business Counts'],
          var_name='Source',
          value_name='Firm Count')
)

chart = alt.Chart(BSD_vs_UK_businesscounts).mark_line().encode(
    # Label every second year to keep the axis readable
    x=alt.X('year:O', axis=alt.Axis(
        labelExpr="datum.value % 2 == 0 ? datum.label : ''",
        labelAngle=0)),
    y=alt.Y('Firm Count:Q',
            title='BSD Firm Count vs UK Business Count',
            scale=alt.Scale(domainMin=1750000),
            axis=alt.Axis(format=".2s")),
    color=alt.Color('Source', title=None)
)

# chart.save does not create missing folders
os.makedirs('Charts', exist_ok=True)
chart.save('Charts/BSD_vs_UKbusinesscounts.png', scale_factor=2)
chart.save('Charts/BSD_vs_UKbusinesscounts.json')

# Only the last expression of a cell is rendered, so display the chart here
chart

In [None]:
# Join ONS entry and exit figures to BSD table
entry_exit = (
    whole_economy_dynamism
    .merge(ONS_entry_exit, on='year', how='left')
    .loc[:, ['year', 'Entry rate', 'Exit rate', 'ONS entry rate', 'ONS exit rate']]
    .rename(columns={'Entry rate': 'BSD entry rate',
                     'Exit rate': 'BSD exit rate'})
)

# Long format (one row per year and series) for charting
entry_exit_long = entry_exit.melt(
    id_vars='year',
    value_vars=['BSD entry rate', 'BSD exit rate', 'ONS entry rate', 'ONS exit rate'],
    value_name='value',
)


In [109]:
# Get the max year to position the labels at the end
max_year = int(entry_exit_long['year'].max())

# Define the Line Chart
line_base = alt.Chart(entry_exit_long).mark_line().encode(
    # Label every second year to keep the axis readable
    x=alt.X('year:O', axis=alt.Axis(
            labelExpr="datum.value % 2 == 0 ? datum.label : ''",
            labelAngle=0)),
    y=alt.Y('value:Q', axis=alt.Axis(format='%'), title='Rate'),
    # Set legend to None to hide the side box
    color=alt.Color('variable:N', legend=None)
)

# Define the Text Labels (Filtered to just the last year)
label_base = alt.Chart(entry_exit_long).mark_text(align='left', dx=5).encode(
    x=alt.X('year:O'),
    y=alt.Y('value:Q'),
    text=alt.Text('variable:N'), # Use the variable name as the label
    color=alt.Color('variable:N', legend=None)
).transform_filter(
    alt.datum.year == max_year
)

# Layer them together for "Entry"
entry_filter = alt.FieldOneOfPredicate(field='variable', oneOf=['BSD entry rate', 'ONS entry rate'])
entry_chart = (
    line_base.transform_filter(entry_filter) +
    label_base.transform_filter(entry_filter)
).properties(title='Entry Rates')

# Layer them together for "Exit"
exit_filter = alt.FieldOneOfPredicate(field='variable', oneOf=['BSD exit rate', 'ONS exit rate'])
exit_chart = (
    line_base.transform_filter(exit_filter) +
    label_base.transform_filter(exit_filter)
).properties(title='Exit Rates')

# Entry and exit panels side by side
chart = entry_chart | exit_chart

# chart.save does not create missing folders
os.makedirs('Charts', exist_ok=True)
chart.save('Charts/BSD_vs_ONS_entryexit.png', scale_factor=2)
chart.save('Charts/BSD_vs_ONS_entryexit.json')

In [23]:
# Quick look at the columns available after joining the ONS rates to the BSD rates
jc_jd = whole_economy_dynamism.merge(
    ONS_jc_jd,
    on='year',
    how='left',
)
jc_jd.columns

Index(['year', 'n_firms', 'employment', 'n_entrants', 'n_exiters',
       'n_entry_and_exit', 'n_incumbents', 'jc_entrants', 'jc_incumbents',
       'jd_exiters', 'jd_incumbents', 'site_exp_entrants',
       'site_exp_incumbents', 'site_closure_exit', 'site_closure_incumbents',
       'total_employment_lagged', 'Entry rate', 'Exit rate',
       'Job creation rate', 'Job destruction rate', 'Entry job creation rate',
       'Incumbent job creation rate', 'Exit job destruction rate',
       'Incumbent job destruction rate', 'ONS Job creation rate',
       'ONS Entry job creation rate', 'ONS Incumbent job creation rate',
       'ONS Job destruction rate', 'ONS Exit job destruction rate',
       'ONS Incumbent job destruction rate'],
      dtype='object')

In [31]:
# Comparing job creation/destruction rates with ONS dynamism

# BSD rate columns and the "BSD"-prefixed names they get for the comparison
bsd_rate_labels = {
    'Job creation rate': 'BSD Job creation rate',
    'Job destruction rate': 'BSD Job destruction rate',
    'Entry job creation rate': 'BSD Entry job creation rate',
    'Incumbent job creation rate': 'BSD Incumbent job creation rate',
    'Exit job destruction rate': 'BSD Exit job destruction rate',
    'Incumbent job destruction rate': 'BSD Incumbent job destruction rate',
}
ons_rate_cols = [
    'ONS Job creation rate', 'ONS Job destruction rate',
    'ONS Entry job creation rate', 'ONS Incumbent job creation rate',
    'ONS Exit job destruction rate', 'ONS Incumbent job destruction rate',
]

# Join, keep only the rate columns, then label the BSD series
jc_jd = (
    whole_economy_dynamism
    .merge(ONS_jc_jd, on='year', how='left')
    .loc[:, ['year', *bsd_rate_labels, *ons_rate_cols]]
    .rename(columns=bsd_rate_labels)
)

# Long format: totals first, then the entrant/incumbent/exit components
jc_jd_long = jc_jd.melt(
    id_vars='year',
    value_vars=[
        'BSD Job creation rate', 'BSD Job destruction rate',
        'ONS Job creation rate', 'ONS Job destruction rate',
        'BSD Entry job creation rate', 'BSD Incumbent job creation rate',
        'BSD Exit job destruction rate', 'BSD Incumbent job destruction rate',
        'ONS Entry job creation rate', 'ONS Incumbent job creation rate',
        'ONS Exit job destruction rate', 'ONS Incumbent job destruction rate',
    ],
    value_name='value',
)

jc_jd_long


Unnamed: 0,year,variable,value
0,1998,BSD Job creation rate,0.140198
1,1999,BSD Job creation rate,0.113517
2,2000,BSD Job creation rate,0.124920
3,2001,BSD Job creation rate,0.139088
4,2002,BSD Job creation rate,0.158937
...,...,...,...
295,2018,ONS Incumbent job destruction rate,0.072877
296,2019,ONS Incumbent job destruction rate,0.065016
297,2020,ONS Incumbent job destruction rate,0.061537
298,2021,ONS Incumbent job destruction rate,0.064101


In [34]:
# First plot just total job creation and destruction rates

# Labels are placed at the last year present in the data
max_year = int(jc_jd_long['year'].max())

# Define the Line Chart
line_base = alt.Chart(jc_jd_long).mark_line().encode(
    # Label every second year to keep the axis readable
    x=alt.X('year:O', axis=alt.Axis(
            labelExpr="datum.value % 2 == 0 ? datum.label : ''",
            labelAngle=0)),
    y=alt.Y('value:Q', axis=alt.Axis(format='%'), title='Rate'),
    # Set legend to None to hide the side box
    color=alt.Color('variable:N', legend=None)
)

# Define the Text Labels (Filtered to just the last year)
label_base = alt.Chart(jc_jd_long).mark_text(align='left', dx=5).encode(
    x=alt.X('year:O'),
    y=alt.Y('value:Q'),
    text=alt.Text('variable:N'), # Use the variable name as the label
    color=alt.Color('variable:N', legend=None)
).transform_filter(
    alt.datum.year == max_year
)

# Layer them together for "Job creation"
job_creation_filter = alt.FieldOneOfPredicate(field='variable', oneOf=['BSD Job creation rate','ONS Job creation rate'])
job_creation_chart = (
    line_base.transform_filter(job_creation_filter) +
    label_base.transform_filter(job_creation_filter)
).properties(title='Job creation rates')

# Layer them together for "Job destruction"
job_destruction_filter = alt.FieldOneOfPredicate(field='variable', oneOf=['BSD Job destruction rate','ONS Job destruction rate'])
job_destruction_chart = (
    line_base.transform_filter(job_destruction_filter) +
    label_base.transform_filter(job_destruction_filter)
).properties(title='Job destruction rates')

# Creation and destruction panels side by side
chart = job_creation_chart | job_destruction_chart

# chart.save does not create missing folders
os.makedirs('Charts', exist_ok=True)
chart.save('Charts/BSD_vs_ONS_jobcreation_destruction.png', scale_factor=2)
chart.save('Charts/BSD_vs_ONS_jobcreation_destruction.json')

# Only the last expression of a cell is rendered, so display the chart here
chart

In [45]:

# Entry/incumbent job creation and exit/incumbent job destruction, BSD vs ONS

# Define the Line Chart
line_base = alt.Chart(jc_jd_long).mark_line().encode(
    # Label every second year to keep the axis readable
    x=alt.X('year:O', axis=alt.Axis(
            labelExpr="datum.value % 2 == 0 ? datum.label : ''",
            labelAngle=0)),
    y=alt.Y('value:Q', axis=alt.Axis(format='%'), title='Rate'),
    # Set legend to None to hide the side box
    color=alt.Color('variable:N', legend=None)
)


# Add end labels: for each series, the row of its latest year that has a value.
# Rows with a missing value are dropped first so the label is never anchored
# to a NaN (the left join leaves NaN where the ONS series has no figure).
end_point = (
    jc_jd_long.dropna(subset=['value'])
    .groupby(['variable'])['year']
    .idxmax()
)

# Work on a copy: the label positions are nudged below and jc_jd_long must
# keep the true values for the lines
end_point_data = jc_jd_long.loc[end_point].copy()

# Shift labels slightly apart to avoid overlap
end_point_data.loc[end_point_data['variable'] == 'ONS Entry job creation rate', 'value'] += 0.002
end_point_data.loc[end_point_data['variable'] == 'BSD Entry job creation rate', 'value'] -= 0.002


label_base = alt.Chart(end_point_data).mark_text(
    align='left',
    dx=5,
    baseline='middle'
).encode(
    x=alt.X('year:O'),
    y=alt.Y('value:Q', axis=alt.Axis(format='%')),
    text='variable:N',
    # legend=None to match the line layer; the text labels replace the legend
    color=alt.Color('variable:N', legend=None)
)

# Layer them together for "Job creation"
job_creation_filter = alt.FieldOneOfPredicate(field='variable', oneOf=['BSD Entry job creation rate',
                                                                'BSD Incumbent job creation rate',
                                                                'ONS Entry job creation rate',
                                                                'ONS Incumbent job creation rate'])
job_creation_chart = (
    line_base.transform_filter(job_creation_filter) +
    label_base.transform_filter(job_creation_filter)
).properties(title='Job creation rates')

# Layer them together for "Job destruction"
job_destruction_filter = alt.FieldOneOfPredicate(field='variable', oneOf=['BSD Exit job destruction rate',
                                                               'BSD Incumbent job destruction rate',
                                                               'ONS Exit job destruction rate',
                                                               'ONS Incumbent job destruction rate'])
job_destruction_chart = (
    line_base.transform_filter(job_destruction_filter) +
    label_base.transform_filter(job_destruction_filter)
).properties(title='Job destruction rates')

# Creation and destruction panels side by side
chart = job_creation_chart | job_destruction_chart

# chart.save does not create missing folders
os.makedirs('Charts', exist_ok=True)
chart.save('Charts/BSD_vs_ONS_jc_jd_intensive_extensive.png', scale_factor=2)
chart.save('Charts/BSD_vs_ONS_jc_jd_intensive_extensive.json')

# Only the last expression of a cell is rendered, so display the chart here
chart


FIRM SIZES

In [None]:
# Compare exit rates for firm sizes between BSD and ONS

