## **Final publication charts for Britain's Disappearing Dynamism**

In [8]:
# Import packages and set filepaths
import pandas as pd
import numpy as np
import altair as alt
from pathlib import Path
from pandas.api.types import CategoricalDtype
import os
import eco_style 
# Activate the "report" Altair theme for every chart rendered below
# (presumably registered by the eco_style import above — TODO confirm).
alt.themes.enable("report")

# Input and output folders are resolved relative to the current working
# directory: "Data" and "Charts" both sit next to the directory the notebook
# is run from (i.e. under its parent).
script_dir = Path.cwd()
import_path = script_dir.parent / "Data"
chart_path = script_dir.parent / "Charts"

In [9]:
# Load data
# Every summary table lives in the same workbook, so open and parse it once:
# passing a list to `sheet_name` makes read_excel return a dict keyed by
# sheet name instead of re-reading the file for each sheet.
# NOTE(review): the date stamp in the file name (29 Feb 2026) is not a valid
# calendar date — confirm this is the intended workbook.
bsd_stats_path = import_path / 'BSD/29_02_2026_BSD_Dynamism_Stats.xlsx'
bsd_sheets = pd.read_excel(
    bsd_stats_path,
    sheet_name=[
        'population',
        'firm_dynamics',
        'job_flows',
        'site_dynamics',
        'cohort_analysis',
        'growth_rates',
        'growth_cats',
        'prod',
    ],
)

# One DataFrame per sheet, under the names the rest of the notebook uses.
population_df = bsd_sheets['population']
firm_dynamics_df = bsd_sheets['firm_dynamics']
job_flows_df = bsd_sheets['job_flows']
site_dynamics_df = bsd_sheets['site_dynamics']
cohort_df = bsd_sheets['cohort_analysis']
growth_rates_df = bsd_sheets['growth_rates']
growth_cats_df = bsd_sheets['growth_cats']
prod_df = bsd_sheets['prod']


#### **Chart 1 - The UK's evaporating productivity growth**

Output per hour worked, 1973–2025, rebased so that the average of the four quarters of 2008 equals 100.

Data source: https://www.ons.gov.uk/economy/economicoutputandproductivity/productivitymeasures/datasets/outputperhourworkeduk

In [None]:
import pandas as pd
import altair as alt
import numpy as np

# Load and prepare data
# The first six lines of the file are skipped before the header row is read.
# NOTE(review): this path is relative to the working directory rather than
# built from import_path like the BSD workbook — confirm the file location.
df = pd.read_csv('output_per_hour_worked_ons_dec25.csv', skiprows=6)

# Give the first two columns fixed names, whatever headers the file carries.
period_col, value_col = df.columns[0], df.columns[1]
df = df.rename(columns={period_col: 'year', value_col: 'output_per_hour_worked'})

# Convert "1971 Q1" format to datetime
def quarter_to_date(quarter_str):
    """Return the first day of the quarter named by a label such as '1971 Q1'.

    The label is split on whitespace into a year part and a 'Q<n>' part. The
    result is a ``pd.Timestamp`` on day 1 of the quarter's opening month
    (Q1 -> January, Q2 -> April, Q3 -> July, Q4 -> October).
    """
    year_part, quarter_part = quarter_str.split()

    # The digit after the leading 'Q' selects the quarter's opening month.
    first_month_of_quarter = {1: 1, 2: 4, 3: 7, 4: 10}
    quarter_index = int(quarter_part[1])
    opening_month = first_month_of_quarter[quarter_index]

    return pd.Timestamp(year=int(year_part), month=opening_month, day=1)

df['date'] = df['year'].apply(quarter_to_date)

# Rebase to 2008 = 100: the base is the mean of the observations dated in 2008
is_2008 = df['date'].dt.year == 2008
value_2008 = df.loc[is_2008, 'output_per_hour_worked'].mean()
print(f"2008 base value: {value_2008:.2f}")

df['value'] = (df['output_per_hour_worked'] / value_2008) * 100

print(f"Data range: {df['date'].min().date()} to {df['date'].max().date()}")

# Sort data by date to ensure smooth lines
df = df.sort_values('date')

# Calculate pre-recession trend from the observations dated before 2008
recession_start = pd.to_datetime('2008-01-01')
pre_recession = df.loc[df['date'] < recession_start].copy()

# Level the trend is anchored to: mean of the rebased series over 2008
value_2008_actual = df.loc[df['date'].dt.year == 2008, 'value'].mean()

# Fit a straight line to the pre-2008 data, with time measured in days since
# the first pre-recession observation
trend_origin = pre_recession['date'].min()
pre_recession['time'] = (pre_recession['date'] - trend_origin).dt.days
z = np.polyfit(pre_recession['time'], pre_recession['value'], 1)

# Value of the fitted line at the recession start date
time_at_2008 = (recession_start - trend_origin).days
trend_value_at_2008 = np.polyval(z, time_at_2008)

# Keep the fitted slope but shift the intercept so the line passes through
# the 2008 anchor level at the recession start date
adjustment = value_2008_actual - trend_value_at_2008
slope, intercept = z
z_adjusted = [slope, intercept + adjustment]

# Evaluate the shifted trend on every date, using the same time origin
df['time'] = (df['date'] - trend_origin).dt.days
df['trend'] = np.polyval(z_adjusted, df['time'])

# Create base chart
# Only an x encoding is defined here: `date` aggregated to its year, with no
# axis title, a dashed semi-transparent grid and tickCount=10.
# NOTE(review): `base` is not referenced by any of the layers combined at the
# end of this cell — confirm whether it is still needed.
base = alt.Chart(df).encode(
    x=alt.X('year(date):T', 
            title='', 
            axis=alt.Axis(
                grid=True, 
                gridDash=[2, 2], 
                gridOpacity=0.4,
                tickCount=10
            ))
)

# Dashed, semi-transparent grid settings used on both axes of the main line
dashed_grid = dict(grid=True, gridDash=[2, 2], gridOpacity=0.4)

# Actual productivity line (blue); this layer carries the axis styling and the
# fixed 0-160 y domain
actual_line = (
    alt.Chart(df)
    .mark_line(color='#5B9BD5', strokeWidth=2.5)
    .encode(
        x=alt.X(
            'date:T',
            title='',
            axis=alt.Axis(tickCount=10, format='%Y', **dashed_grid),
        ),
        y=alt.Y(
            'value:Q',
            title='',
            scale=alt.Scale(domain=[0, 160]),
            axis=alt.Axis(**dashed_grid),
        ),
    )
)

# Pre-recession trend line (red dashed) - drawn over the entire period
trend_line = (
    alt.Chart(df)
    .mark_line(
        color='#C94545',
        strokeDash=[8, 4],
        strokeWidth=2.5,
        interpolate='natural',
    )
    .encode(
        x=alt.X('date:T', title='', axis=alt.Axis(format='%Y')),
        y='trend:Q',
    )
)

# Vertical dashed grey rule at the recession start date
recession_marker_df = pd.DataFrame({'x': [recession_start]})
recession_line = (
    alt.Chart(recession_marker_df)
    .mark_rule(strokeDash=[6, 4], strokeWidth=1.5, color='#808080')
    .encode(x=alt.X('x:T', title=''))
)

# Label: "Pre-recession trend"
trend_label_date = pd.to_datetime('2016-01-01')

# Trend value on the label date; fall back to a fixed height of 115 when the
# data has no row for that date
trend_on_label_date = df.loc[df['date'] == trend_label_date, 'trend']
if len(trend_on_label_date) > 0:
    trend_value_at_label = trend_on_label_date.values[0]
else:
    trend_value_at_label = 115

trend_label_df = pd.DataFrame({
    'x': [trend_label_date],
    'y': [trend_value_at_label + 14],  # lift the text 14 units above the trend line
    'text': ['Pre-recession trend'],
})

trend_label = (
    alt.Chart(trend_label_df)
    .mark_text(align='center', color='#C94545', fontSize=12)
    .encode(
        x=alt.X('x:T', title=''),
        y='y:Q',
        text='text:N',
    )
)

# Label: "Productivity gap"
# Actual and trend values at the most recent date in the data
latest_date = df['date'].max()
latest_row = df[df['date'] == latest_date]
latest_actual = latest_row['value'].values[0]
latest_trend = latest_row['trend'].values[0]

# Text sits one year to the right of the last observation, 8 units below the trend value
gap_label_df = pd.DataFrame({
    'x': [latest_date + pd.DateOffset(years=1)],
    'y': [latest_trend - 8],
    'text': ['Productivity gap'],
})
gap_label = (
    alt.Chart(gap_label_df)
    .mark_text(align='left', color='#5B9BD5', fontSize=12)
    .encode(
        x=alt.X('x:T', title=''),
        y='y:Q',
        text='text:N',
    )
)

# Dashed vertical rule spanning from the actual value to the trend value
gap_span_df = pd.DataFrame({
    'x': [latest_date],
    'y': [latest_actual],
    'y2': [latest_trend],
})
gap_line = (
    alt.Chart(gap_span_df)
    .mark_rule(color='#5B9BD5', strokeWidth=2, strokeDash=[3, 3])
    .encode(
        x=alt.X('x:T', title=''),
        y='y:Q',
        y2='y2:Q',
    )
)

# Arrowheads drawn as filled triangles at either end of the rule.
# Arrowhead near the trend value: an upward-pointing triangle placed 1 unit
# below latest_trend.
upper_arrow_df = pd.DataFrame({
    'x': [latest_date],
    'y': [latest_trend - 1],
})
gap_arrow_top = (
    alt.Chart(upper_arrow_df)
    .mark_point(shape='triangle-up', filled=True, color='#5B9BD5', size=100)
    .encode(
        x=alt.X('x:T', title='', axis=alt.Axis(format='%Y')),
        y='y:Q',
    )
)

# Arrowhead near the actual value: a downward-pointing triangle placed 1 unit
# above latest_actual.
lower_arrow_df = pd.DataFrame({
    'x': [latest_date],
    'y': [latest_actual + 1],
})
gap_arrow_bottom = (
    alt.Chart(lower_arrow_df)
    .mark_point(shape='triangle-down', filled=True, color='#5B9BD5', size=100)
    .encode(
        x=alt.X('x:T', title='', axis=alt.Axis(format='%Y')),
        y='y:Q',
    )
)

# Combine all layers into one 600x400 chart; the view border is removed and
# the grid, axis domain and tick colours are set for the whole chart.
chart = (
    recession_line + 
    actual_line + 
    trend_line + 
    trend_label + 
    gap_label +
    gap_line +
    gap_arrow_top +
    gap_arrow_bottom
).properties(
    width=600,
    height=400

).configure_view(
    strokeWidth=0,
    stroke=None
).configure_axis(
    gridColor='#D3D3D3',
    domainColor='#000000',
    tickColor='#000000'
)

# Save and display
# NOTE(review): these paths are relative to the working directory, unlike the
# other charts which are written under chart_path — confirm this is intended.
png_path = 'Paper charts/productivity_chart.png'
json_path = 'Paper charts/productivity_chart.json'
chart.save(png_path, scale_factor=2)
chart.save(json_path)

# Report the files that are actually written (a PNG and a JSON spec).
print(f"✓ Chart saved as '{png_path}' and '{json_path}'")
print(f"✓ Latest productivity (2008=100): {latest_actual:.1f}")
print(f"✓ Trend projection: {latest_trend:.1f}")
print(f"✓ Productivity gap: {latest_trend - latest_actual:.1f} points")

chart

## **Chart 2 - BSD basic facts**

In [None]:
# BSD facts - how has the total number of firms, employment and turnover changed over time?

total_population_df = population_df[population_df['dimension']=='Total']


def _odd_year_x():
    """Build the x encoding shared by all four panels.

    Years are treated as ordinal, labels are horizontal, and only odd years
    get a tick label (every second year).
    """
    return alt.X(
        'year:O',
        axis=alt.Axis(
            labelExpr="datum.value % 2 != 0 ? datum.label : ''",
            labelAngle=0,
        ),
    )


# NOTE(review): in each y axis below the labelExpr is built from datum.value
# (the raw tick value), so the accompanying `format` does not appear to
# affect the text that is displayed — confirm this is intended.

# Number of firms, in millions
firms_in_millions = total_population_df.assign(n_firms_m=total_population_df['n_firms'] / 1e6)
n_firm_chart = alt.Chart(firms_in_millions).mark_line().encode(
    x=_odd_year_x(),
    y=alt.Y(
        'n_firms_m:Q',
        title='Total number of firms in BSD',
        scale=alt.Scale(domainMin=1.5, domainMax=2.5),
        axis=alt.Axis(format=".1f", labelExpr="datum.value + 'm'"),
    ),
)

# Employment, in millions
employment_in_millions = total_population_df.assign(employment_m=total_population_df['employment'] / 1e6)
emp_chart = alt.Chart(employment_in_millions).mark_line().encode(
    x=_odd_year_x(),
    y=alt.Y(
        'employment_m:Q',
        title='Total employment in BSD',
        scale=alt.Scale(domainMin=15, domainMax=24),
        axis=alt.Axis(format=".0f", labelExpr="datum.value + 'm'"),
    ),
)

# Turnover, divided by 1e9.
# NOTE(review): the column is called turnover_bn but the axis labels use
# 'tn'; that is only consistent if `turnover` is stored in £ thousands — confirm.
turnover_scaled = total_population_df.assign(turnover_bn=total_population_df['turnover'] / 1e9)
turnover_chart = alt.Chart(turnover_scaled).mark_line().encode(
    x=_odd_year_x(),
    y=alt.Y(
        'turnover_bn:Q',
        title='Total turnover in BSD',
        axis=alt.Axis(format=".0f", labelExpr="'£' + datum.value + 'tn'"),
    ),
)

# Average turnover per employee, plotted as stored and labelled with a 'k' suffix
productivity_chart = alt.Chart(total_population_df).mark_line().encode(
    x=_odd_year_x(),
    y=alt.Y(
        'avg_turnover_per_employee:Q',
        title='Average turnover per employee in BSD',
        axis=alt.Axis(format=".0f", labelExpr="'£' + datum.value + 'k'"),
    ),
)

# 2x2 grid: firms | employment on top, turnover | turnover per employee below
top_row = n_firm_chart | emp_chart
bottom_row = turnover_chart | productivity_chart
basic_facts_chart = top_row & bottom_row

display(basic_facts_chart)
basic_facts_chart.save(chart_path / 'Descriptive paper/Data/BSD_basic_facts.png', scale_factor=2.0)

In [15]:
# INDUSTRIAL BREAKDOWN TABLE
# Total employment per sector in 2023, largest sector first.

sectoral_population_df = population_df[population_df['dimension'] == 'Sector'].copy()

sectoral_population_2023_df = (
    sectoral_population_df
    .loc[sectoral_population_df['year'] == 2023]
    .groupby(['year', 'category'])
    .agg({'employment': 'sum'})
    .sort_values(by='employment', ascending=False)
)

sectoral_population_2023_df

Unnamed: 0_level_0,Unnamed: 1_level_0,employment
year,category,Unnamed: 2_level_1
2023,Retail Trade,3012282
2023,Business Support Services,2969640
2023,Professional Services,2660948
2023,Hospitality,2535031
2023,Manufacturing,2487562
2023,Wholesale Trade,1817239
2023,Social care,1738533
2023,Construction,1650204
2023,Transport & Logistics,1475664
2023,Other Services,1376838


# ENTRY AND EXIT RATES

In [None]:
# Order rows so that, within each (category, dimension) group, years ascend;
# the one-row shift below then picks up the previous row's firm count.
# NOTE(review): this is the previous *year* only if every group has one row
# per consecutive year — confirm there are no gaps.
firm_dynamics_df = firm_dynamics_df.sort_values(['category','dimension','year'])

firms_by_group = firm_dynamics_df.groupby(['category','dimension'])['n_firms']
firm_dynamics_df['total_firms_lag'] = firms_by_group.shift(1)

# Both rates share the lagged firm count as denominator, and both numerators
# include the n_entry_and_exit count.
lagged_firm_count = firm_dynamics_df['total_firms_lag']
entry_and_exit_count = firm_dynamics_df['n_entry_and_exit']

firm_dynamics_df['entry_rate'] = (firm_dynamics_df['n_entrants'] + entry_and_exit_count) / lagged_firm_count
firm_dynamics_df['exit_rate'] = (firm_dynamics_df['n_exiters'] + entry_and_exit_count) / lagged_firm_count


In [None]:
# Figure 7: ENTRY AND EXIT RATES

# Keep the 'Total' dimension rows from 1999 onwards
is_total = firm_dynamics_df['dimension'] == 'Total'
from_1999 = firm_dynamics_df['year'] >= 1999
total_firm_dynamics_df = firm_dynamics_df[is_total & from_1999]

# Long format: one row per (year, rate), with display names for the two rates
total_entry_exit_df = total_firm_dynamics_df.melt(
    id_vars='year',
    value_vars=['entry_rate', 'exit_rate'],
)
rate_display_names = {
    'entry_rate': 'Entry rate',
    'exit_rate': 'Exit rate',
}
total_entry_exit_df['variable'] = total_entry_exit_df['variable'].map(rate_display_names)

# One line per rate; only even years get an x tick label
entry_exit_chart = (
    alt.Chart(total_entry_exit_df)
    .mark_line()
    .encode(
        x=alt.X(
            'year:O',
            axis=alt.Axis(
                labelExpr="datum.value % 2 == 0 ? datum.label : ''",
                labelAngle=0,
            ),
            title=None,
        ),
        y=alt.Y('value:Q', axis=alt.Axis(format='%')),
        color=alt.Color('variable:N', legend=None),
    )
    .properties(width=600, height=400)
)

# End labels: the rate name written next to each line's final-year point,
# taking the place of a legend
final_year = total_entry_exit_df['year'].max()
final_year_rows = total_entry_exit_df[total_entry_exit_df['year'] == final_year]
end_labels = (
    alt.Chart(final_year_rows)
    .mark_text(align='left', dx=8, fontSize=12)
    .encode(
        x='year:O',
        y='value:Q',
        text=alt.Text('variable:N'),
        color=alt.Color('variable:N', legend=None),
    )
)

chart = (entry_exit_chart + end_labels)
chart.save(chart_path / 'Descriptive paper/Dynamism/BSD_entry_exit_rates.png', scale_factor=2.0)

In [None]:
# TABLE: Entry rates by industry
# Mean entry rate (in %, one decimal) per sector and period, with sectors
# ordered by their average across the three periods.

# Define period bins. pd.cut uses right-closed intervals by default, so these
# edges give (1997, 2007], (2007, 2016] and (2016, 2022]; years outside that
# range get no period and so do not contribute to any period mean.
bins = [1997, 2007, 2016, 2022]
labels = ['1998-2007', '2008-2016', '2017-2022']

df_sector = firm_dynamics_df[firm_dynamics_df['dimension'] == 'Sector'].copy()
df_sector['period'] = pd.cut(df_sector['year'], bins=bins, labels=labels)

table = (
    df_sector
    # 'period' is categorical: pass observed=False explicitly so every period
    # label always yields a column (table[labels] below relies on that) and
    # the result does not depend on pandas' changing default for `observed`.
    .groupby(['category', 'period'], observed=False)['entry_rate']
    .mean()
    .mul(100)
    .round(1)
    .unstack('period')
    .reset_index()
)

table.columns.name = None
table = table.rename(columns={'category': 'Industry'})

# Temporary 'avg' column is used only to order the rows, then dropped.
table['avg'] = table[labels].mean(axis=1)
table = table.sort_values('avg', ascending=False).drop(columns='avg').reset_index(drop=True)
table.style.hide(axis='index').format('{:.1f}', subset=labels)

In [None]:
# FIGURE: Exit rates by size of firm
# One line per size band from 1999 onwards, with the band name written at the
# end of each line instead of a legend.

size_firm_dynamics_df = firm_dynamics_df[firm_dynamics_df['dimension'] == 'Size']
size_firm_dynamics_df = size_firm_dynamics_df[size_firm_dynamics_df['year'] >= 1999]

size_exit = size_firm_dynamics_df[['year','category','exit_rate']]

# Get last year for end labels. Take an explicit copy: the label positions
# are nudged below, and writing into a filtered slice of size_exit would
# raise SettingWithCopyWarning and must never touch the plotted data.
last_year = size_exit['year'].max()
end_labels = size_exit[size_exit['year'] == last_year].copy()

# Nudge two labels apart vertically (by 0.25 percentage points each).
end_labels.loc[end_labels['category'] == 'Large (250+)', 'exit_rate'] -= 0.0025
end_labels.loc[end_labels['category'] == 'Medium (50-249)', 'exit_rate'] += 0.0025

lines = alt.Chart(size_exit).mark_line().encode(
    x=alt.X('year:O', axis=alt.Axis(
                labelExpr="datum.value % 2 == 0 ? datum.label : ''",
            labelAngle=0)),
    y=alt.Y('exit_rate:Q', axis=alt.Axis(format='%')),
    color=alt.Color('category:N',
                     scale=alt.Scale(
                         domain=['Micro (0-9)', 'Small (10-49)', 'Medium (50-249)', 'Large (250+)'],
                         range=['#eb5c2e', 'rgba(24, 42, 56, 0.4)', 'rgba(24, 42, 56, 0.7)', '#122b39']
                     ),
                     legend=None)
).properties(width=600, height=400)

# NOTE(review): the colour range below differs from the one on `lines`
# ('Small' uses alpha 0.9 here versus 0.4 above). Both layers are combined
# into one chart, so confirm which range is meant to apply.
labels = alt.Chart(end_labels).mark_text(align='left', dx=5, fontSize=11).encode(
    x=alt.X('year:O'),
    y=alt.Y('exit_rate:Q'),
    text='category:N',
    color=alt.Color('category:N',
                     scale=alt.Scale(
                         domain=['Micro (0-9)', 'Small (10-49)', 'Medium (50-249)', 'Large (250+)'],
                         range=['#eb5c2e', 'rgba(24, 42, 56, 0.9)', 'rgba(24, 42, 56, 0.7)', '#122b39']
                     ))
)

size_exit_chart = (lines + labels)

display(size_exit_chart)

size_exit_chart.save(chart_path / 'Descriptive paper/Dynamism/BSD_exit_rates_by_size.png', scale_factor=2.0)



In [None]:
# EXIT RATES BY PRODUCTIVITY STATUS
# One line per productivity group from 1999 onwards, with the group name
# written next to each line's final point.

prod_firm_dynamics_df = firm_dynamics_df[firm_dynamics_df['dimension'] == 'Productivity']
prod_firm_dynamics_df = prod_firm_dynamics_df[prod_firm_dynamics_df['year'] >=1999]

# The chart size is set once on the combined chart below, so no size is set
# on this layer.
# NOTE(review): unlike the other charts, this layer leaves the colour legend
# enabled while the labels disable it — confirm the legend is wanted.
line = alt.Chart(prod_firm_dynamics_df).mark_line().encode(
    x=alt.X('year:O', axis=alt.Axis(
                labelExpr="datum.value % 2 == 0 ? datum.label : ''",  # Show every 2nd year
            labelAngle=0)),
    y=alt.Y('exit_rate:Q', axis=alt.Axis(format='%'), title='Firm exit rate by productivity group'),
    color=alt.Color('category:N')
)


# End labels - the last-year row of each category. Copy the selection so the
# helper column added below is written to an independent frame (avoids
# SettingWithCopyWarning), as the cohort survival cell already does.
last_points = prod_firm_dynamics_df.loc[
    prod_firm_dynamics_df.groupby('category')['year'].idxmax()
].copy()

# Vertical nudges that separate two of the labels; every other category keeps
# its own exit rate as the label position.
label_offsets = {
    'High-Median (P50-P90)': 0.003,
    'Low-Median (P10-P50)': -0.003,
}
last_points['label_y'] = last_points.apply(
    lambda row: row['exit_rate'] + label_offsets.get(row['category'], 0.0),
    axis=1
)

labels = alt.Chart(last_points).mark_text(
    align='left',
    dx=5,
    fontSize=11,
    fontWeight='bold'
).encode(
    x=alt.X('year:O'),
    y=alt.Y('label_y:Q'),  # Use adjusted y position
    text='category:N',
    color=alt.Color('category:N', legend=None)
)
# Combine
prod_exit_chart = (line + labels).properties(
    height=400, 
    width=600
)

display(prod_exit_chart)
prod_exit_chart.save(chart_path / 'Exploratory/exit_rates_by_productivity.png')

In [None]:
# KM survival probabilities for cohorts
# Survival curves over ages 0-10 for four entry cohorts, labelled at the end
# of each line, with dashed reference rules at ages 3 and 5.
cohorts_to_plot = [2000, 2005, 2010, 2015]

first_ten_cohort_df = cohort_df[cohort_df['age'] <= 10]
selected_cohorts = first_ten_cohort_df[first_ten_cohort_df['cohort'].isin(cohorts_to_plot)]

color_scale = alt.Scale(
    domain=cohorts_to_plot,
    range=['#36b7b4', '#eb5c2e', '#179fdb', '#122b39'],
)

# Row at the highest age of each cohort: this is where the end label is anchored
oldest_row_per_cohort = selected_cohorts.groupby('cohort')['age'].idxmax()
last_points = selected_cohorts.loc[oldest_row_per_cohort].copy()

# Vertical nudges applied to two of the label positions
for cohort_year, nudge in {2015: 0.022, 2005: -0.03}.items():
    last_points.loc[last_points['cohort'] == cohort_year, 'kaplan_meier_rate'] += nudge

lines = (
    alt.Chart(selected_cohorts)
    .mark_line()
    .encode(
        x=alt.X('age:O'),
        y=alt.Y('kaplan_meier_rate:Q', axis=alt.Axis(format='%')),
        color=alt.Color('cohort:N', scale=color_scale, legend=None),
    )
    .properties(width=500, height=400)
)

labels = (
    alt.Chart(last_points)
    .mark_text(align='left', dx=5, fontSize=11, fontWeight='bold')
    .encode(
        x=alt.X('age:O'),
        y=alt.Y('kaplan_meier_rate:Q'),
        text='cohort:N',
        color=alt.Color('cohort:N', scale=color_scale, legend=None),
    )
)

reference_ages = pd.DataFrame({'age': [3, 5]})
rules = (
    alt.Chart(reference_ages)
    .mark_rule(strokeDash=[4, 4], color='#374151', strokeWidth=1, opacity=0.5)
    .encode(x='age:O')
)

selected_cohort_survival_chart = (lines + labels + rules)
display(selected_cohort_survival_chart)
selected_cohort_survival_chart.save(chart_path / 'Descriptive paper/Dynamism/BSD_survival_by_cohort.png', scale_factor=2.0)

In [None]:
# DHS growth rate distribution (excluding Micro firms)
# Mean, P10 and P90 of DHS growth per year from 2003, each taken as the
# simple average across the non-micro size categories.
# NOTE(review): averaging P10/P90 across size bands is not the same as the
# percentile of the pooled distribution — confirm this is the intended measure.
total_growth_df = growth_rates_df[growth_rates_df['dimension'] == 'Size'].copy()
total_growth_df = total_growth_df[total_growth_df['category'] != 'Micro (0-9)']
total_growth_df = total_growth_df[total_growth_df['year'] >= 2003]
total_growth_df = total_growth_df.groupby('year').agg({
    'mean_dhs_growth': 'mean',
    'p10_dhs_growth': 'mean',
    'p90_dhs_growth': 'mean'
}).reset_index()

# Keep a wide version for the shaded band
band_df = total_growth_df[['year', 'p10_dhs_growth', 'p90_dhs_growth']].copy()

# Melt for the lines
total_growth_df = total_growth_df.melt(
    id_vars='year',
    value_vars=['mean_dhs_growth', 'p10_dhs_growth', 'p90_dhs_growth'],
    var_name='statistic', value_name='growth_rate'
)

label_map = {'mean_dhs_growth': 'Mean', 'p10_dhs_growth': 'P10', 'p90_dhs_growth': 'P90'}
total_growth_df['label'] = total_growth_df['statistic'].map(label_map)

# Cast to a plain Python int: the value is embedded in the Vega filter
# expression below, and a NumPy scalar (what .max() typically returns for an
# integer column) may not serialise to a bare number literal there.
max_year = int(total_growth_df['year'].max())

# Colour scheme: dark navy for mean, muted tones for P10/P90
color_scale = alt.Scale(
    domain=['mean_dhs_growth', 'p10_dhs_growth', 'p90_dhs_growth'],
    range=['#1b3a4b', '#a3b8c8', '#a3b8c8']
)

# Shaded band between P10 and P90
band = alt.Chart(band_df).mark_area(opacity=0.08, color='#1b3a4b').encode(
    x=alt.X('year:O', axis=alt.Axis(
        labelExpr="datum.value % 2 == 0 ? datum.label : ''",
        labelAngle=0, title=None)),
    y=alt.Y('p10_dhs_growth:Q'),
    y2=alt.Y2('p90_dhs_growth:Q')
)

# Lines
lines = alt.Chart(total_growth_df).mark_line(strokeWidth=2).encode(
    x=alt.X('year:O'),
    y=alt.Y('growth_rate:Q'),
    color=alt.Color('statistic:N', scale=color_scale, legend=None),
    strokeDash=alt.StrokeDash(
        'statistic:N',
        scale=alt.Scale(
            domain=['mean_dhs_growth', 'p10_dhs_growth', 'p90_dhs_growth'],
            range=[[0], [4, 2], [4, 2]]  # solid for mean, dashed for P10/P90
        ),
        legend=None
    )
)

# End labels: the short statistic name next to each line's final-year point
end_labels = alt.Chart(total_growth_df).transform_filter(
    alt.datum.year == max_year
).mark_text(align='left', dx=5, fontSize=11).encode(
    x=alt.X('year:O'),
    y=alt.Y('growth_rate:Q'),
    text=alt.Text('label:N'),
    color=alt.Color('statistic:N', scale=color_scale, legend=None)
)

total_dhs_growth_chart = (band + lines + end_labels).properties(
    width=500,
    height=300,
)

display(total_dhs_growth_chart)
# Exported at scale_factor=2.0, matching the other charts saved under
# 'Descriptive paper/Dynamism'.
total_dhs_growth_chart.save(chart_path / 'Descriptive paper/Dynamism/total_dhs_growth_distribution_excluding_micro.png', scale_factor=2.0)