In [None]:
# scoping of geographic dynamism

import pandas as pd
import geopandas as gpd
import numpy as np
import altair as alt
from pandas.api.types import CategoricalDtype
import os
import eco_style 
import requests
import json
import io



ModuleNotFoundError: No module named 'altair'

In [122]:
itl3_dynamism = pd.read_excel('referencetablefirmemploymentdynamicsinlocaleconomies2004to2022.xls', sheet_name='ITL3 by Status')
itl2_dynamism = pd.read_excel('referencetablefirmemploymentdynamicsinlocaleconomies2004to2022.xls', sheet_name='ITL2 by Status')
itl2__industry_dynamism = pd.read_excel('referencetablefirmemploymentdynamicsinlocaleconomies2004to2022.xls', sheet_name='ITL2 by Status by Sector')



In [3]:
itl2_dynamism.head()

Unnamed: 0,year,ITL221NM,Status,Employment,Jobs Created,Jobs Destroyed,No Sites,No Enterprises,Total Cell Employment,Laggd Total Cell Employment (t-1),Job Creation Rate,Job Destruction Rate
0,2004,Bedfordshire and Hertfordshire,Closing Employment Site (Closing Firm),,0,27245,7140,7015,776155,0,,
1,2004,Bedfordshire and Hertfordshire,Closing Employment Site (Contracting Firm),,0,60410,5300,3965,776155,0,,
2,2004,Bedfordshire and Hertfordshire,Continuing Employment Sites,689155.0,54645,57325,63620,54830,776155,0,,
3,2004,Bedfordshire and Hertfordshire,New Employment Site (Expanding Firm),59130.0,59130,0,4765,3740,776155,0,,
4,2004,Bedfordshire and Hertfordshire,New Employment Site (New Firm),27870.0,27870,0,10120,10100,776155,0,,


In [39]:
itl3_dynamism_2022 = itl3_dynamism[itl3_dynamism['year'] == 2022].copy()

itl3_dynamism_2022['Net_Job_Creation'] = (
    itl3_dynamism_2022['Job Creation Rate'] - itl3_dynamism_2022['Job Destruction Rate']
)


In [16]:
itl3_dynamism_2022.dtypes


year                                   int64
ITL321NM                              object
Jobs Created                           int64
Jobs Destroyed                         int64
Employment                             int64
Laggd Total Cell Employment (t-1)      int64
No Sites                               int64
No Enterprises                         int64
Job Creation Rate                    float64
Job Destruction Rate                 float64
dtype: object

# ITL1 DYNAMISM TABLES (ENGLAND, SCOTLAND, WALES)

In [120]:
itl2_dynamism.columns

Index(['year', 'ITL221NM', 'Status', 'Employment', 'Jobs Created',
       'Jobs Destroyed', 'No Sites', 'No Enterprises', 'Total Cell Employment',
       'Laggd Total Cell Employment (t-1)', 'Job Creation Rate',
       'Job Destruction Rate', 'ITL125NM'],
      dtype='object')

In [124]:
itl1_dynamism

Unnamed: 0,year,ITL221NM,Status,Employment,Jobs Created,Jobs Destroyed,No Sites,No Enterprises,Total Cell Employment,Laggd Total Cell Employment (t-1),Job Creation Rate,Job Destruction Rate,ITL125NM
0,2004,Bedfordshire and Hertfordshire,Closing Employment Site (Closing Firm),,0,27245,7140,7015,776155,0,,,East (England)
1,2004,Bedfordshire and Hertfordshire,Closing Employment Site (Closing Firm),,0,27245,7140,7015,776155,0,,,East (England)
2,2004,Bedfordshire and Hertfordshire,Closing Employment Site (Closing Firm),,0,27245,7140,7015,776155,0,,,East (England)
3,2004,Bedfordshire and Hertfordshire,Closing Employment Site (Closing Firm),,0,27245,7140,7015,776155,0,,,East (England)
4,2004,Bedfordshire and Hertfordshire,Closing Employment Site (Closing Firm),,0,27245,7140,7015,776155,0,,,East (England)
...,...,...,...,...,...,...,...,...,...,...,...,...,...
27545,2022,West Yorkshire,New Employment Site (New Firm),33025.0,33025,0,13495,13475,1094800,1076410,3.0683,,Yorkshire and The Humber
27546,2022,West Yorkshire,New Employment Site (New Firm),33025.0,33025,0,13495,13475,1094800,1076410,3.0683,,Yorkshire and The Humber
27547,2022,West Yorkshire,New Employment Site (New Firm),33025.0,33025,0,13495,13475,1094800,1076410,3.0683,,Yorkshire and The Humber
27548,2022,West Yorkshire,New Employment Site (New Firm),33025.0,33025,0,13495,13475,1094800,1076410,3.0683,,Yorkshire and The Humber


In [133]:
# The data is provided at NUTS2, we want NUTS1 so let's bring this in via lookup
itl_lookup = pd.read_csv('LAD_(April_2025)_to_LAU1_to_ITL3_to_ITL2_to_ITL1_(January_2025)_Lookup_in_the_UK.csv')
itl_lookup = itl_lookup[['ITL125NM','ITL225NM']]
itl_lookup = itl_lookup.rename(columns={'ITL225NM':'ITL221NM'})

itl1_dynamism = pd.merge(itl2_dynamism, itl_lookup, on='ITL221NM')

# Calculate dynamism summary at this level
itl1_dynamism = itl1_dynamism.groupby(['year','ITL125NM']).agg({'Employment':'sum',
                                                              'Jobs Created':'sum',
                                                              'Jobs Destroyed':'sum',
                                                              'Laggd Total Cell Employment (t-1)':'sum'}).reset_index()

itl1_dynamism[itl1_dynamism['ITL125NM']=='Scotland']


Unnamed: 0,year,ITL125NM,Employment,Jobs Created,Jobs Destroyed,Laggd Total Cell Employment (t-1)
5,2004,Scotland,13021285.0,2168530,2357365,0
15,2005,Scotland,13930985.0,2548500,2081690,65106425
25,2006,Scotland,14235250.0,2366720,2151635,69654925
35,2007,Scotland,14793170.0,2477115,1919210,71176400
45,2008,Scotland,14697545.0,2240140,2335745,73965700
55,2009,Scotland,14601300.0,1919630,2015785,73487400
65,2010,Scotland,14036815.0,2024595,2589150,73006675
75,2011,Scotland,13740430.0,1618695,1710955,70184250
85,2012,Scotland,13899945.0,1879740,1913790,68702150
95,2013,Scotland,14104600.0,2021895,1817270,69500050


In [27]:
chart = alt.Chart(itl3_dynamism_2022).mark_bar().encode(
        # X-Axis: Job Creation Rate, binned to form the histogram bars
        x=alt.X('Job Creation Rate:Q'),

        # Y-Axis: Count of occurrences in each bin (Frequency)
        y=alt.Y('ITL321NM:O',axis=alt.Axis(labels=False), sort='-x'),

        # Add tooltip for interactivity
        tooltip=['Job Creation Rate:Q', 'ITL321NM:O']
    ).properties(
        title=f'Histogram of ITL3 Job Creation Rates in 2022'
    ).interactive()

chart

In [28]:
chart = alt.Chart(itl3_dynamism_2022).mark_bar().encode(
        # X-Axis: Job Creation Rate, binned to form the histogram bars
        x=alt.X('Job Destruction Rate:Q'),

        # Y-Axis: Count of occurrences in each bin (Frequency)
        y=alt.Y('ITL321NM:O',axis=alt.Axis(labels=False), sort='-x'),

        # Add tooltip for interactivity
        tooltip=['Job Destruction Rate:Q', 'ITL321NM:O']
    ).properties(
        title=f'Histogram of ITL3 Job Destruction Rates in 2022'
    ).interactive()

chart

In [42]:
itl3_dynamism_2022['Job Destruction Rate'] *= -1



In [44]:
itl3_dynamism_2022_long = itl3_dynamism_2022.melt(id_vars=['year','ITL321NM','Net_Job_Creation'], value_vars=['Job Creation Rate','Job Destruction Rate'], var_name='Dynamism Type', value_name='Rate')
itl3_dynamism_2022_long

chart = alt.Chart(itl3_dynamism_2022_long).mark_bar().encode(
        # X-Axis: Job Creation Rate, binned to form the histogram bars
        x=alt.X('Rate:Q'),

        # Y-Axis: Count of occurrences in each bin (Frequency)
        y=alt.Y('ITL321NM:O',axis=alt.Axis(labels=False), sort=alt.EncodingSortField(
            field='Net_Job_Creation', op='mean',order='descending'
        )),
        color=alt.Color('Dynamism Type:N'),

        # Add tooltip for interactivity
        tooltip=['Rate:Q', 'ITL321NM:O']
    ).properties(
        title=f'Histogram of ITL3 Job Destruction Rates in 2022'
    ).interactive()

chart

## JOB REALLOCATION ACROSS NUTS2 REGIONS

For the Nuffield application we are proposing to explain differences in job reallocation across NUTS-2 regions. We can obtain this from the ONS data release, this can give us an idea about region/time variation which can help inform our econometric approach.

First let's check variation over time of reallocation across the regions. 



In [66]:
# Calculate job reallocation by summing absolute job creation and destruction rates
itl2_totals = itl2_dynamism.groupby(['year','ITL221NM']).agg({
    'Jobs Created': 'sum',
    'Jobs Destroyed': 'sum',
    'Total Cell Employment':'first',
    'Laggd Total Cell Employment (t-1)':'first'
}).reset_index()

itl2_totals['Job Reallocation'] = itl2_totals['Jobs Created'] + itl2_totals['Jobs Destroyed']
itl2_totals['Job Reallocation Rate'] = itl2_totals['Job Reallocation'] / itl2_totals['Laggd Total Cell Employment (t-1)']
itl2_totals.replace([np.inf, -np.inf], np.nan, inplace=True)

itl2_totals

Unnamed: 0,year,ITL221NM,Jobs Created,Jobs Destroyed,Total Cell Employment,Laggd Total Cell Employment (t-1),Job Reallocation,Job Reallocation Rate
0,2004,Bedfordshire and Hertfordshire,141645,144980,776155,0,286625,
1,2004,"Berkshire, Buckinghamshire and Oxfordshire",218070,224910,1193945,0,442980,
2,2004,Cheshire,81040,74090,445850,0,155130,
3,2004,Cornwall and Isles of Scilly,22450,19175,134150,0,41625,
4,2004,Cumbria,34900,33225,223975,0,68125,
...,...,...,...,...,...,...,...,...
774,2022,Tees Valley and Durham,61250,52240,456775,301105,113490,0.376912
775,2022,West Central Scotland,90125,81085,711975,732970,171210,0.233584
776,2022,West Midlands,187070,173480,1249160,1276115,360550,0.282537
777,2022,West Wales and The Valleys,90385,87140,750695,747450,177525,0.237508


In [107]:
# Plot job reallocation across all ITL regions with 6 one each plot

# Setup parameters
regions_list = itl2_totals['ITL221NM'].unique()
chunk_size = 6
num_regions = len(regions_list)

# Determine the number of columns for the facet to display 6 regions neatly
# 3 columns for 6 regions means 2 rows.
columns = 3 

# Create output directory
output_dir = 'itl_rate_plots'
os.makedirs(output_dir, exist_ok=True)

print(f"Total regions to plot: {num_regions}")
print(f"Plots will be saved to: {output_dir}")

for i in range(0, num_regions, chunk_size):
    # Get the chunk of regions
    start_index = i
    end_index = i + chunk_size
    regions_chunk = regions_list[start_index:end_index]
    
    # Filter the melted DataFrame for the current chunk
    chunk_data = itl2_totals[
        itl2_totals['ITL221NM'].isin(regions_chunk)
    ]
    
    # Define the chart for the current chunk
    chart = alt.Chart(chunk_data).mark_line().encode(
        x=alt.X('year:O', title='Year', axis=alt.Axis(
                labelExpr="datum.value % 2 == 0 ? datum.label : ''",  # Show every 2nd year
            labelAngle=0)),
        y=alt.Y('Job Reallocation Rate:Q', axis=alt.Axis(format='%'), title='Job Reallocation Rate'),        
        # Facet the data by region, using the specified number of columns
        facet=alt.Facet('ITL221NM:N', columns=columns, title='ITL2 Region'),
        
        tooltip=['year:O', 'ITL221NM:N', alt.Tooltip('Job Reallocation Rate:Q', format='.2%')]
    ).properties(
        title=f'Job Creation and Destruction Rates: Regions {start_index+1} to {end_index}'
    )

    # Create the file name and save the chart
    file_name = f'reallocation_rates_regions{start_index+1}_to_{end_index}.png'
    full_path = os.path.join(output_dir, file_name)
    
    # Save the chart as a JSON file
    chart.save(full_path)
    
    print(f"Saved plot {i//chunk_size + 1}: {file_name}")

print("\n--- Plotting Complete ---")

Total regions to plot: 41
Plots will be saved to: itl_rate_plots
Saved plot 1: reallocation_rates_regions1_to_6.png
Saved plot 2: reallocation_rates_regions7_to_12.png
Saved plot 3: reallocation_rates_regions13_to_18.png
Saved plot 4: reallocation_rates_regions19_to_24.png
Saved plot 5: reallocation_rates_regions25_to_30.png
Saved plot 6: reallocation_rates_regions31_to_36.png
Saved plot 7: reallocation_rates_regions37_to_42.png

--- Plotting Complete ---


In [None]:

# CALCULATE RATES AND MELT
itl2_totals['Job creation rate'] = itl2_totals['Jobs Created'] / itl2_totals['Laggd Total Cell Employment (t-1)']
itl2_totals['Job destruction rate'] = itl2_totals['Jobs Destroyed'] / itl2_totals['Laggd Total Cell Employment (t-1)']

itl2_jc_jd = itl2_totals.melt(
    id_vars=['year','ITL221NM'],
    value_vars=['Job creation rate','Job destruction rate'], 
    value_name='Rate'
)

# CHUNKING AND PLOTTING LOGIC ---

# Setup parameters
regions_list = itl2_jc_jd['ITL221NM'].unique()
chunk_size = 6
num_regions = len(regions_list)

# Determine the number of columns for the facet to display 6 regions neatly
# 3 columns for 6 regions means 2 rows.
columns = 3 

# Create output directory
output_dir = 'itl_rate_plots'
os.makedirs(output_dir, exist_ok=True)

print(f"Total regions to plot: {num_regions}")
print(f"Plots will be saved to: {output_dir}")

for i in range(0, num_regions, chunk_size):
    # Get the chunk of regions
    start_index = i
    end_index = i + chunk_size
    regions_chunk = regions_list[start_index:end_index]
    
    # Filter the melted DataFrame for the current chunk
    chunk_data = itl2_jc_jd[
        itl2_jc_jd['ITL221NM'].isin(regions_chunk)
    ]
    
    # Define the chart for the current chunk
    chart = alt.Chart(chunk_data).mark_line().encode(
        x=alt.X('year:O', title='Year', axis=alt.Axis(
                labelExpr="datum.value % 2 == 0 ? datum.label : ''",  # Show every 2nd year
            labelAngle=0)),
        y=alt.Y('Rate:Q', axis=alt.Axis(format='%'), title='Rate'),
        color=alt.Color('variable:N', title='Type'),
        
        # Facet the data by region, using the specified number of columns
        facet=alt.Facet('ITL221NM:N', columns=columns, title='ITL2 Region'),
        
        tooltip=['year:O', 'ITL221NM:N', alt.Tooltip('Rate:Q', format='.2%')]
    ).properties(
        title=f'Job Creation and Destruction Rates: Regions {start_index+1} to {end_index}'
    )

    # Create the file name and save the chart
    file_name = f'job_rates_regions_{start_index+1}_to_{end_index}.png'
    full_path = os.path.join(output_dir, file_name)
    
    # Save the chart as a JSON file
    chart.save(full_path)
    
    print(f"Saved plot {i//chunk_size + 1}: {file_name}")

print("\n--- Plotting Complete ---")

Total regions to plot: 41
Plots will be saved to: itl_rate_plots
Saved plot 1: job_rates_regions_1_to_6.png
Saved plot 2: job_rates_regions_7_to_12.png
Saved plot 3: job_rates_regions_13_to_18.png
Saved plot 4: job_rates_regions_19_to_24.png
Saved plot 5: job_rates_regions_25_to_30.png
Saved plot 6: job_rates_regions_31_to_36.png
Saved plot 7: job_rates_regions_37_to_42.png

--- Plotting Complete ---
Files saved successfully. Note: You will need 'vl-convert-python' or a similar tool to convert the saved JSON files to PNG.


In [68]:
# RANK AVERAGE JOB REALLOCATION RATE 2005-2022

ranked_reallocation = itl2_totals.groupby('ITL221NM').agg({'Job Reallocation Rate':'mean'}).reset_index()

ranked_reallocation = ranked_reallocation.sort_values(by='Job Reallocation Rate', ascending=False)

ranked_reallocation

Unnamed: 0,ITL221NM,Job Reallocation Rate
19,Inner London - West,0.37439
18,Inner London - East,0.365816
31,Outer London - West and North West,0.341136
30,Outer London - South,0.33126
29,Outer London - East and North East,0.32733
0,Bedfordshire and Hertfordshire,0.31404
14,Greater Manchester,0.312481
38,West Midlands,0.31032
2,Cheshire,0.299636
12,Essex,0.296166


In [69]:
# RANK CHANGE IN JOB REALLOCATION RATE 2005-2022
reallocation_2005 = itl2_totals[itl2_totals['year'] == 2005][['ITL221NM','Job Reallocation Rate']].rename(columns={'Job Reallocation Rate':'Reallocation_2005'})
reallocation_2022 = itl2_totals[itl2_totals['year'] == 2022][['ITL221NM','Job Reallocation Rate']].rename(columns={'Job Reallocation Rate':'Reallocation_2022'})
reallocation_change = pd.merge(reallocation_2005, reallocation_2022, on='ITL221NM')
reallocation_change['Reallocation_Change'] = reallocation_change['Reallocation_2022'] - reallocation_change['Reallocation_2005']
reallocation_change = reallocation_change.sort_values(by='Reallocation_Change', ascending=False)

reallocation_change

Unnamed: 0,ITL221NM,Reallocation_2005,Reallocation_2022,Reallocation_Change
36,Tees Valley and Durham,0.338665,0.376912,0.038247
29,Outer London - East and North East,0.335807,0.313521,-0.022286
18,Inner London - East,0.398941,0.370833,-0.028108
16,"Herefordshire, Worcestershire and Warwickshire",0.322521,0.290857,-0.031663
22,"Leicestershire, Rutland and Northamptonshire",0.336568,0.292534,-0.044035
9,East Wales,0.312533,0.266553,-0.04598
0,Bedfordshire and Hertfordshire,0.34256,0.29431,-0.04825
10,East Yorkshire and Northern Lincolnshire,0.305972,0.253424,-0.052547
5,Derbyshire and Nottinghamshire,0.321039,0.267016,-0.054023
14,Greater Manchester,0.341399,0.285628,-0.055771


In [70]:
itl2_totals[itl2_totals['ITL221NM']=='Tees Valley and Durham']

Unnamed: 0,year,ITL221NM,Jobs Created,Jobs Destroyed,Total Cell Employment,Laggd Total Cell Employment (t-1),Job Reallocation,Job Reallocation Rate
36,2004,Tees Valley and Durham,69555,72465,432655,0,142020,
77,2005,Tees Valley and Durham,82340,64185,450805,432655,146525,0.338665
118,2006,Tees Valley and Durham,86400,65140,472065,450805,151540,0.336154
159,2007,Tees Valley and Durham,75350,84035,463375,472065,159385,0.337634
200,2008,Tees Valley and Durham,67305,65960,464720,463375,133265,0.287596
241,2009,Tees Valley and Durham,63170,72720,455165,464720,135890,0.292413
282,2010,Tees Valley and Durham,56705,88980,422890,455165,145685,0.320071
323,2011,Tees Valley and Durham,64745,60070,427565,422890,124815,0.295148
364,2012,Tees Valley and Durham,62150,64940,415775,427565,127090,0.297241
405,2013,Tees Valley and Durham,75390,66880,432875,415775,142270,0.34218


In [71]:
tees_valley_dynamism = itl2_totals[itl2_totals['ITL221NM']=='Tees Valley and Durham']
tees_valley_dynamism =  tees_valley_dynamism.melt(id_vars=['year','ITL221NM'],
                                                          value_vars=['Jobs Created','Jobs Destroyed'],
                                                          var_name='Dynamism Type',
                                                          value_name='Count')

chart = alt.Chart(tees_valley_dynamism).mark_bar().encode(
    x=alt.X('year:O'),
    y=alt.Y('Count:Q'),
    color=alt.Color('Dynamism Type:N'),
    tooltip=['year:O']
)

chart

In [72]:
# DO A CAGR CALCULATION FOR JOB REALLOCATION RATE FROM 2005 TO 2022
cagr_data = itl2_totals[
    itl2_totals['year'].isin([2005, 2022])
]

cagr_pivot = cagr_data.pivot(
    index='ITL221NM', 
    columns='year', 
    values='Job Reallocation Rate'
)

cagr_pivot = cagr_pivot.rename(
    columns={2005: 'Start_Rate', 2022: 'End_Rate'}
)

cagr_pivot['CAGR_Reallocation_Rate'] = (
    (cagr_pivot['End_Rate'] / cagr_pivot['Start_Rate']) ** (1 / 17) - 1
)

cagr_result = cagr_pivot[['CAGR_Reallocation_Rate']].reset_index().sort_values(by='CAGR_Reallocation_Rate', ascending=False)
cagr_result

year,ITL221NM,CAGR_Reallocation_Rate
36,Tees Valley and Durham,0.006314
29,Outer London - East and North East,-0.004031
18,Inner London - East,-0.004288
16,"Herefordshire, Worcestershire and Warwickshire",-0.00606
22,"Leicestershire, Rutland and Northamptonshire",-0.008214
0,Bedfordshire and Hertfordshire,-0.00889
9,East Wales,-0.009317
14,Greater Manchester,-0.010437
5,Derbyshire and Nottinghamshire,-0.01078
10,East Yorkshire and Northern Lincolnshire,-0.011023


###Â INDUSTRY CONTRIBUTIONS TO CHANGES IN REALLOCATION IN NUTS2 REGIONS

In [73]:
itl2__industry_dynamism.columns

Index(['year', 'ITL221NM', 'Sector', 'Status', 'Employment', 'Job Creation',
       'Job Destruction', 'No Sites', 'No Enterprises',
       'Total Cell Employment', 'Laggd Total Cell Employment (t-1)',
       'Job Creation Rate', 'Job Destruction Rate'],
      dtype='object')

In [79]:

itl2__industry_dynamism['Job Reallocation'] = (
    itl2__industry_dynamism['Job Creation'].abs() + itl2__industry_dynamism['Job Destruction'].abs()
)

# 1. Calculate Total Job Reallocation per Industry per Region (2005-2022 sum)
# Group by Region and Industry and sum the Reallocation
industry_totals = itl2__industry_dynamism.groupby(
    ['ITL221NM', 'Sector']
).agg({'Job Reallocation':'sum',
       'Laggd Total Cell Employment (t-1)':'first'}).reset_index()

# Rename the summed column for clarity
industry_totals = industry_totals.rename(
    columns={'Job Reallocation': 'Total_Sector_Reallocation'}
)

industry_totals

Unnamed: 0,ITL221NM,Sector,Total_Sector_Reallocation,Laggd Total Cell Employment (t-1)
0,Bedfordshire and Hertfordshire,Construction,139775.0,0
1,Bedfordshire and Hertfordshire,Finance,45980.0,0
2,Bedfordshire and Hertfordshire,Government Healthcare and Education,439830.0,0
3,Bedfordshire and Hertfordshire,Information and Communication,108695.0,0
4,Bedfordshire and Hertfordshire,Other Services,112845.0,0
...,...,...,...,...
364,West Yorkshire,Other Services,127310.0,0
365,West Yorkshire,Production and Manufacturing,272155.0,0
366,West Yorkshire,Professional Services Technology and Media,601595.0,0
367,West Yorkshire,Transportation and Storage,112605.0,0


In [80]:
itl2__industry_dynamism.columns

Index(['year', 'ITL221NM', 'Sector', 'Status', 'Employment', 'Job Creation',
       'Job Destruction', 'No Sites', 'No Enterprises',
       'Total Cell Employment', 'Laggd Total Cell Employment (t-1)',
       'Job Creation Rate', 'Job Destruction Rate', 'Job Reallocation'],
      dtype='object')

In [95]:
# Now calcualte the contribution to job reallocation RATE which uses employment share weights
itl2__industry_agg = itl2__industry_dynamism.groupby(['year','ITL221NM','Sector']).agg({
    'Job Creation':'sum',
    'Job Destruction':'sum',
    'Employment':'first',
    'Laggd Total Cell Employment (t-1)':'first'
})

itl2__industry_agg['Job Reallocation'] = (
    itl2__industry_agg['Job Creation'].abs() + itl2__industry_agg['Job Destruction'].abs()
)

itl2__industry_agg['Sector_Rate'] = (
    itl2__industry_agg['Job Reallocation'] / itl2__industry_agg['Laggd Total Cell Employment (t-1)']
)

itl2__industry_agg = itl2__industry_agg.reset_index()

# --- Step 1: Filter and Calculate Initial Employment Share (2005) ---

# Filter for the start year (2005)
df_2005 = itl2__industry_agg[itl2__industry_agg['year'] == 2005].copy()

# Calculate total regional employment in 2005
region_total_emp_2005 = df_2005.groupby('ITL221NM')['Employment'].sum().reset_index()
region_total_emp_2005 = region_total_emp_2005.rename(
    columns={'Employment': 'Total_Region_Emp_2005'}
)

# Merge back to calculate 2005 industry share
df_2005 = pd.merge(df_2005, region_total_emp_2005, on='ITL221NM', how='left')
df_2005['Emp_Share_2005'] = (
    df_2005['Employment'] / df_2005['Total_Region_Emp_2005']
)

# Keep only the necessary columns for the share
df_shares = df_2005[['ITL221NM', 'Sector', 'Emp_Share_2005']]

# --- Step 2: Calculate the Change in Industry Rate (2022 Rate - 2005 Rate) ---

# Get rates for 2005 and 2022
df_rates = itl2__industry_agg[
    itl2__industry_agg['year'].isin([2005, 2022])
]

# Pivot to get start and end rates side-by-side
rate_pivot = df_rates.pivot_table(
    index=['ITL221NM', 'Sector'],
    columns='year',
    values='Sector_Rate'
).reset_index()

# Rename columns
rate_pivot = rate_pivot.rename(
    columns={2005: 'Rate_2005', 2022: 'Rate_2022'}
)

# Calculate the change in the industry rate
rate_pivot['Change_in_Sector_Rate'] = (
    rate_pivot['Rate_2022'] - rate_pivot['Rate_2005']
)

# --- Step 3: Calculate the Final Contribution ---

# Merge the rate change with the 2005 employment shares
contribution_df = pd.merge(
    rate_pivot, 
    df_shares, 
    on=['ITL221NM', 'Sector'], 
    how='left'
)

# Calculate the weighted contribution
contribution_df['Rate_Change_Contribution'] = (
    contribution_df['Change_in_Sector_Rate'] * contribution_df['Emp_Share_2005']
)

# Final result DataFrame, showing the contribution of each industry to the total change
contribution_result = contribution_df[[
    'ITL221NM', 
    'Sector', 
    'Rate_Change_Contribution'
]].sort_values(
    by=['ITL221NM', 'Rate_Change_Contribution'],
    ascending=[True, False]
)

contribution_result

Unnamed: 0,ITL221NM,Sector,Rate_Change_Contribution
3,Bedfordshire and Hertfordshire,Information and Communication,0.002382
5,Bedfordshire and Hertfordshire,Production and Manufacturing,-0.003983
4,Bedfordshire and Hertfordshire,Other Services,-0.004403
1,Bedfordshire and Hertfordshire,Finance,-0.005180
0,Bedfordshire and Hertfordshire,Construction,-0.005297
...,...,...,...
360,West Yorkshire,Other Services,-0.007659
361,West Yorkshire,Production and Manufacturing,-0.012413
364,West Yorkshire,Wholesale and Retail,-0.019297
358,West Yorkshire,Government Healthcare and Education,-0.019325


In [110]:
contribution_df[contribution_df['ITL221NM']=='Surrey, East and West Sussex'].sum()

ITL221NM                    Surrey, East and West SussexSurrey, East and W...
Sector                      ConstructionFinanceGovernment Healthcare and E...
Rate_2005                                                            3.470859
Rate_2022                                                            2.361875
Change_in_Sector_Rate                                               -1.108984
Emp_Share_2005                                                            1.0
Rate_Change_Contribution                                            -0.106117
dtype: object

In [97]:
# Ensure the directory for saving plots exists (optional, but good practice)
output_dir = 'itl_contribution_altair_plots'
os.makedirs(output_dir, exist_ok=True)

# Get unique regions to loop through
regions = contribution_result['ITL221NM'].unique()
generated_files = []

for region in regions:
    # A. Filter data for the current region
    region_data = contribution_result[
        contribution_result['ITL221NM'] == region
    ]
    
    # B. Define color logic using alt.condition 
    # This assigns color based on the sign of the contribution
    color_condition = alt.condition(
        alt.datum['Rate_Change_Contribution'] >= 0,
        alt.value('darkgreen'),  # Positive contributions
        alt.value('darkred')     # Negative contributions
    )
    
    # C. Create the Altair chart
    chart = alt.Chart(region_data).mark_bar().encode(
        # Y-axis: Industry. Use sort=... to order industries by contribution value
        y=alt.Y('Sector:N', sort=alt.EncodingSortField(
            field='Rate_Change_Contribution', 
            op='max', 
            order='descending'
        ), title='Sector'),
        
        # X-axis: Contribution value. Format as a percentage.
        x=alt.X('Rate_Change_Contribution:Q', title='Contribution to Rate Change',
                axis=alt.Axis(format='.2%')), 
        
        # Color encoding using the conditional logic
        color=color_condition,
        
        # Tooltip for interactivity (shows exact values on hover)
        tooltip=[
            'Sector', 
            alt.Tooltip('Rate_Change_Contribution', format='.4f', title='Contribution')
        ]
    ).properties(
        title=f'Sector Contribution to Job Reallocation Rate Change (2005-2022) in {region}'
    ).interactive() # Allows zooming and panning

    # D. Save the plot as a JSON file
    # Create a safe file name
    safe_region_name = region.replace(' ', '_').replace('/', '_').replace('(', '').replace(')', '')
    file_name = f'altair_contribution_plot_{safe_region_name}.json'
    full_path = os.path.join(output_dir, file_name)
    
    # Save the chart
    chart.save(full_path)
    generated_files.append(file_name)

print(f"Altair plots (JSON files) have been generated and saved to the '{output_dir}' directory.")
print("Generated plot file names:", generated_files)

Altair plots (JSON files) have been generated and saved to the 'itl_contribution_altair_plots' directory.
Generated plot file names: ['altair_contribution_plot_Bedfordshire_and_Hertfordshire.json', 'altair_contribution_plot_Berkshire,_Buckinghamshire_and_Oxfordshire.json', 'altair_contribution_plot_Cheshire.json', 'altair_contribution_plot_Cornwall_and_Isles_of_Scilly.json', 'altair_contribution_plot_Cumbria.json', 'altair_contribution_plot_Derbyshire_and_Nottinghamshire.json', 'altair_contribution_plot_Devon.json', 'altair_contribution_plot_Dorset_and_Somerset.json', 'altair_contribution_plot_East_Anglia.json', 'altair_contribution_plot_East_Wales.json', 'altair_contribution_plot_East_Yorkshire_and_Northern_Lincolnshire.json', 'altair_contribution_plot_Eastern_Scotland.json', 'altair_contribution_plot_Essex.json', 'altair_contribution_plot_Gloucestershire,_Wiltshire_and_Bath_Bristol_area.json', 'altair_contribution_plot_Greater_Manchester.json', 'altair_contribution_plot_Hampshire_and

In [96]:
sum_of_contributions = contribution_result.groupby('ITL221NM')[
    'Rate_Change_Contribution'
].sum().reset_index()

sum_of_contributions = sum_of_contributions.rename(
    columns={'Rate_Change_Contribution': 'Sum_of_Industry_Contributions'}
)

sum_of_contributions

Unnamed: 0,ITL221NM,Sum_of_Industry_Contributions
0,Bedfordshire and Hertfordshire,-0.061329
1,"Berkshire, Buckinghamshire and Oxfordshire",-0.093572
2,Cheshire,-0.105685
3,Cornwall and Isles of Scilly,-inf
4,Cumbria,inf
5,Derbyshire and Nottinghamshire,-0.060119
6,Devon,-0.118996
7,Dorset and Somerset,-0.075467
8,East Anglia,-0.10134
9,East Wales,-0.049784
