In [None]:

import pandas as pd
import geopandas as gpd
import numpy as np
import altair as alt
import os
import eco_style 
alt.themes.enable("light")
import requests
import json
import time
from tqdm import tqdm
import io

In [None]:
# DOWNLOAD BRES EMPLOYMENT BY 5DIG SIC AND LAD FROM NOMIS API

# Directory that receives one CSV per downloaded chunk.
# exist_ok=True makes the call safe to re-run and avoids the
# check-then-create race of testing os.path.exists() first.
output_dir = "nomis_dump"
os.makedirs(output_dir, exist_ok=True)

# This is the base URL for BRES data
base_url = "https://www.nomisweb.co.uk/api/v01/dataset/NM_189_1.data.csv"

# This is the list of LAD geographies - copy it from your NOMIS API link
# Replace with MSOA geographies if desired

GEO_LIST_STR = "1778384897...1778384901,1778384941,1778384950,1778385143...1778385146,1778385159,1778384902...1778384905,1778384942,1778384943,1778384956,1778384957,1778385033...1778385044,1778385124...1778385138,1778384906...1778384910,1778384958,1778385139...1778385142,1778385154...1778385158,1778384911...1778384914,1778384954,1778384955,1778384965...1778384972,1778385045...1778385058,1778385066...1778385072,1778384915...1778384917,1778384944,1778385078...1778385085,1778385100...1778385104,1778385112...1778385117,1778385147...1778385153,1778384925...1778384928,1778384948,1778384949,1778384960...1778384964,1778384986...1778384997,1778385015...1778385020,1778385059...1778385065,1778385086...1778385088,1778385118...1778385123,1778385160...1778385192,1778384929...1778384940,1778384953,1778384981...1778384985,1778385004...1778385014,1778385021...1778385032,1778385073...1778385077,1778385089...1778385099,1778385105...1778385111,1778384918...1778384924,1778384945...1778384947,1778384951,1778384952,1778384973...1778384980,1778384998...1778385003,1778384959,1778385193...1778385246"

INDUSTRY_CODES = "134218728,134218838,134218848,134218858,134218868,134218878,134218888,134218918,134218938,134218948,134218958,134218968,134218978,134218988,134218998,134219008,134219018,134219028,134219138,134219148,134219158,134219168,134219178,134219188,134219198,134219218,134219228,134219338,134219349,134219357,134219358,134219368,134219428,134219828,134219928,134220028,134220128,134220838,134220848,134220938,134220948,134222829,134222830,134222928,134223828,134223928,134224828,134224938,134225018,134225838,134225848,134226638,134226648,134226658,134226718,134226828,134227628,134227838,134227848,134227858,134227928,134228038,134228048,134228118,134228138,134228148,134228239,134228240,134228247,134228248,134228339,134228340,134228348,134228438,134228448,134228458,134228538,134228549,134228550,134228559,134228560,134228568,134228578,134228588,134228618,134228638,134228648,134228738,134228748,134228758,134228768,134228778,134228788,134228798,134229728,134230828,134230928,134231028,134231638,134231649...134231651,134231659,134231667,134231668,134231678,134231688,134231718,134231838,134231848,134231859,134231860,134231869,134231870,134231918,134231928,134232038,134232118,134232838,134232848,134232928,134233828,134233938,134233948,134233958,134233968,134234018,134234838,134234848,134234939,134234947,134234948,134234958,134234968,134235018,134235838,134235849,134235857,134235858,134235868,134235929...134235931,134236828,134236929,134236937,134237838,134237848,134237858,134237868,134237878,134237888,134237898,134237928,134238029,134238030,134238139,134238140,134238148,134238238,134238248,134238258,134238318,134238328,134238828,134238928,134239838,134239918,134239938,134239948,134239958,134240018,134240838,134240848,134240858,134240868,134240918,134240928,134241038,134241048,134241138,134241148,134241158,134241168,134241218,134241238,134241248,134241338,134241348,134241358,134241368,134241378,134241418,134241428,134241638,134241718,134241828,134241928,134242038,13424204
8,134242058,134242068,134242138,134242148,134242158,134242168,134242178,134242188,134242238,134242248,134242258,134242268,134242838,134242848,134242938,134243018,134243028,134243128,134243228,134243338,134243348,134243438,134243448,134243458,134243638,134243648,134243658,134243668,134243718,134243838,134243848,134243928,134244029,134244037,134244128,134244239...134244242,134244248,134244328,134244429,134244430,134244528,134244838,134244848,134244928,134245038,134245048,134245058,134245128,134245238,134245248,134245628,134245838,134245848,134245859,134245860,134245868,134245878,134245938,134245948,134245958,134245968,134245978,134246018,134246029,134246030,134246138,134246218,134246638,134246649...134246651,134246658,134246668,134246678,134246688,134246718,134246828,134246929...134246931,134247038,134247048,134247838,134247848,134247928,134248028,134248128,134248638,134248648,134248718,134248738,134248748,134248758,134248818,134249838,134249848,134249858,134249928,134250028,134250129,134250137,134250228,134250638,134250718,134250838,134250848,134250858,134250868,134250878,134250888,134250898,134250918,134250928,134252838,134252848,134252858,134252868,134252938,134252948,134252958,134253028,134253728,134254728,134255838,134255848,134255938,134255948,134256038,134256048,134256728,134258828,134258929,134258930,134259838,134259848,134259858,134259938,134259948,134260638,134260718,134260838,134260848,134260858,134260938,134260948,134261018,134261038,134261048,134261058,134261069,134261070,134261118,134261638,134261719,134261727,134262839,134262840,134262918,134262928,134263038,134263048,134263128,134263838,134263848,134263858,134263868,134263878,134263888,134263898,134263908,134263918,134263938,134263948,134263958,134263968,134264038,134264048,134264058,134264069,134264070,134264078,134264088,134264098,134264108,134264118,134264138,134264148,134264159,134264167,134264168,134264178,134264188,134264198,134264208,134264219,134264227,134264238,134264248,134264338,134264348,13
4264358,134264368,134264378,134264388,134264418,134264439,134264447,134264448,134264458,134264468,134264478,134264488,134264498,134264628,134264838,134264918,134264938,134264948,134264958,134264968,134264978,134264988,134265018,134265028,134265138,134265149,134265157,134265158,134265238,134265248,134265258,134265268,134265319,134265327,134265338,134265348,134265358,134265368,134265378,134265438,134265449,134265450,134265458,134265469,134265477,134265478,134265488,134265498,134265509,134265510,134265517,134265519,134265527,134265538,134265548,134265618,134265638,134265718,134266828,134266928,134267039,134267047,134267048,134267118,134267138,134267148,134267228,134267828,134267928,134268028,134268128,134268829,134268830,134268938,134268948,134269829...134269831,134269939...134269941,134269947,134269948,134269958,134269969...134269971,134270018,134270828,134270929,134270930,134272828,134272929,134272930,134272937,134273028,134273628,134273829...134273831,134273938,134274018,134274029,134274030,134275838,134275848,134275858,134275869,134275870,134275918,134275938,134276018,134276839...134276841,134276848,134276859...134276861,134276868,134276928,134277828,134277928,134278828,134278928,134279028,134279628,134279739,134279740,134279748,134279758,134279818,134280838,134280848,134281638,134281718,134281838,134281919,134281920,134281929...134281933,134281937,134282029...134282034,134282638,134282649,134282650,134282657,134282719,134282720,134282727,134282838,134282848,134282929,134282930,134283028,134283838,134283848,134283918,134283938,134283948,134284018,134284028,134285828,134285929,134285930,134285937,134286038,134286048,134286829,134286830,134286837,134286929...134286931,134287828,134287938,134287949,134287957,134288839,134288840,134288849,134288850,134288857,134288928,134289838,134289918,134289928,134290838,134290848,134290928,134291828,134291929...134291931,134291937,134292028,134292629,134292630,134292637,134292728,134294838,134294848,134294938,134294948,134295019,13
4295027,134295038,134295048,134295058,134295069,134295070,134295079,134295080,134295118,134295128,134295829,134295837,134295928,134296028,134296838,134296848,134297629,134297637,134297828,134297928,134298028,134298828,134298938,134298949...134298951,134298957,134299019,134299027,134299028,134299838,134299918,134299928,134300029,134300030,134300639,134300640,134300648,134300718,134301838,134301848,134301858,134301938,134301948,134301958,134301968,134301978,134302028,134302828,134302928,134303038,134303048,134303138,134303149,134303150,134303238,134303248,134303258,134303318,134303328,134303829,134303830,134303938,134303948,134303958,134304628,134304828,134304928,134305028,134305628,134305828,134306638,134306718,134307738,134307748,134307758,134307768,134308739,134308740,134308748,134308758,134308768,134309728,134310838,134310848,134310858,134310919,134310927,134310938,134311018,134311838,134311848,134311928,134312638,134312648,134312718,134312838,134312848,134312938,134312948,134312958,134312968,134312978,134313018,134313738,134313748,134313758,134313768,134313818,134314728,134315828,134315928,134316728"

INDUSTRY_LIST = "134218728,134218838,134218848,134218858,134218868,134218878,134218888,134218918,134218938,134218948,134218958,134218968,134218978,134218988,134218998,134219008,134219018,134219028,134219138,134219148,134219158,134219168,134219178,134219188,134219198,134219218,134219228,134219338,134219349,134219357,134219358,134219368,134219428,134219828,134219928,134220028,134220128,134220838,134220848,134220938,134220948,134222829,134222830,134222928,134223828,134223928,134224828,134224938,134225018,134225838,134225848,134226638,134226648,134226658,134226718,134226828,134227628,134227838,134227848,134227858,134227928,134228038,134228048,134228118,134228138,134228148,134228239,134228240,134228247,134228248,134228339,134228340,134228348,134228438,134228448,134228458,134228538,134228549,134228550,134228559,134228560,134228568,134228578,134228588,134228618,134228638,134228648,134228738,134228748,134228758,134228768,134228778,134228788,134228798,134229728,134230828,134230928,134231028,134231638,134231649...134231651,134231659,134231667,134231668,134231678,134231688,134231718,134231838,134231848,134231859,134231860,134231869,134231870,134231918,134231928,134232038,134232118,134232838,134232848,134232928,134233828,134233938,134233948,134233958,134233968,134234018,134234838,134234848,134234939,134234947,134234948,134234958,134234968,134235018,134235838,134235849,134235857,134235858,134235868,134235929...134235931,134236828,134236929,134236937,134237838,134237848,134237858,134237868,134237878,134237888,134237898,134237928,134238029,134238030,134238139,134238140,134238148,134238238,134238248,134238258,134238318,134238328,134238828,134238928,134239838,134239918,134239938,134239948,134239958,134240018,134240838,134240848,134240858,134240868,134240918,134240928,134241038,134241048,134241138,134241148,134241158,134241168,134241218,134241238,134241248,134241338,134241348,134241358,134241368,134241378,134241418,134241428,134241638,134241718,134241828,134241928,134242038,134242048
,134242058,134242068,134242138,134242148,134242158,134242168,134242178,134242188,134242238,134242248,134242258,134242268,134242838,134242848,134242938,134243018,134243028,134243128,134243228,134243338,134243348,134243438,134243448,134243458,134243638,134243648,134243658,134243668,134243718,134243838,134243848,134243928,134244029,134244037,134244128,134244239...134244242,134244248,134244328,134244429,134244430,134244528,134244838,134244848,134244928,134245038,134245048,134245058,134245128,134245238,134245248,134245628,134245838,134245848,134245859,134245860,134245868,134245878,134245938,134245948,134245958,134245968,134245978,134246018,134246029,134246030,134246138,134246218,134246638,134246649...134246651,134246658,134246668,134246678,134246688,134246718,134246828,134246929...134246931,134247038,134247048,134247838,134247848,134247928,134248028,134248128,134248638,134248648,134248718,134248738,134248748,134248758,134248818,134249838,134249848,134249858,134249928,134250028,134250129,134250137,134250228,134250638,134250718,134250838,134250848,134250858,134250868,134250878,134250888,134250898,134250918,134250928,134252838,134252848,134252858,134252868,134252938,134252948,134252958,134253028,134253728,134254728,134255838,134255848,134255938,134255948,134256038,134256048,134256728,134258828,134258929,134258930,134259838,134259848,134259858,134259938,134259948,134260638,134260718,134260838,134260848,134260858,134260938,134260948,134261018,134261038,134261048,134261058,134261069,134261070,134261118,134261638,134261719,134261727,134262839,134262840,134262918,134262928,134263038,134263048,134263128,134263838,134263848,134263858,134263868,134263878,134263888,134263898,134263908,134263918,134263938,134263948,134263958,134263968,134264038,134264048,134264058,134264069,134264070,134264078,134264088,134264098,134264108,134264118,134264138,134264148,134264159,134264167,134264168,134264178,134264188,134264198,134264208,134264219,134264227,134264238,134264248,134264338,134264348,134
264358,134264368,134264378,134264388,134264418,134264439,134264447,134264448,134264458,134264468,134264478,134264488,134264498,134264628,134264838,134264918,134264938,134264948,134264958,134264968,134264978,134264988,134265018,134265028,134265138,134265149,134265157,134265158,134265238,134265248,134265258,134265268,134265319,134265327,134265338,134265348,134265358,134265368,134265378,134265438,134265449,134265450,134265458,134265469,134265477,134265478,134265488,134265498,134265509,134265510,134265517,134265519,134265527,134265538,134265548,134265618,134265638,134265718,134266828,134266928,134267039,134267047,134267048,134267118,134267138,134267148,134267228,134267828,134267928,134268028,134268128,134268829,134268830,134268938,134268948,134269829...134269831,134269939...134269941,134269947,134269948,134269958,134269969...134269971,134270018,134270828,134270929,134270930,134272828,134272929,134272930,134272937,134273028,134273628,134273829...134273831,134273938,134274018,134274029,134274030,134275838,134275848,134275858,134275869,134275870,134275918,134275938,134276018,134276839...134276841,134276848,134276859...134276861,134276868,134276928,134277828,134277928,134278828,134278928,134279028,134279628,134279739,134279740,134279748,134279758,134279818,134280838,134280848,134281638,134281718,134281838,134281919,134281920,134281929...134281933,134281937,134282029...134282034,134282638,134282649,134282650,134282657,134282719,134282720,134282727,134282838,134282848,134282929,134282930,134283028,134283838,134283848,134283918,134283938,134283948,134284018,134284028,134285828,134285929,134285930,134285937,134286038,134286048,134286829,134286830,134286837,134286929...134286931,134287828,134287938,134287949,134287957,134288839,134288840,134288849,134288850,134288857,134288928,134289838,134289918,134289928,134290838,134290848,134290928,134291828,134291929...134291931,134291937,134292028,134292629,134292630,134292637,134292728,134294838,134294848,134294938,134294948,134295019,134
295027,134295038,134295048,134295058,134295069,134295070,134295079,134295080,134295118,134295128,134295829,134295837,134295928,134296028,134296838,134296848,134297629,134297637,134297828,134297928,134298028,134298828,134298938,134298949...134298951,134298957,134299019,134299027,134299028,134299838,134299918,134299928,134300029,134300030,134300639,134300640,134300648,134300718,134301838,134301848,134301858,134301938,134301948,134301958,134301968,134301978,134302028,134302828,134302928,134303038,134303048,134303138,134303149,134303150,134303238,134303248,134303258,134303318,134303328,134303829,134303830,134303938,134303948,134303958,134304628,134304828,134304928,134305028,134305628,134305828,134306638,134306718,134307738,134307748,134307758,134307768,134308739,134308740,134308748,134308758,134308768,134309728,134310838,134310848,134310858,134310919,134310927,134310938,134311018,134311838,134311848,134311928,134312638,134312648,134312718,134312838,134312848,134312938,134312948,134312958,134312968,134312978,134313018,134313738,134313748,134313758,134313768,134313818,134314728,134315828,134315928,134316728".split(',')
# Function to split a list into chunks
def chunk_list(data, chunk_size):
    """Yield consecutive slices of ``data`` that are at most ``chunk_size`` long.

    Args:
        data: Any sliceable sequence that supports ``len()`` (a list here).
        chunk_size: Maximum number of items per slice; must be at least 1.

    Yields:
        ``data[start:start + chunk_size]`` for ``start`` = 0, chunk_size, ...
        The last slice is shorter when ``len(data)`` is not a multiple of
        ``chunk_size``. Nothing is yielded for empty ``data``.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1. Because this is a
            generator, the error surfaces when iteration starts.
    """
    # A negative step would make range() empty and silently drop every item,
    # so reject it explicitly instead of returning no chunks.
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be >= 1, got {chunk_size!r}")
    for start in range(0, len(data), chunk_size):
        yield data[start:start + chunk_size]

# Iterate by chunks of industry codes

# Split the industry list into manageable chunks of 20
industry_chunks = list(chunk_list(INDUSTRY_LIST, 20))
all_dataframes = []  # not used by this cell; kept so the name still exists afterwards

# (connect, read) timeouts in seconds. Without a timeout requests.get() can
# block forever on a stalled connection and freeze the whole cell.
REQUEST_TIMEOUT = (10, 300)

# Keep INDUSTRY_CODE as text through every CSV round-trip so that pandas does
# not re-infer it as an integer and drop leading zeros.
# NOTE(review): assumes INDUSTRY_CODE holds the zero-padded SIC code - confirm
# against a downloaded chunk.
CODE_DTYPES = {"INDUSTRY_CODE": str}

# Indices of the chunks whose request or parse failed during this run
failed_chunks = []

print(f"Starting download ... Will make {len(industry_chunks)} separate API calls.")

for i, industry_chunk in enumerate(tqdm(industry_chunks, desc="Downloading Chunks")):

    # Define the filename for this chunk
    chunk_filename = os.path.join(output_dir, f"bres_chunk_{i}.csv")

    # Skip if we already downloaded this file.
    # The cache is keyed by chunk index only: delete the output directory if
    # INDUSTRY_LIST, GEO_LIST_STR or the chunk size changes.
    if os.path.exists(chunk_filename):
        # tqdm.write prints without breaking the progress bar
        tqdm.write(f"Chunk {i} already downloaded. Skipping...")
        continue

    # Join the chunk of industry codes back into a comma-separated string
    industry_str_chunk = ",".join(industry_chunk)

    # Define all parameters for this request
    params = {
        'geography': GEO_LIST_STR,
        'date': 'latest',
        'industry': industry_str_chunk,
        'employment_status': '4',
        'measure': '1',
        'measures': '20100'
    #    'select': 'GEOGRAPHY_CODE, GEOGRAPHY_NAME,INDUSTRY, OBS_VALUE'
    }

    try:
        response = requests.get(base_url, params=params, timeout=REQUEST_TIMEOUT)

        # Raise an error if the API returns 4xx or 5xx status code
        response.raise_for_status()

        # Use io.StringIO to read the text content into a dataframe
        chunk_df = pd.read_csv(io.StringIO(response.text), dtype=CODE_DTYPES)

        # Write to a temporary name first and rename once complete, so an
        # interrupted write never leaves a truncated file that the
        # "already downloaded" check above would treat as finished.
        partial_filename = chunk_filename + ".part"
        chunk_df.to_csv(partial_filename, index=False)
        os.replace(partial_filename, chunk_filename)

    except requests.exceptions.HTTPError as err:
        failed_chunks.append(i)
        tqdm.write(f"\n--- HTTP ERROR on chunk {i} ---")
        tqdm.write(f"Error: {err}")
        tqdm.write("This chunk failed, likely because it also exceeded the cell limit.")
        tqdm.write(f"Response from server: {response.text[:200]}...") # Print first 200 chars of error
    except requests.exceptions.RequestException as err:
        # Timeouts, connection resets, DNS failures: no usable response exists
        failed_chunks.append(i)
        tqdm.write(f"\n--- NETWORK ERROR on chunk {i} ---")
        tqdm.write(f"Error: {err}")
    except pd.errors.ParserError as err:
        failed_chunks.append(i)
        tqdm.write(f"\n--- PANDAS ERROR on chunk {i} ---")
        tqdm.write(f"Error: {err}")
        tqdm.write("This means the server did not return a CSV. It was probably an error page.")
        tqdm.write(f"Response from server: {response.text[:200]}...")
    except Exception as e:
        failed_chunks.append(i)
        tqdm.write(f"\n--- UNKNOWN ERROR on chunk {i} ---: {e}")

    # pause between requests
    time.sleep(0.1)

if failed_chunks:
    print(f"\nWARNING: {len(failed_chunks)} chunk(s) failed: {failed_chunks}. "
          "Re-run this cell to retry them. Concatenating the chunks that are available ...")
else:
    print("\nAll chunks downloaded. Concatenating files ...")

# Concatenate the chunk files that belong to this run, in chunk order.
# Only the expected file names are read, so stray or stale CSVs in the output
# directory cannot leak into the result and the row order is deterministic.
expected_files = [
    os.path.join(output_dir, f"bres_chunk_{idx}.csv")
    for idx in range(len(industry_chunks))
]
all_files = [path for path in expected_files if os.path.exists(path)]
df_list = [pd.read_csv(path, dtype=CODE_DTYPES) for path in all_files]

if df_list:
    df_final = pd.concat(df_list, ignore_index=True)

    df_final.to_csv("bres_employment_5digsic_LAD.csv", index=False)
    if len(all_files) == len(industry_chunks):
        print(f"Success! Final data saved to 'bres_employment_5digsic_LAD.csv' with {len(df_final)} rows.")
    else:
        print(f"INCOMPLETE: only {len(all_files)} of {len(industry_chunks)} chunks were available. "
              f"Partial data saved to 'bres_employment_5digsic_LAD.csv' with {len(df_final)} rows.")
else:
    print("No dataframes to concatenate. Please check for errors in the download process.")



In [None]:
# Read in concatenated dataframe

# Columns of interest, in the order they should appear
EMPLOYMENT_COLUMNS = ["GEOGRAPHY_CODE", "GEOGRAPHY_NAME", "INDUSTRY_CODE", "OBS_VALUE"]

# INDUSTRY_CODE is read as text: with type inference a code such as "01110"
# would become the integer 1110, which breaks the digit slicing done later.
# NOTE(review): assumes the file carries zero-padded SIC codes - confirm.
# usecols avoids loading the columns that are discarded anyway; the trailing
# selection restores the requested column order (usecols keeps file order).
employment_SIC_LAD = pd.read_csv(
    "bres_employment_5digsic_LAD.csv",
    usecols=EMPLOYMENT_COLUMNS,
    dtype={"INDUSTRY_CODE": str},
)[EMPLOYMENT_COLUMNS]

employment_SIC_LAD.dtypes

In [None]:
# Filter to IS-8 SIC codes only

# Read SIC as text so zero-padded codes in the lookup are not parsed as numbers
IS8_LOOKUP = pd.read_csv("IS-8 SIC Lookup.csv", dtype={"SIC": str})

# Keep only the rows whose "Frontier sector" is null; rows that carry a
# Frontier sector belong to the more granular mapping
IS8_LOOKUP = IS8_LOOKUP[IS8_LOOKUP["Frontier sector"].isnull()]

# Full join first with employment data to introduce indicator of IS8 sector, later on we filter
# We need to do this on SIC digit level because some are defined at 2,3,4 etc

# Create different digit columns in the employment data before merging.
# The code is zero-padded to 5 characters first: if INDUSTRY_CODE was parsed
# as an integer somewhere upstream, "01110" arrives as 1110 and its 2-digit
# prefix would wrongly be "11" instead of "01".
# NOTE(review): assumes every INDUSTRY_CODE is a 5-digit SIC code - confirm.
sic_code = employment_SIC_LAD["INDUSTRY_CODE"].astype(str).str.zfill(5)
employment_SIC_LAD["SIC_2digit"] = sic_code.str[:2]
employment_SIC_LAD["SIC_3digit"] = sic_code.str[:3]
employment_SIC_LAD["SIC_4digit"] = sic_code.str[:4]
employment_SIC_LAD["SIC_5digit"] = sic_code.str[:5]

IS8_LOOKUP['SIC'] = IS8_LOOKUP['SIC'].astype(str)
IS8_LOOKUP = IS8_LOOKUP[['SIC','Digit level','IS8 sector']]
IS8_LOOKUP = IS8_LOOKUP.rename(columns={'IS8 sector':'IS8_Sector'})


def _lookup_for_level(lookup, level):
    """Return the SIC -> IS8_Sector rows of ``lookup`` defined at ``level`` digits.

    SIC is left-padded with zeros to ``level`` characters so that a code whose
    leading zero was lost in the lookup file still matches. Exact duplicate
    rows are dropped because each one would duplicate the matching employment
    rows in the left merges below and double-count OBS_VALUE.
    """
    subset = lookup[lookup['Digit level'] == level].drop('Digit level', axis=1)
    subset = subset.assign(SIC=subset['SIC'].str.zfill(level))
    return subset.drop_duplicates()


lookup_2dig = _lookup_for_level(IS8_LOOKUP, 2)
lookup_3dig = _lookup_for_level(IS8_LOOKUP, 3)
lookup_4dig = _lookup_for_level(IS8_LOOKUP, 4)
lookup_5dig = _lookup_for_level(IS8_LOOKUP, 5)


# 1. Merge 5-digit SICs
# No column names clash on this first merge, so the lookup columns arrive
# unsuffixed as 'SIC' and 'IS8_Sector'; later merges get '_4', '_3', '_2'.

df_merged = pd.merge(employment_SIC_LAD, lookup_5dig, how='left', left_on='SIC_5digit', right_on='SIC', suffixes=('','_5'))

# 2. Merge 4-digit SICs

df_merged = pd.merge(df_merged, lookup_4dig, how='left', left_on='SIC_4digit', right_on='SIC', suffixes=('','_4'))

# 3. Merge 3-digit SICs

df_merged = pd.merge(df_merged, lookup_3dig, how='left', left_on='SIC_3digit', right_on='SIC', suffixes = ('','_3'))

# 4. Merge 2-digit SICs

df_merged = pd.merge(df_merged, lookup_2dig, how='left', left_on='SIC_2digit', right_on='SIC', suffixes=('','_2'))

# A left merge adds rows only when one SIC code maps to several sectors at the
# same digit level; report it because those rows repeat OBS_VALUE.
if len(df_merged) != len(employment_SIC_LAD):
    print(f"Note: merges produced {len(df_merged)} rows from {len(employment_SIC_LAD)} employment rows; "
          "some SIC codes map to more than one IS8 sector.")


# Combine all the digit level flags into one
# Rename the first flag column (from the 5-digit merge) for clarity
df_merged = df_merged.rename(columns={'IS8_Sector': 'IS8_Sector_5'})

# Coalesce the flags, starting with the most specific
df_merged['IS8_Final_Flag'] = (
    df_merged['IS8_Sector_5']
    .fillna(df_merged['IS8_Sector_4'])
    .fillna(df_merged['IS8_Sector_3'])
    .fillna(df_merged['IS8_Sector_2'])
)

# Drop all the intermediary flags
cols_to_drop = [
    'SIC', 'IS8_Sector_5', 
    'SIC_4', 'IS8_Sector_4', 
    'SIC_3', 'IS8_Sector_3', 
    'SIC_2', 'IS8_Sector_2',
]

df_final = df_merged.drop(columns=cols_to_drop, errors='ignore')

df_final.head()

In [None]:
# Keep only industries in the IS8

# Size of the merged table before filtering (the author recorded 261800)
row_count = len(df_final.index)
print(f"Number of rows: {row_count}")

# A row belongs to the IS8 when a sector was matched at any digit level,
# i.e. when the coalesced flag is populated
has_is8_sector = df_final['IS8_Final_Flag'].notna()
IS8_df = df_final.loc[has_is8_sector]

# Size of the table after filtering (the author recorded 80850)
row_count = len(IS8_df.index)
print(f"Number of rows: {row_count}")

IS8_df.head(100)

In [None]:
# We want a denominator for this ... can we get working age population?

base_url = "https://www.nomisweb.co.uk/api/v01/dataset/NM_31_1.data.csv"

GEO_LIST_STR = "1778384897...1778384901,1778384941,1778384950,1778385143...1778385146,1778385159,1778384902...1778384905,1778384942,1778384943,1778384956,1778384957,1778385033...1778385044,1778385124...1778385138,1778384906...1778384910,1778384958,1778385139...1778385142,1778385154...1778385158,1778384911...1778384914,1778384954,1778384955,1778384965...1778384972,1778385045...1778385058,1778385066...1778385072,1778384915...1778384917,1778384944,1778385078...1778385085,1778385100...1778385104,1778385112...1778385117,1778385147...1778385153,1778384925...1778384928,1778384948,1778384949,1778384960...1778384964,1778384986...1778384997,1778385015...1778385020,1778385059...1778385065,1778385086...1778385088,1778385118...1778385123,1778385160...1778385192,1778384929...1778384940,1778384953,1778384981...1778384985,1778385004...1778385014,1778385021...1778385032,1778385073...1778385077,1778385089...1778385099,1778385105...1778385111,1778384918...1778384924,1778384945...1778384947,1778384951,1778384952,1778384973...1778384980,1778384998...1778385003,1778384959,1778385193...1778385257"

# Query parameters for the population request against base_url.
# NOTE(review): the meaning of the 'sex' and 'age' codes is not visible in
# this notebook - confirm them against the NOMIS dataset definition.
params = {
        'geography': GEO_LIST_STR,
        'date': 'latest',
        'sex': '7',
        'age':'22',
        'measures': '20100',
 #       'select': 'GEOGRAPHY_CODE, GEOGRAPHY_NAME, OBS_VALUE'
    }

# (connect, read) timeouts in seconds; without them requests.get() can block
# indefinitely on a stalled connection.
response = requests.get(base_url, params=params, timeout=(10, 300))

# Raise an error if the API returns 4xx or 5xx status code
response.raise_for_status()

# Use io.StringIO to read the text content into a dataframe
chunk_df = pd.read_csv(io.StringIO(response.text))

# Save the result to a file
chunk_df.to_csv('working_age_pop.csv', index=False)