# Disaggregate employment projections to LSOA

Using:
1. Business Register and Employment Survey (BRES) open-access estimates of employment taken from Nomis
2. Local Authority District (LAD) employment projections provided by Cambridge Econometrics and processed by `convert-scenarios.ipynb`

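Disaggregate the LAD-level projections of total employment to Lower Super Output Area (LSOA) level for the years 2015-2050, allocating each LAD's total to its LSOAs in proportion to their share of BRES employment.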



In [None]:
import os

import geopandas
import pandas

In [None]:
# Scenario identifiers. Each one selects a per-scenario input file named
# data_processed/arc_employment__<scenario>.csv and is stored in the
# `scenario` column of the combined tables.
SCENARIOS = ('baseline', '0-unplanned', '1-new-cities', '2-expansion')

In [None]:
# Read the LAD-level employment projection for every scenario and stack them
# into one long table, tagging each row with the scenario it was read from.
lad_employment = pandas.concat([
    pandas.read_csv(
        os.path.join("data_processed", f"arc_employment__{scenario}.csv")
    ).assign(scenario=scenario)
    for scenario in SCENARIOS
])

# Sanity-check the row count: 391 LADs * 36 years * number of scenarios.
# The scenario count comes from SCENARIOS rather than a literal, so the check
# stays correct if the scenario list is edited.
expected_length = 391 * 36 * len(SCENARIOS)
msg = f"Expected {len(lad_employment)} == {expected_length} LADs * years * scenarios"
assert len(lad_employment) == expected_length, msg

# print extract
lad_employment.tail(1)

In [None]:
def download(url, filename, force=False, timeout=60):
    """Download `url` to `filename` unless the file is already present.

    Args:
        url: Address to fetch with an HTTP GET.
        filename: Destination path for the downloaded bytes.
        force: When true, download even if `filename` already exists.
        timeout: Seconds to wait for the server before giving up; passed
            straight to `requests.get`.

    Raises:
        requests.HTTPError: If the server answers with an error status.
            Nothing is written to `filename` in that case.
    """
    if not force and os.path.exists(filename):
        return

    import requests

    # Stream into a sibling temporary file and only move it into place once
    # the transfer has finished. Otherwise an interrupted download would leave
    # a truncated `filename` that the exists-check above treats as complete.
    partial = f"{filename}.part"
    try:
        with requests.get(url, stream=True, timeout=timeout) as response:
            # Fail loudly instead of saving an HTML error page as data.
            response.raise_for_status()
            with open(partial, 'wb') as fd:
                for chunk in response.iter_content(chunk_size=64 * 1024):
                    fd.write(chunk)
        os.replace(partial, filename)
    finally:
        # Only present here if something above failed part-way through.
        if os.path.exists(partial):
            os.remove(partial)

def unzip(path, parent):
    """Extract every member of the zip archive at `path` into `parent`."""
    from zipfile import ZipFile

    with ZipFile(path, mode='r') as archive:
        archive.extractall(path=parent)

In [None]:
# Lookup table: "Output Area to Lower Layer Super Output Area to Middle Layer
# Super Output Area to Local Authority District (December 2011) Lookup in
# England and Wales".
# Description page:
# https://geoportal.statistics.gov.uk/datasets/output-area-to-lower-layer-super-output-area-to-middle-layer-super-output-area-to-local-authority-district-december-2011-lookup-in-england-and-wales
path = os.path.join("data_as_provided", "lsoa_lad_lu.csv")
url = "https://opendata.arcgis.com/datasets/6ecda95a83304543bc8feedbd1a58303_0.csv"

# force=True: fetch a fresh copy on every run, replacing any existing file.
download(url, path, force=True)
# Unzip step left disabled (the file is saved under a .csv name):
#unzip(path, "data_as_provided")

In [None]:
# Build the LSOA -> LAD lookup: keep only the LAD/LSOA name and code columns,
# lower-case their names, and collapse repeated rows to one per combination.
lookup_columns = {
    'LAD11NM': 'lad11nm',
    'LAD11CD': 'lad11cd',
    'LSOA11CD': 'lsoa11cd',
    'LSOA11NM': 'lsoa11nm',
}
lad_lsoa_lu = pandas.read_csv(
    os.path.join("data_as_provided", "lsoa_lad_lu.csv"),
    usecols=list(lookup_columns))
lad_lsoa_lu = lad_lsoa_lu.rename(columns=lookup_columns)
lad_lsoa_lu = lad_lsoa_lu.sort_values(by=['lad11cd', 'lsoa11cd'])
lad_lsoa_lu = lad_lsoa_lu.drop_duplicates(ignore_index=True)

# Attach the 2016 LAD code that corresponds to each 2011 LAD code.
# NOTE: this is an inner join, so rows whose lad11cd is missing from the
# changes file are dropped.
lad_cd_changes = pandas.read_csv(
    os.path.join('data_as_provided', 'lad_nmcd_changes.csv'),
    usecols=['lad11cd', 'lad16cd'])
lad_lsoa_lu = lad_lsoa_lu.merge(lad_cd_changes, on='lad11cd')

# England and Wales only (constrained by BRES/Nomis statistics)
in_england = lad_lsoa_lu.lad11cd.str.startswith('E')
in_wales = lad_lsoa_lu.lad11cd.str.startswith('W')
lad_lsoa_lu = lad_lsoa_lu[in_england | in_wales]
lad_lsoa_lu

In [None]:
# LAD codes that appear in only one of the lookup and the LAD projections.
# Expect this to include only Scotland and Northern Ireland
set(lad_lsoa_lu.lad16cd).symmetric_difference(lad_employment.lad_uk_2016)

In [None]:
# Expect this to be empty (all LSOAs shared)
set(lad_lsoa_lu.lsoa11cd) ^ set(lsoa_employment.lsoa11cd)

In [None]:
# read total employment by LSOA
lsoa_employment = pandas.read_csv(
    os.path.join("data_as_provided", "bres-employment-by-lsoa-2015-2018.csv"),
    skiprows=lambda x: x in [0, 1, 2, 3, 4, 5, 6, 7, 9], # skip header comments
    skipfooter=10, # skip footer comments and totals row
    engine='python', # need to use this to be able to skip footer
    dtype={'2015': 'int', '2016': 'int', '2017': 'int', '2018': 'int'}) \
    .drop(columns=["Unnamed: 2", "Unnamed: 4", "Unnamed: 6", "Unnamed: 8"]) \
    .rename(columns={"2011 super output area - lower layer": "lsoa"})

# split name, code
lsoa_employment['lsoa11cd'] = lsoa_employment.lsoa.apply(lambda d: d.split(" : ")[0])
lsoa_employment.drop(columns=['lsoa'], inplace=True)
lsoa_employment = lsoa_employment[['lsoa11cd', '2015', '2016', '2017', '2018']]

# add LAD names/codes
lsoa_employment = lsoa_employment.merge(
    lad_lsoa_lu, 
    on='lsoa11cd', 
    validate='one_to_one')
lsoa_employment.tail(1)

In [None]:
# Sum LSOA employment up to LAD level for each observed year; the totals are
# stored as '<year>_lad' columns, indexed by lad16cd.
lsoa_to_lad_agg = (
    lsoa_employment
    .groupby('lad16cd')[['2015', '2016', '2017', '2018']]
    .sum()
    .add_suffix('_lad')
)

lsoa_to_lad_agg.tail(1)

In [None]:
# Each LSOA's share of its LAD's employment, per observed year:
# prop15 = 2015 / 2015_lad, and likewise for 2016-2018.
with_lad_totals = lsoa_employment.merge(lsoa_to_lad_agg, on='lad16cd')
for year in ('2015', '2016', '2017', '2018'):
    with_lad_totals[f'prop{year[2:]}'] = (
        with_lad_totals[year] / with_lad_totals[f'{year}_lad']
    )
proportions = with_lad_totals[
    ['lsoa11cd', 'lad16cd', 'prop15', 'prop16', 'prop17', 'prop18']
]
proportions.tail(1)

In [None]:
# Pair every LAD projection row with each of that LAD's LSOAs and its shares.
projections = lad_employment.merge(
    proportions, left_on='lad_uk_2016', right_on='lad16cd')
projections = projections.rename(
    columns={'employment': 'employment_lad_thousands'})

# 2015-2017 use the share observed in that same year; 2018 and every later
# year reuse the 2018 share.
share_column_by_year = {2015: 'prop15', 2016: 'prop16', 2017: 'prop17'}
parts = []
for year, share_column in share_column_by_year.items():
    part = projections[projections.timestep == year].copy()
    part['employment'] = part.employment_lad_thousands * 1000 * part[share_column]
    parts.append(part)

rest = projections[projections.timestep >= 2018].copy()
rest['employment'] = rest.employment_lad_thousands * 1000 * rest.prop18
parts.append(rest)

# Stack the year groups back together and keep only the output columns.
projections = pandas.concat(parts)
projections = projections[['scenario', 'timestep', 'lsoa11cd', 'lad16cd', 'employment']]
projections

In [None]:
# Store employment as whole numbers. Each LSOA value is rounded on its own, so
# the LSOA values need not sum exactly to the LAD figure.
projections['employment'] = projections['employment'].round().astype(int)
projections

In [None]:
# LAD codes present in only one of the lookup and the LSOA projections
# (empty when the two agree).
set(lad_lsoa_lu.lad16cd.unique()).symmetric_difference(projections.lad16cd.unique())

In [None]:
# Number of distinct LAD codes in the lookup vs. in the projections.
lad_lsoa_lu.lad16cd.nunique(dropna=False), projections.lad16cd.nunique(dropna=False)

In [None]:
# Number of distinct LSOA codes in the lookup vs. in the projections.
lad_lsoa_lu.lsoa11cd.nunique(dropna=False), projections.lsoa11cd.nunique(dropna=False)

In [None]:
# Sanity-check the row count: 34753 LSOAs * 36 years * number of scenarios.
# The scenario count comes from SCENARIOS rather than a literal, so the check
# stays correct if the scenario list is edited.
expected_length = 34753 * 36 * len(SCENARIOS)
msg = f"Expected {len(projections)} == {expected_length} LSOAs * years * scenarios"
assert len(projections) == expected_length, msg

In [None]:
# Write the full LSOA projections, without the row index, to a .csv.gz file.
ew_output_path = os.path.join('data_processed', 'ew_employment_lsoa.csv.gz')
projections.to_csv(ew_output_path, index=False)

In [None]:
# Total projected employment per year and scenario.
# Only `employment` is summed: the frame also holds the string columns
# lsoa11cd and lad16cd, and summing those is meaningless (recent pandas
# versions would concatenate the codes within each group).
projections.groupby(['timestep', 'scenario'])[['employment']].sum()

In [None]:
# Load the LAD layer that carries the `in_arc` flag used to pick the Arc LADs.
arc_lads_path = os.path.join('data_as_provided', 'arc_lad_uk16.gpkg')
arc_lads = geopandas.read_file(arc_lads_path)

In [None]:
# Distinct `name` values of the LADs flagged in_arc == 1.
# NOTE(review): `name` presumably holds LAD16 codes, since it is matched
# against lad16cd afterwards; confirm against the source layer.
arc_lad_codes = arc_lads.loc[arc_lads['in_arc'] == 1, 'name'].unique()

In [None]:
# Keep only the projection rows whose LAD is one of the Arc LADs.
is_arc_lad = projections['lad16cd'].isin(arc_lad_codes)
arc_projections = projections[is_arc_lad]

In [None]:
# Write the Arc-only LSOA projections, without the row index, to a .csv.gz file.
arc_output_path = os.path.join('data_processed', 'arc_employment_lsoa.csv.gz')
arc_projections.to_csv(arc_output_path, index=False)