# Scale population and employment for Postcodes and Workplace Zones

In outline:
- get area definitions for Local Authority Districts (LAD - 2011 and 2016), Workplace Zones (WZ), Postcodes (PCD) in Great Britain
- read 2011 census data for workplace population at LAD, WZ
- read 2011 census data for usual resident population at LAD, PCD
- read population and employment scenarios and baseline projection at LAD scale
- scale future employment to workplace zones
- scale future population to postcode sectors

## Citations

England, Northern Ireland, Scotland and Wales 2011 Census
- Office for National Statistics ; National Records of Scotland ; Northern Ireland Statistics and Research Agency (2017): 2011 Census aggregate data. UK Data Service (Edition: February 2017). DOI: http://dx.doi.org/10.5257/census/aggregate-2011-2
This information is licensed under the terms of the Open Government Licence [http://www.nationalarchives.gov.uk/doc/open-government-licence/version/2].

England and Wales 2001 Census
- Office for National Statistics (2011): 2001 Census aggregate data (Edition: May 2011). UK Data Service. DOI: http://dx.doi.org/10.5257/census/aggregate-2001-2


In [None]:
import gc
import os
import zipfile
from glob import glob

import geopandas as gpd
import pandas as pd
import requests
from geopandas.tools import explicit_crs_from_epsg
from numpy import savetxt
from shapely.geometry import Point

## Get boundaries and lookups

In [None]:
def download(url, filename, dirname=".", force=False):
    """Fetch `url` into `filename` unless it is already there, then unzip it.

    Parameters
    ----------
    url : str
        Address to download from.
    filename : str
        Local path the payload is stored at. If it already exists it is
        reused and no request is made, unless `force` is true.
    dirname : str
        Directory a ``.zip`` payload is extracted into (default: the current
        working directory). Ignored for other file types.
    force : bool
        Download again even when `filename` exists.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status. Nothing is written to
        `filename` in that case.
    """
    if force or not os.path.exists(filename):
        # Stream into a side file and move it into place only once complete,
        # so a failed or interrupted transfer is never mistaken for a cached
        # download on the next run.
        partial = filename + ".part"
        try:
            with requests.get(url, stream=True) as r:
                # Fail loudly rather than saving an HTML error page as data.
                r.raise_for_status()
                with open(partial, 'wb') as fd:
                    for chunk in r.iter_content(chunk_size=64 * 1024):
                        fd.write(chunk)
            os.replace(partial, filename)
        finally:
            if os.path.exists(partial):
                os.remove(partial)
    # Archives are (re-)extracted on every call, including cached ones.
    if filename.endswith(".zip"):
        with zipfile.ZipFile(filename, "r") as zf:
            zf.extractall(dirname)

In [None]:
# Scotland Workplace Zones
# ref: https://www.nrscotland.gov.uk/statistics-and-data/geography/our-products/census-datasets/2011-census/2011-boundaries
url = "https://www.nrscotland.gov.uk/files//geography/products/workplacezones2011scotland.zip"
# Unpack into data/ (as the other download calls do): the shapefile is read
# from data/WorkplaceZones2011Scotland/ below, whereas the default would
# extract into the current directory.
# NOTE(review): assumes the archive has a top-level WorkplaceZones2011Scotland/
# folder - confirm against the downloaded zip.
download(url, "data/workplacezones2011scotland.zip", "data")
# Rename the code columns to the names used for England and Wales.
wz_sc = gpd.read_file("data/WorkplaceZones2011Scotland/WorkplaceZones2011Scotland.shp").rename(columns={
    'LADCD': 'lad11cd',
    'WZCD': 'wz11cd'
})

In [None]:
# England and Wales Workplace Zones
# ref: https://geoportal.statistics.gov.uk/datasets/workplace-zones-december-2011-full-clipped-boundaries-in-england-and-wales
url = "https://opendata.arcgis.com/datasets/a399c2a5922a4beaa080de63c0a218a3_0.zip?outSR=%7B%22latestWkid%22%3A27700%2C%22wkid%22%3A27700%7D"
download(url, "data/workplacezones2011ew.zip", dirname="data")
wz_ew = gpd.read_file("data/Workplace_Zones_December_2011_Full_Clipped_Boundaries_in_England_and_Wales.shp")
# Keep only the LAD code, the zone code and the zone outline.
wz_ew = wz_ew[['lad11cd', 'wz11cd', 'geometry']]

In [None]:
# Stack the Scottish zones on top of the England and Wales zones (row-wise).
wz = pd.concat([wz_sc, wz_ew])

In [None]:
# Preview the first rows of the combined workplace zones.
wz.head(n=5)

## Get 2011 census data 
- population by Postcode Sector, LAD
- workplace population by Workplace Zone, LAD

In [None]:
def unique_col_len(df, col):
    """Check whether every row of `df` has a distinct value in column `col`.

    Returns a tuple ``(is_unique, n_rows, n_distinct)``. Missing values count
    as one distinct value.
    """
    n_rows = len(df)
    n_distinct = df[col].nunique(dropna=False)
    return n_distinct == n_rows, n_rows, n_distinct

### Postcode resident population
Postcode data sourced from Nomis and Scotland Census

- ref https://www.nomisweb.co.uk/census/2011/postcode_headcounts_and_household_estimates
- data https://www.nomisweb.co.uk/output/census/2011/Postcode_Estimates_Table_1.csv


- ref https://www.scotlandscensus.gov.uk/bulletin-figures-and-tables
- data https://www.scotlandscensus.gov.uk/documents/censusresults/release1c/rel1c2tableA1.csv

Cleaned by Marcus Young, University of Southampton

In [None]:
# 2011 resident population per postcode, ordered by postcode, with the
# columns renamed to this notebook's naming scheme.
pcd_pop11 = pd.read_csv("data/pcpop11.csv.gz")
pcd_pop11 = pcd_pop11.sort_values('postcode')
pcd_pop11 = pcd_pop11.rename(columns={'oslaua': 'lad11cd', 'population': 'pop11'})

In [None]:
# Sanity check: total 2011 population over all postcodes.
pcd_pop11['pop11'].sum()

In [None]:
# Total 2011 population per LAD.
# Sum only the population column: a bare groupby(...).sum() aggregates every
# column, and on pandas >= 2.0 string columns such as `postcode` are
# concatenated instead of dropped, which would add a `postcode` column here
# and break the merge back onto pcd_pop11.
lad_pop11 = pcd_pop11.groupby('lad11cd')[['pop11']].sum().reset_index().rename(columns={
    'pop11': 'ladpop11'
})

In [None]:
# Attach each postcode's LAD total (ladpop11); the left join keeps every postcode.
pcd_pop11 = pd.merge(pcd_pop11, lad_pop11, how='left', on='lad11cd')

In [None]:
# Share of its LAD's 2011 population that lives in each postcode.
pcd_pop11['proportion11'] = pcd_pop11['pop11'] / pcd_pop11['ladpop11']

In [None]:
# Preview the postcode table with LAD totals and shares.
pcd_pop11.head(n=5)

### Workplace zone workplace population
Workplace population and density by Workplace Zone are available through Nomis (England and Wales, table WP102EW):
https://www.nomisweb.co.uk/census/2011/wp102ew

In [None]:
# Scotland: workplace population by workplace zone, fetched directly
url = 'http://www.scotlandscensus.gov.uk/documents/additional_tables/WP103SCwz.csv'
download(url, filename="data/WP103SCwz.csv", dirname="data")

# England and Wales - ref https://www.nomisweb.co.uk/census/2011/wp102ew
# No scripted download here: the page serves the files via javascript, so save
# them manually as wp102ew_lad.csv and wp102ew_wz.csv

In [None]:
# Scottish table: skip the 10 leading lines, then keep the first two columns
# of the first 5375 rows.
# NOTE(review): presumably the rows beyond 5375 are footer text - confirm.
wz_wp11_sc = pd.read_csv("data/WP103SCwz.csv", header=None, skiprows=10)[[0, 1]].iloc[:5375]
wz_wp11_sc.columns = ['wz11', 'wp11']
# Counts may carry thousands separators ("1,234"); strip them before casting.
wz_wp11_sc['wp11'] = wz_wp11_sc['wp11'].map(lambda count: str(count).replace(",", "")).astype('int')

In [None]:
# England and Wales table: relabel all six columns, then keep only the zone
# code and the workplace population.
wz_wp11_ew = pd.read_csv("data/wp102ew_wz.csv")
wz_wp11_ew.columns = ['date', 'name', 'wz11', 'wp11', 'area', 'density']
wz_wp11_ew = wz_wp11_ew.loc[:, ['wz11', 'wp11']]

In [None]:
# Combine both tables, then look up the LAD code of every workplace zone from
# the boundary data (left join: zones without a boundary record keep NaN).
wz_wp11 = pd.concat([wz_wp11_ew, wz_wp11_sc], axis=0, sort=False)
wz_wp11 = wz_wp11.rename(columns={'wz11': 'wz11cd'})
wz_wp11 = wz_wp11.merge(wz[['wz11cd', 'lad11cd']], on='wz11cd', how='left')

In [None]:
# Total 2011 workplace population per LAD.
# Sum only the workplace population column: a bare groupby(...).sum()
# aggregates every column, and on pandas >= 2.0 the string `wz11cd` column is
# concatenated instead of dropped, which would add a `wz11cd` column here and
# break the merge below.
lad_wp11 = wz_wp11.groupby('lad11cd')[['wp11']].sum().reset_index().rename(columns={
    'wp11': 'ladwp11'
})
# Attach the LAD total to every workplace zone and derive the zone's share.
wz_wp11 = wz_wp11.merge(lad_wp11, on='lad11cd', how='left')
wz_wp11['proportion11'] = wz_wp11.wp11 / wz_wp11.ladwp11

In [None]:
# Preview the workplace-zone table with LAD totals and shares.
wz_wp11.head(n=5)

## Scale and project scenarios

In [None]:
# LAD codes present in the workplace-zone data and in the postcode data.
wlads = set(wz_wp11['lad11cd'].unique())
plads = set(pcd_pop11['lad11cd'].unique())
# Do both sources cover the same LADs? The second item lists the codes that
# appear in exactly one of the two sets.
wlads == plads, wlads.symmetric_difference(plads)

### Population

In [None]:
def scale_project_population(pcd_pop11, key):
    """Disaggregate one LAD-level population scenario to postcodes and save it.

    Reads ``../simim/data/output/arc_population__<key>.csv`` (columns
    ``lad_uk_2016``, ``timestep``, ``population``), joins every postcode to
    the projection of its LAD (``lad11cd`` matched against ``lad_uk_2016``)
    and multiplies the LAD figure by the postcode's ``proportion11``.

    Parameters
    ----------
    pcd_pop11 : pandas.DataFrame
        Postcode table with the columns ``postcode``, ``pop11``, ``lad11cd``,
        ``ladpop11`` and ``proportion11``.
    key : str
        Scenario name used in the input and output file names.

    The result (``timestep``, ``postcode``, ``lad11cd``, ``population``) is
    written to ``data/arc_population_postcode__<key>.csv``. The totals for
    2015 and 2050 are printed and nothing is returned.
    """
    # Wide layout: one row per LAD, one column per timestep.
    lad_wide = (
        pd.read_csv("../simim/data/output/arc_population__{}.csv".format(key))
        .pivot(index='lad_uk_2016', columns='timestep', values='population')
    )
    # The left join keeps every postcode; melting returns to one row per
    # (postcode, timestep) with the LAD-level figure in `population`.
    scaled = pcd_pop11.merge(
        lad_wide, how='left', left_on='lad11cd', right_on='lad_uk_2016'
    ).melt(
        id_vars=['postcode', 'pop11', 'lad11cd', 'ladpop11', 'proportion11'],
        var_name='timestep',
        value_name='population',
    )
    scaled['population'] = scaled['population'] * scaled['proportion11']
    scaled = scaled[['timestep', 'postcode', 'lad11cd', 'population']]

    totals = [
        scaled.loc[scaled['timestep'] == year, 'population'].sum()
        for year in (2015, 2050)
    ]
    print(key, *totals)

    fname = "data/arc_population_postcode__{}.csv".format(key)
    # Written with numpy's savetxt rather than DataFrame.to_csv(fname,
    # index=False): the original author measured it as ~30% quicker.
    savetxt(
        fname,
        scaled.values,
        fmt='%d,%s,%s,%.3f',
        header=','.join(scaled.columns),
        comments='',
    )
    print("Saved as", fname)

In [None]:
# Population scenarios to process; each key selects the input file
# arc_population__<key>.csv read by scale_project_population.
keys = [
    'baseline',
    '0-unplanned',
    '1-new-cities-from-dwellings',
    '2-expansion',
    '3-new-cities23-from-dwellings',
    '4-expansion23'
]
# NOTE(review): `dfs` is never filled or read in this cell - presumably a
# leftover; confirm before removing.
dfs = []
for key in keys:
    # Writes one postcode-level CSV per scenario.
    scale_project_population(pcd_pop11, key)
    # Force a garbage collection between scenarios - presumably to release the
    # large intermediate frames before the next one; confirm it is needed.
    gc.collect()

### Workplace population

In [None]:
def scale_project_employment(wz_wp11, key):
    """Disaggregate one LAD-level employment scenario to workplace zones and save it.

    Reads ``../arc-economics/data_processed/arc_employment__<key>.csv``
    (columns ``lad_uk_2016``, ``timestep``, ``employment``), multiplies the
    employment figures by 1000, joins every workplace zone to the projection
    of its LAD (``lad11cd`` matched against ``lad_uk_2016``) and multiplies
    the LAD figure by the zone's ``proportion11``.

    Parameters
    ----------
    wz_wp11 : pandas.DataFrame
        Workplace-zone table with the columns ``wz11cd``, ``wp11``,
        ``lad11cd``, ``ladwp11`` and ``proportion11``.
    key : str
        Scenario name used in the input and output file names.

    The result (``timestep``, ``wz11cd``, ``lad11cd``, ``employment``) is
    written to ``data/arc_employment_workplace_zone__<key>.csv``. The totals
    for 2015 and 2050 are printed and nothing is returned.
    """
    lad_long = pd.read_csv("../arc-economics/data_processed/arc_employment__{}.csv".format(key))
    # NOTE(review): presumably the input is in thousands of jobs - confirm.
    lad_long['employment'] = lad_long['employment'] * 1000
    # Wide layout: one row per LAD, one column per timestep.
    lad_wide = lad_long.pivot(index='lad_uk_2016', columns='timestep', values='employment')
    # The left join keeps every zone; melting returns to one row per
    # (zone, timestep) with the LAD-level figure in `employment`.
    scaled = wz_wp11.merge(
        lad_wide, how='left', left_on='lad11cd', right_on='lad_uk_2016'
    ).melt(
        id_vars=['wz11cd', 'wp11', 'lad11cd', 'ladwp11', 'proportion11'],
        var_name='timestep',
        value_name='employment',
    )
    scaled['employment'] = scaled['employment'] * scaled['proportion11']
    scaled = scaled[['timestep', 'wz11cd', 'lad11cd', 'employment']]

    totals = [
        scaled.loc[scaled['timestep'] == year, 'employment'].sum()
        for year in (2015, 2050)
    ]
    print(key, *totals)

    fname = "data/arc_employment_workplace_zone__{}.csv".format(key)
    # Written with numpy's savetxt rather than DataFrame.to_csv(fname,
    # index=False): the original author measured it as ~30% quicker.
    savetxt(
        fname,
        scaled.values,
        fmt='%d,%s,%s,%.3f',
        header=','.join(scaled.columns),
        comments='',
    )
    print("Saved as", fname)

In [None]:
# Employment scenarios to process; each key selects the input file
# arc_employment__<key>.csv read by scale_project_employment.
keys = [
    'baseline',
    '0-unplanned',
    '1-new-cities',
    '2-expansion'
]
# NOTE(review): `dfs` is never filled or read in this cell - presumably a
# leftover; confirm before removing.
dfs = []
for key in keys:
    # Writes one workplace-zone-level CSV per scenario.
    scale_project_employment(wz_wp11, key)
    # Force a garbage collection between scenarios - presumably to release the
    # large intermediate frames before the next one; confirm it is needed.
    gc.collect()