In [1]:
import os
import re

import pandas
import requests

In [2]:
# Input and output locations, relative to the notebook's working directory.
data_path = os.path.join('.', 'data_as_provided')
output_path = os.path.join('.', 'data_processed')
# Create the output directory up front so the later `to_csv` calls do not
# fail with FileNotFoundError on a fresh checkout.
os.makedirs(output_path, exist_ok=True)
arc_scenarios_file = os.path.join(data_path, 'ARC Employment Scenarios (45 sectors) v2.xlsx')
gb_baseline_file = os.path.join(data_path, 'LAD data by sectors.xlsx')

In [3]:
# Read the three baseline sheets in one call; the result is keyed by sheet
# name. The second spreadsheet row supplies the column headers and the first
# column becomes the row index.
baseline = pandas.read_excel(
    gb_baseline_file,
    index_col=0,
    header=1,
    sheet_name=['GVA', 'Employment', 'Productivity'],
)

In [4]:
# Accumulates one long-format frame per baseline sheet.
dfs = []
# Descriptive labels (with units) for each sheet name.
# NOTE(review): not referenced anywhere in the visible part of the notebook -
# confirm whether it is still needed.
label_lookup = {
    'GVA': 'GVA (GBP2016m)',
    'Employment': 'Employment (000s)',
    'Productivity': 'Productivity (GBP2016 thousands per person in employment)'
}

def melt_block(df, label, year):
    """Reshape one wide sector-by-LAD block into a long, indexed frame.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per sector (held in the index) and one column per LAD name.
        The frame is not modified.
    label : str
        Name given to the single value column of the result.
    year : int
        Year stamped on every row of the result.

    Returns
    -------
    pandas.DataFrame
        One column named `label`, indexed by (year, lad_nm, sector), with
        surrounding whitespace stripped from the LAD names.
    """
    # rename_axis returns a new frame, so the caller's index keeps its name
    long_df = df.rename_axis('sector').reset_index().melt(
        id_vars=['sector'], var_name='lad_nm', value_name=label
    )
    long_df['year'] = year
    long_df['lad_nm'] = long_df['lad_nm'].map(str.strip)
    return long_df.set_index(['year', 'lad_nm', 'sector'])

for label, df in baseline.items():
    # Each sheet stacks two blocks: rows 0-44 are read as 2018 and rows from
    # 48 onwards as 2050; rows 45-47 in between are not used.
    dfs.append(
        pandas.concat([
            melt_block(df[:45], label, 2018),
            melt_block(df[48:], label, 2050)
        ], axis=0)
    )

# Join the per-variable frames column-wise; rows align on the shared
# (year, lad_nm, sector) index. `levels=` was removed: it only applies
# together with `keys=`, so passing it alone had no effect.
baseline_all = pandas.concat(dfs, axis=1)
baseline_all.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,GVA,Employment,Productivity
year,lad_nm,sector,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018,Darlington,Agriculture etc,11.2521,0.281729,39.9396
2018,Darlington,Mining & quarrying,9.70036,0.0992628,97.7241
2018,Darlington,"Food, drink & tobacco",11.0349,0.233751,47.2079
2018,Darlington,Textiles etc,6.24571,0.181865,34.3426
2018,Darlington,Wood & paper,13.7693,0.146105,94.2427


In [5]:
# years * Great Britain LADs * sectors
assert len(baseline_all) == 2 * 380 * 45 

In [6]:
# Read the four scenario sheets in one call; the result is keyed by sheet name.
variants = pandas.read_excel(
    arc_scenarios_file,
    header=1,
    index_col=0,
    sheet_name=['Baseline', 'Unplanned', 'City Focused', 'New Developments'],
)

In [7]:
# Spot-check: take the first 45 rows x 29 columns of the Baseline sheet and
# melt them to one row per (sector, LAD).
df = (
    variants['Baseline']
    .iloc[:45, :29]
    .rename_axis('sector')
    .reset_index()
    .melt(id_vars='sector', var_name='lad_nm', value_name='Employment')
)
assert len(df) == 45 * 29
df.head()

Unnamed: 0,sector,lad_nm,Employment
0,Agriculture etc,Peterborough,0.401362
1,Mining & quarrying,Peterborough,0.118696
2,"Food, drink & tobacco",Peterborough,0.979734
3,Textiles etc,Peterborough,0.195852
4,Wood & paper,Peterborough,0.243796


In [8]:
def melt_scenario(s_df, scenario, name, year, row, n_sectors=45, n_lads=29):
    """Extract one block from a scenario sheet and reshape it to long form.

    Parameters
    ----------
    s_df : pandas.DataFrame
        Whole scenario sheet, with sector names in the index and one column
        per LAD. The frame is not modified.
    scenario : str
        Scenario name stamped on every row of the result.
    name : str
        Name given to the single value column of the result.
    year : int
        Year stamped on every row of the result.
    row : int
        Positional index of the first row of the block.
    n_sectors : int, optional
        Number of rows in the block (default 45).
    n_lads : int, optional
        Number of leading columns to keep (default 29).

    Returns
    -------
    pandas.DataFrame
        One column named `name`, indexed by (scenario, year, lad_nm, sector).
    """
    # Name the index here rather than relying on the caller having done so;
    # rename_axis returns a new frame, leaving `s_df` untouched.
    block = s_df.iloc[row:row + n_sectors, 0:n_lads].rename_axis('sector')
    long_df = block.reset_index().melt(
        id_vars=['sector'],
        var_name='lad_nm',
        value_name=name
    )
    long_df['year'] = year
    long_df['scenario'] = scenario
    return long_df.set_index(['scenario', 'year', 'lad_nm', 'sector'])

dfs = []
# (value column, first row of the 2018 block, first row of the 2050 block)
# for each variable stacked on a scenario sheet.
block_rows = [
    ('Employment', 0, 49),
    ('GVA', 98, 147),
    ('Productivity', 196, 245),
]
for scenario, s_df in variants.items():
    s_df.index.name = 'sector'
    # One long frame per variable, with the 2018 rows followed by the 2050 rows.
    s_dfs = [
        pandas.concat([
            melt_scenario(s_df, scenario, name, 2018, row_2018),
            melt_scenario(s_df, scenario, name, 2050, row_2050),
        ], axis=0)
        for name, row_2018, row_2050 in block_rows
    ]
    # Column-wise join on the shared (scenario, year, lad_nm, sector) index.
    # `levels=` was removed: it only applies together with `keys=`.
    s_df_all = pandas.concat(s_dfs, axis=1)
    dfs.append(s_df_all)

variants_all = pandas.concat(dfs, axis=0)
variants_all.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Employment,GVA,Productivity
scenario,year,lad_nm,sector,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Baseline,2018,Peterborough,Agriculture etc,0.401362,33.762,84.1185
Baseline,2018,Peterborough,Mining & quarrying,0.118696,4.24017,35.7228
Baseline,2018,Peterborough,"Food, drink & tobacco",0.979734,80.1415,81.7992
Baseline,2018,Peterborough,Textiles etc,0.195852,14.1783,72.393
Baseline,2018,Peterborough,Wood & paper,0.243796,12.2622,50.2971


In [9]:
assert len(variants_all) == 2 * 29 * 4 * 45  # years * Arc LADs * scenarios * sectors

### Add LAD codes

In [10]:
lad_nmcds = pandas.read_csv(os.path.join(data_path, 'lad_nmcd_changes.csv'))

In [11]:
lad_nmcds = lad_nmcds[['lad11nm', 'lad11cd', 'lad16nm', 'lad16cd']]

In [12]:
baseline_all_lad_nms = set(baseline_all.reset_index().lad_nm.unique())
all_lad_nms = set(lad_nmcds.lad11nm)

In [13]:
all_lad_nms - baseline_all_lad_nms

{'Antrim',
 'Ards',
 'Armagh',
 'Ballymena',
 'Ballymoney',
 'Banbridge',
 'Belfast',
 'Carrickfergus',
 'Castlereagh',
 'Coleraine',
 'Cookstown',
 'Craigavon',
 'Derry',
 'Down',
 'Dungannon',
 'Fermanagh',
 'Isle of Anglesey',
 "King's Lynn and West Norfolk",
 'Larne',
 'Limavady',
 'Lisburn',
 'Magherafelt',
 'Moyle',
 'Newry and Mourne',
 'Newtownabbey',
 'North Down',
 'Omagh',
 'Rhondda Cynon Taf',
 'Strabane'}

In [14]:
baseline_all_lad_nms - all_lad_nms

{'Anglesey', 'King`s Lynn and West Norfolk', 'Rhondda, Cynon, Taff'}

In [15]:
# Move the index levels back into columns, then replace the three LAD
# spellings that differ from the names in the lookup table.
baseline_all = baseline_all.reset_index().assign(
    lad_nm=lambda frame: frame['lad_nm'].replace({
        'Anglesey': 'Isle of Anglesey',
        'King`s Lynn and West Norfolk': "King's Lynn and West Norfolk",
        'Rhondda, Cynon, Taff': 'Rhondda Cynon Taf'
    })
)

In [16]:
baseline_wlad = baseline_all.merge(lad_nmcds, left_on='lad_nm', right_on='lad11nm').drop('lad_nm', axis=1)

In [17]:
len(baseline_wlad.lad11nm.unique()), len(baseline_wlad), len(baseline_all)

(380, 34200, 34200)

In [18]:
variants_wlad = variants_all.reset_index().merge(lad_nmcds, left_on='lad_nm', right_on='lad11nm').drop('lad_nm', axis=1)

In [19]:
variants_wlad.head()

Unnamed: 0,scenario,year,sector,Employment,GVA,Productivity,lad11nm,lad11cd,lad16nm,lad16cd
0,Baseline,2018,Agriculture etc,0.401362,33.762,84.1185,Peterborough,E06000031,Peterborough,E06000031
1,Baseline,2018,Mining & quarrying,0.118696,4.24017,35.7228,Peterborough,E06000031,Peterborough,E06000031
2,Baseline,2018,"Food, drink & tobacco",0.979734,80.1415,81.7992,Peterborough,E06000031,Peterborough,E06000031
3,Baseline,2018,Textiles etc,0.195852,14.1783,72.393,Peterborough,E06000031,Peterborough,E06000031
4,Baseline,2018,Wood & paper,0.243796,12.2622,50.2971,Peterborough,E06000031,Peterborough,E06000031


In [20]:
len(variants_wlad.lad11nm.unique()), len(variants_wlad), len(variants_all)

(29, 10440, 10440)

In [21]:
baseline_wlad[(baseline_wlad.lad11nm != baseline_wlad.lad16nm) | (baseline_wlad.lad11cd != baseline_wlad.lad16cd)].lad16nm.unique()

array(['Northumberland', 'Gateshead', 'East Hertfordshire', 'St Albans',
       'Stevenage', 'Welwyn Hatfield', 'Vale of Glamorgan',
       'City of Edinburgh', 'Na h-Eileanan Siar'], dtype=object)

## Output data

In [22]:
baseline_wlad.to_csv(os.path.join(output_path, 'gb_baseline.csv'), index=False)

In [23]:
variants_wlad.to_csv(os.path.join(output_path, 'arc_variants.csv'), index=False)

### Merged, separate file-per-scenario

In [24]:
# Keep employment and GVA (dropping productivity) with lower-case column names.
base = (
    baseline_wlad
    .loc[:, ['year', 'sector', 'Employment', 'GVA', 'lad11nm', 'lad11cd', 'lad16nm', 'lad16cd']]
    .rename(columns={'GVA': 'gva', 'Employment': 'employment'})
)
# Restrict to years 2015-2050 inclusive.
base = base.loc[base['year'].isin(range(2015, 2051))]
assert len(base) == 2 * 45 * 380  # years * sectors * LADs in GB

### Project Northern Ireland rGVA by LAD and sector

Look at projection for Northern Ireland based on 2015 industry composition, UK average growth

In [25]:
def download(url, filename, force=False):
    """Fetch `url` into `filename`, reusing an existing local copy by default.

    Parameters
    ----------
    url : str
        Address to download from.
    filename : str
        Destination path. If it already exists and `force` is false, nothing
        is requested.
    force : bool, optional
        Download even when `filename` already exists.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status. No file is written in
        that case, so an error page is never cached as data.
    """
    if not force and os.path.exists(filename):
        return

    # Stream into a temporary sibling file and move it into place only once
    # complete: an interrupted download must not leave a truncated file that
    # later runs would treat as a finished download.
    partial = filename + '.part'
    try:
        with requests.get(url, stream=True) as r:
            r.raise_for_status()
            with open(partial, 'wb') as fd:
                for chunk in r.iter_content(chunk_size=65536):
                    fd.write(chunk)
        os.replace(partial, filename)
    finally:
        # Only present if something failed before the move above.
        if os.path.exists(partial):
            os.remove(partial)

In [26]:
# Nominal and real regional gross value added (balanced) by industry, NUTS1, NUTS2, NUTS3, 1998-2017
rgva_uk_ind_url = "https://www.ons.gov.uk/file?uri=/economy/grossvalueaddedgva/datasets/nominalandrealregionalgrossvalueaddedbalancedbyindustry/current/nominalandrealregionalgvabbyindustry.xlsx"
download(rgva_uk_ind_url, os.path.join(data_path, 'rgva_uk_industry.xlsx'))

In [27]:
rgva = pandas.read_excel(
    os.path.join(data_path, 'rgva_uk_industry.xlsx'), 
    sheet_name='Table3c', 
    header=1)

In [28]:
# Keep only the first 13783 rows of the sheet.
# NOTE(review): the hard-coded row count presumably matches one specific
# release of the workbook - confirm it still holds if the file is re-downloaded.
rgva = rgva[:13783]  # skip footnotes

In [29]:
rgva.head()

Unnamed: 0,Region code,Region name,SIC07,SIC07 description,1998,1999,2000,2001,2002,2003,...,2008,2009,2010,2011,2012,2013,2014,2015,2016,20173
0,UKC11,Hartlepool and Stockton-on-Tees,1-9,"Agriculture, forestry and fishing; mining and ...",12.0,15.0,19.0,14.0,19.0,32.0,...,20.0,43.0,17.0,40.0,49.0,52.0,64.0,39.0,36.0,43.0
1,UKC11,Hartlepool and Stockton-on-Tees,AB,"Agriculture, forestry and fishing; mining and ...",12.0,15.0,19.0,14.0,19.0,32.0,...,20.0,43.0,17.0,40.0,49.0,52.0,64.0,39.0,36.0,43.0
2,UKC11,Hartlepool and Stockton-on-Tees,10-12,"Manufacture of food, beverages and tobacco",106.0,110.0,119.0,99.0,86.0,99.0,...,106.0,131.0,136.0,126.0,127.0,88.0,85.0,70.0,116.0,124.0
3,UKC11,Hartlepool and Stockton-on-Tees,CA,"Manufacture of food, beverages and tobacco",106.0,110.0,119.0,99.0,86.0,99.0,...,106.0,131.0,136.0,126.0,127.0,88.0,85.0,70.0,116.0,124.0
4,UKC11,Hartlepool and Stockton-on-Tees,13-15,"Manufacture of textiles, wearing apparel and l...",12.0,10.0,9.0,9.0,6.0,7.0,...,5.0,5.0,8.0,7.0,6.0,5.0,8.0,7.0,6.0,8.0


In [30]:
# pick division-level values (avoid double-count sectors, total)
# Keep rows whose SIC07 code starts with a digit (e.g. '1-9', '10-12'); the
# preview above shows letter-coded rows such as 'AB' and 'CA' repeating the
# same figures. Raw string: '\d' in a plain literal is an invalid escape.
p = re.compile(r'^\d')
rgva = rgva[rgva.SIC07.apply(lambda sic: bool(p.match(str(sic))))]

In [31]:
# Fetch the LAD / NUTS3 lookup CSV (download() skips the request when the
# local file already exists) and load it.
lad_nuts3_lu_url = "http://geoportal1-ons.opendata.arcgis.com/datasets/e1e5de6c5fcc40c78adb03d84a2d299d_0.csv"
download(lad_nuts3_lu_url, os.path.join(data_path, "lad_nuts_lu.csv"))
lad_nuts = pandas.read_csv(os.path.join(data_path, 'lad_nuts_lu.csv'))
# Keep one row per distinct (LAD code, LAD name, NUTS3 code) combination,
# ordered by LAD code, with lower-case column names.
lad_nuts = lad_nuts[['LAD16CD', 'LAD16NM', 'NUTS318CD']].sort_values('LAD16CD').drop_duplicates().rename(columns={
    'LAD16CD': 'lad16cd',
    'LAD16NM': 'lad16nm',
    'NUTS318CD': 'nuts318cd'
})
# Preview, then the number of distinct NUTS3 codes and of distinct LAD codes.
lad_nuts.head(), len(lad_nuts.nuts318cd.unique()), len(lad_nuts.lad16cd.unique())

(     lad16cd               lad16nm nuts318cd
 0  E06000001            Hartlepool     UKC11
 2  E06000002         Middlesbrough     UKC12
 3  E06000003  Redcar and Cleveland     UKC12
 1  E06000004      Stockton-on-Tees     UKC11
 4  E06000005            Darlington     UKC13, 179, 391)

In [32]:
# Attach LADs to the regional GVA rows by NUTS3 code. The outer join keeps
# rows from either table that have no match on the other side. Only the 2015
# value column (integer column label 2015) is carried forward.
rgva15 = rgva.merge(
    lad_nuts, left_on='Region code', right_on='nuts318cd', how='outer'
).rename(columns={
    'Region name': 'nuts318nm',
    2015: 'gva15_nuts_division_group',
    'SIC07': 'sic07_division_group',
    'SIC07 description': 'sic07_division_group_description'
})[[
    'lad16cd',
    'lad16nm',
    'nuts318cd',
    'nuts318nm',
    'sic07_division_group',
    'sic07_division_group_description',
    'gva15_nuts_division_group'
]]
# Force the division-group codes to strings; a later cell calls str.replace on them.
rgva15.sic07_division_group = rgva15.sic07_division_group.astype(str)
rgva15

Unnamed: 0,lad16cd,lad16nm,nuts318cd,nuts318nm,sic07_division_group,sic07_division_group_description,gva15_nuts_division_group
0,E06000001,Hartlepool,UKC11,Hartlepool and Stockton-on-Tees,1-9,"Agriculture, forestry and fishing; mining and ...",39.0
1,E06000004,Stockton-on-Tees,UKC11,Hartlepool and Stockton-on-Tees,1-9,"Agriculture, forestry and fishing; mining and ...",39.0
2,E06000001,Hartlepool,UKC11,Hartlepool and Stockton-on-Tees,10-12,"Manufacture of food, beverages and tobacco",70.0
3,E06000004,Stockton-on-Tees,UKC11,Hartlepool and Stockton-on-Tees,10-12,"Manufacture of food, beverages and tobacco",70.0
4,E06000001,Hartlepool,UKC11,Hartlepool and Stockton-on-Tees,13-15,"Manufacture of textiles, wearing apparel and l...",7.0
5,E06000004,Stockton-on-Tees,UKC11,Hartlepool and Stockton-on-Tees,13-15,"Manufacture of textiles, wearing apparel and l...",7.0
6,E06000001,Hartlepool,UKC11,Hartlepool and Stockton-on-Tees,16-18,Manufacture of wood and paper products and pri...,49.0
7,E06000004,Stockton-on-Tees,UKC11,Hartlepool and Stockton-on-Tees,16-18,Manufacture of wood and paper products and pri...,49.0
8,E06000001,Hartlepool,UKC11,Hartlepool and Stockton-on-Tees,19-23,"Manufacture of petroleum, chemicals and other ...",498.0
9,E06000004,Stockton-on-Tees,UKC11,Hartlepool and Stockton-on-Tees,19-23,"Manufacture of petroleum, chemicals and other ...",498.0


In [97]:
# Sector mapping table; it is later merged on `sic07_division_group` and
# used to look up ITRC sector codes.
# Read via `data_path` like every other input instead of repeating the
# directory name.
sector_map = pandas.read_csv(os.path.join(data_path, 'map_sectors.csv'))
sector_map

Unnamed: 0,sector,sector_description,sic07_division_broad,sic07_division_group,sic07_division_group_description,itrc_sector,itrc_sector_description,is_knowledge_based,sic07_division,sic07_division_description
0,AB,"Agriculture, forestry and fishing; mining and ...",1 to 9; 35 to 39,1 to 9,"Agriculture, forestry and fishing; mining and ...",1,Agriculture etc,0,1,"Crop and animal production, hunting and relate..."
1,AB,"Agriculture, forestry and fishing; mining and ...",1 to 9; 35 to 39,1 to 9,"Agriculture, forestry and fishing; mining and ...",1,Agriculture etc,0,2,Forestry and logging
2,AB,"Agriculture, forestry and fishing; mining and ...",1 to 9; 35 to 39,1 to 9,"Agriculture, forestry and fishing; mining and ...",1,Agriculture etc,0,3,Fishing and aquaculture
3,AB,"Agriculture, forestry and fishing; mining and ...",1 to 9; 35 to 39,1 to 9,"Agriculture, forestry and fishing; mining and ...",2,Mining & quarrying,0,5,Mining of coal and lignite
4,AB,"Agriculture, forestry and fishing; mining and ...",1 to 9; 35 to 39,1 to 9,"Agriculture, forestry and fishing; mining and ...",2,Mining & quarrying,0,6,Extraction of crude petroleum and natural gas
5,AB,"Agriculture, forestry and fishing; mining and ...",1 to 9; 35 to 39,1 to 9,"Agriculture, forestry and fishing; mining and ...",2,Mining & quarrying,0,7,Mining of metal ores
6,AB,"Agriculture, forestry and fishing; mining and ...",1 to 9; 35 to 39,1 to 9,"Agriculture, forestry and fishing; mining and ...",2,Mining & quarrying,0,8,Other mining and quarrying
7,AB,"Agriculture, forestry and fishing; mining and ...",1 to 9; 35 to 39,1 to 9,"Agriculture, forestry and fishing; mining and ...",2,Mining & quarrying,0,9,Mining support service activities
8,C,Manufacturing,10 to 15,10 to 12,"Manufacture of food, beverages and tobacco",3,"Food, drink & tobacco",0,10,Manufacture of food products
9,C,Manufacturing,10 to 15,10 to 12,"Manufacture of food, beverages and tobacco",3,"Food, drink & tobacco",0,11,Manufacture of beverages


In [36]:
# Rewrite '10-12' style codes as '10 to 12', the form used by sector_map's
# sic07_division_group column.
rgva15['sic07_division_group'] = rgva15['sic07_division_group'].map(
    lambda code: code.replace('-', ' to ')
)

In [37]:
rgva15s = rgva15.merge(sector_map, on='sic07_division_group', how='outer')
rgva15s

Unnamed: 0,lad16cd,lad16nm,nuts318cd,nuts318nm,sic07_division_group,sic07_division_group_description_x,gva15_nuts_division_group,sector,sector_description,sic07division_broad,sic07_division_group_description_y,itrc_sector,itrc_sector_description,is_knowledge_based,sic07_division,sic07_division_description
0,E06000001,Hartlepool,UKC11,Hartlepool and Stockton-on-Tees,1 to 9,"Agriculture, forestry and fishing; mining and ...",39.0,AB,"Agriculture, forestry and fishing; mining and ...",1 to 9; 35 to 39,"Agriculture, forestry and fishing; mining and ...",1,Agriculture etc,0,1,"Crop and animal production, hunting and relate..."
1,E06000001,Hartlepool,UKC11,Hartlepool and Stockton-on-Tees,1 to 9,"Agriculture, forestry and fishing; mining and ...",39.0,AB,"Agriculture, forestry and fishing; mining and ...",1 to 9; 35 to 39,"Agriculture, forestry and fishing; mining and ...",1,Agriculture etc,0,2,Forestry and logging
2,E06000001,Hartlepool,UKC11,Hartlepool and Stockton-on-Tees,1 to 9,"Agriculture, forestry and fishing; mining and ...",39.0,AB,"Agriculture, forestry and fishing; mining and ...",1 to 9; 35 to 39,"Agriculture, forestry and fishing; mining and ...",1,Agriculture etc,0,3,Fishing and aquaculture
3,E06000001,Hartlepool,UKC11,Hartlepool and Stockton-on-Tees,1 to 9,"Agriculture, forestry and fishing; mining and ...",39.0,AB,"Agriculture, forestry and fishing; mining and ...",1 to 9; 35 to 39,"Agriculture, forestry and fishing; mining and ...",2,Mining & quarrying,0,5,Mining of coal and lignite
4,E06000001,Hartlepool,UKC11,Hartlepool and Stockton-on-Tees,1 to 9,"Agriculture, forestry and fishing; mining and ...",39.0,AB,"Agriculture, forestry and fishing; mining and ...",1 to 9; 35 to 39,"Agriculture, forestry and fishing; mining and ...",2,Mining & quarrying,0,6,Extraction of crude petroleum and natural gas
5,E06000001,Hartlepool,UKC11,Hartlepool and Stockton-on-Tees,1 to 9,"Agriculture, forestry and fishing; mining and ...",39.0,AB,"Agriculture, forestry and fishing; mining and ...",1 to 9; 35 to 39,"Agriculture, forestry and fishing; mining and ...",2,Mining & quarrying,0,7,Mining of metal ores
6,E06000001,Hartlepool,UKC11,Hartlepool and Stockton-on-Tees,1 to 9,"Agriculture, forestry and fishing; mining and ...",39.0,AB,"Agriculture, forestry and fishing; mining and ...",1 to 9; 35 to 39,"Agriculture, forestry and fishing; mining and ...",2,Mining & quarrying,0,8,Other mining and quarrying
7,E06000001,Hartlepool,UKC11,Hartlepool and Stockton-on-Tees,1 to 9,"Agriculture, forestry and fishing; mining and ...",39.0,AB,"Agriculture, forestry and fishing; mining and ...",1 to 9; 35 to 39,"Agriculture, forestry and fishing; mining and ...",2,Mining & quarrying,0,9,Mining support service activities
8,E06000004,Stockton-on-Tees,UKC11,Hartlepool and Stockton-on-Tees,1 to 9,"Agriculture, forestry and fishing; mining and ...",39.0,AB,"Agriculture, forestry and fishing; mining and ...",1 to 9; 35 to 39,"Agriculture, forestry and fishing; mining and ...",1,Agriculture etc,0,1,"Crop and animal production, hunting and relate..."
9,E06000004,Stockton-on-Tees,UKC11,Hartlepool and Stockton-on-Tees,1 to 9,"Agriculture, forestry and fishing; mining and ...",39.0,AB,"Agriculture, forestry and fishing; mining and ...",1 to 9; 35 to 39,"Agriculture, forestry and fishing; mining and ...",1,Agriculture etc,0,2,Forestry and logging


### Disaggregate UK 2015 rGVA to LADs and SIC07 divisions

Very coarse, purely proportional split - could be improved e.g. by using BRES employment industry percentage figures for current split to full divisions

In [38]:
# Number of rows with a LAD code in each (NUTS3, division group) cell; this
# is the divisor for the equal split of the group's GVA in the next step.
count_for_disagg = (
    rgva15s
    .groupby(['nuts318cd', 'sic07_division_group'])['lad16cd']
    .count()
    .rename('nuts_division_group_count')
    .reset_index()
)
rgva15sd = rgva15s.merge(
    count_for_disagg, how='left', on=['nuts318cd', 'sic07_division_group']
)
rgva15sd

Unnamed: 0,lad16cd,lad16nm,nuts318cd,nuts318nm,sic07_division_group,sic07_division_group_description_x,gva15_nuts_division_group,sector,sector_description,sic07division_broad,sic07_division_group_description_y,itrc_sector,itrc_sector_description,is_knowledge_based,sic07_division,sic07_division_description,nuts_division_group_count
0,E06000001,Hartlepool,UKC11,Hartlepool and Stockton-on-Tees,1 to 9,"Agriculture, forestry and fishing; mining and ...",39.0,AB,"Agriculture, forestry and fishing; mining and ...",1 to 9; 35 to 39,"Agriculture, forestry and fishing; mining and ...",1,Agriculture etc,0,1,"Crop and animal production, hunting and relate...",16.0
1,E06000001,Hartlepool,UKC11,Hartlepool and Stockton-on-Tees,1 to 9,"Agriculture, forestry and fishing; mining and ...",39.0,AB,"Agriculture, forestry and fishing; mining and ...",1 to 9; 35 to 39,"Agriculture, forestry and fishing; mining and ...",1,Agriculture etc,0,2,Forestry and logging,16.0
2,E06000001,Hartlepool,UKC11,Hartlepool and Stockton-on-Tees,1 to 9,"Agriculture, forestry and fishing; mining and ...",39.0,AB,"Agriculture, forestry and fishing; mining and ...",1 to 9; 35 to 39,"Agriculture, forestry and fishing; mining and ...",1,Agriculture etc,0,3,Fishing and aquaculture,16.0
3,E06000001,Hartlepool,UKC11,Hartlepool and Stockton-on-Tees,1 to 9,"Agriculture, forestry and fishing; mining and ...",39.0,AB,"Agriculture, forestry and fishing; mining and ...",1 to 9; 35 to 39,"Agriculture, forestry and fishing; mining and ...",2,Mining & quarrying,0,5,Mining of coal and lignite,16.0
4,E06000001,Hartlepool,UKC11,Hartlepool and Stockton-on-Tees,1 to 9,"Agriculture, forestry and fishing; mining and ...",39.0,AB,"Agriculture, forestry and fishing; mining and ...",1 to 9; 35 to 39,"Agriculture, forestry and fishing; mining and ...",2,Mining & quarrying,0,6,Extraction of crude petroleum and natural gas,16.0
5,E06000001,Hartlepool,UKC11,Hartlepool and Stockton-on-Tees,1 to 9,"Agriculture, forestry and fishing; mining and ...",39.0,AB,"Agriculture, forestry and fishing; mining and ...",1 to 9; 35 to 39,"Agriculture, forestry and fishing; mining and ...",2,Mining & quarrying,0,7,Mining of metal ores,16.0
6,E06000001,Hartlepool,UKC11,Hartlepool and Stockton-on-Tees,1 to 9,"Agriculture, forestry and fishing; mining and ...",39.0,AB,"Agriculture, forestry and fishing; mining and ...",1 to 9; 35 to 39,"Agriculture, forestry and fishing; mining and ...",2,Mining & quarrying,0,8,Other mining and quarrying,16.0
7,E06000001,Hartlepool,UKC11,Hartlepool and Stockton-on-Tees,1 to 9,"Agriculture, forestry and fishing; mining and ...",39.0,AB,"Agriculture, forestry and fishing; mining and ...",1 to 9; 35 to 39,"Agriculture, forestry and fishing; mining and ...",2,Mining & quarrying,0,9,Mining support service activities,16.0
8,E06000004,Stockton-on-Tees,UKC11,Hartlepool and Stockton-on-Tees,1 to 9,"Agriculture, forestry and fishing; mining and ...",39.0,AB,"Agriculture, forestry and fishing; mining and ...",1 to 9; 35 to 39,"Agriculture, forestry and fishing; mining and ...",1,Agriculture etc,0,1,"Crop and animal production, hunting and relate...",16.0
9,E06000004,Stockton-on-Tees,UKC11,Hartlepool and Stockton-on-Tees,1 to 9,"Agriculture, forestry and fishing; mining and ...",39.0,AB,"Agriculture, forestry and fishing; mining and ...",1 to 9; 35 to 39,"Agriculture, forestry and fishing; mining and ...",1,Agriculture etc,0,2,Forestry and logging,16.0


In [40]:
# Equal split: divide each NUTS3 division-group total by the number of
# (LAD, division) rows that share it.
rgva15sd['gva15_lad_division'] = rgva15sd.gva15_nuts_division_group / rgva15sd.nuts_division_group_count
# Keep only the identifying columns and the disaggregated value.
rgva15_lad_division = rgva15sd[[
    'lad16cd', 'itrc_sector', 'sic07_division', 'gva15_lad_division'
]]
rgva15_lad_division.head()

Unnamed: 0,lad16cd,itrc_sector,sic07_division,gva15_lad_division
0,E06000001,1,1,2.4375
1,E06000001,1,2,2.4375
2,E06000001,1,3,2.4375
3,E06000001,2,5,2.4375
4,E06000001,2,6,2.4375


In [41]:
# Total 2015 GVA per LAD. The value column is selected before summing so the
# other columns (sector and division codes) are never aggregated.
sum_for_disagg = rgva15_lad_division.groupby('lad16cd')[['gva15_lad_division']].sum()
sum_for_disagg.columns = ['gva15_lad_total']
rgva15_lad_division_d = rgva15_lad_division.merge(sum_for_disagg.reset_index(), on='lad16cd')
# Each division's share of its LAD's total.
rgva15_lad_division_d['gva15_lad_division_proportion'] = (
    rgva15_lad_division_d.gva15_lad_division / rgva15_lad_division_d.gva15_lad_total
)
rgva15_lad_division_d.head()

Unnamed: 0,lad16cd,itrc_sector,sic07_division,gva15_lad_division,gva15_lad_total,gva15_lad_division_proportion
0,E06000001,1,1,2.4375,2801.0,0.00087
1,E06000001,1,2,2.4375,2801.0,0.00087
2,E06000001,1,3,2.4375,2801.0,0.00087
3,E06000001,2,5,2.4375,2801.0,0.00087
4,E06000001,2,6,2.4375,2801.0,0.00087


### Project NI regions as proportion of NI total

Where future NI total is projected assuming NI growth equals GB growth:

NI GVA in 2015 * (GB GVA in future year / GB GVA in 2015) = NI GVA in future year

And NI future regional/sectoral GVA follows the same proportional structure as in 2015.

In [42]:
ni_rgva = rgva15_lad_division_d[rgva15_lad_division_d.lad16cd.str.startswith('N')].copy()
ni_rgva.sort_values(['lad16cd', 'sic07_division']).head()

Unnamed: 0,lad16cd,itrc_sector,sic07_division,gva15_lad_division,gva15_lad_total,gva15_lad_division_proportion
34408,N09000001,1,1,3.875,2379.0,0.001629
34409,N09000001,1,2,3.875,2379.0,0.001629
34410,N09000001,1,3,3.875,2379.0,0.001629
34411,N09000001,2,5,3.875,2379.0,0.001629
34412,N09000001,2,6,3.875,2379.0,0.001629


In [43]:
# One row per NI LAD with its 2015 total GVA ...
ni_rgva_for_proj = ni_rgva[['lad16cd', 'gva15_lad_total']].drop_duplicates()
# ... and that LAD's share of the summed NI total.
ni_rgva_for_proj['gva15_lad_ni_proportion'] = ni_rgva_for_proj.gva15_lad_total / ni_rgva_for_proj.gva15_lad_total.sum()
ni_rgva_for_proj

Unnamed: 0,lad16cd,gva15_lad_total,gva15_lad_ni_proportion
33792,N09000003,15120.0,0.417092
33880,N09000002,3300.0,0.091032
33968,N09000010,2240.0,0.061791
34056,N09000011,1495.0,0.04124
34144,N09000005,2315.0,0.06386
34232,N09000009,2149.0,0.059281
34320,N09000004,1620.0,0.044688
34408,N09000001,2379.0,0.065626
34496,N09000007,2199.0,0.06066
34584,N09000008,2015.0,0.055585


In [44]:
# Independent copy of the GB baseline restricted to the columns used for the
# growth calculation.
gb_base = base.loc[:, ['year', 'lad16cd', 'gva', 'employment']].copy()
gb_base.head()

Unnamed: 0,year,lad16cd,gva,employment
0,2018,E06000005,11.2521,0.281729
1,2018,E06000005,9.70036,0.0992628
2,2018,E06000005,11.0349,0.233751
3,2018,E06000005,6.24571,0.181865
4,2018,E06000005,13.7693,0.146105


In [45]:
# Total GB GVA per year. `gva` is selected before summing so the string
# column `lad16cd` is not aggregated as well (pandas >= 2 no longer drops
# non-numeric columns from groupby sums).
gb_growth = gb_base.groupby('year')[['gva']].sum().reset_index()
gb_growth.head()

Unnamed: 0,year,gva
0,2018,1581542.0
1,2050,2329914.0


In [47]:
# 2015 totals: NI is the sum of the NI LAD totals, UK is the sum over every
# disaggregated LAD/division value, and GB is taken as UK minus NI.
total_ni_rgva15 = ni_rgva_for_proj.gva15_lad_total.sum()
total_uk_rgva15 = rgva15_lad_division_d.gva15_lad_division.sum()
total_gb_rgva15 = total_uk_rgva15 - total_ni_rgva15

In [48]:
dfs = []
# gb_growth holds one row per year, so iterate the (year, GB total) pairs
# directly instead of filtering the frame and calling float() on a
# one-element Series (deprecated in recent pandas).
for year, gb_future in zip(gb_growth.year, gb_growth.gva):
    df = ni_rgva_for_proj.copy()
    df['year'] = year
    # Scale the NI 2015 total by the ratio of GB GVA in `year` to GB GVA in 2015.
    ni_future = total_ni_rgva15 * (float(gb_future) / total_gb_rgva15)
    # Share the projected NI total between LADs using their 2015 proportions.
    df['gva'] = df.gva15_lad_ni_proportion * ni_future
    dfs.append(df)

ni_base = pandas.concat(dfs, axis=0)[['year', 'lad16cd', 'gva']]
ni_base

Unnamed: 0,year,lad16cd,gva
33792,2018,N09000003,14586.819593
33880,2018,N09000002,3183.63126
33968,2018,N09000010,2161.01031
34056,2018,N09000011,1442.281435
34144,2018,N09000005,2233.365566
34232,2018,N09000009,2073.219266
34320,2018,N09000004,1562.873528
34408,2018,N09000001,2295.108718
34496,2018,N09000007,2121.456104
34584,2018,N09000008,1943.944542


### Disaggregate projections by SIC07 division, reaggregate to ITRC sector

Assuming constant sectoral shares of GVA, projected LAD sectoral GVA is projected LAD GVA * (current LAD sectoral GVA / current LAD GVA).

In [49]:
ni_disagg = ni_base.merge(rgva15_lad_division_d, on='lad16cd', how='left').rename(columns={'year': 'timestep'})
ni_disagg.head()

Unnamed: 0,timestep,lad16cd,gva,itrc_sector,sic07_division,gva15_lad_division,gva15_lad_total,gva15_lad_division_proportion
0,2018,N09000003,14586.819593,1,1,2.0,15120.0,0.000132
1,2018,N09000003,14586.819593,1,2,2.0,15120.0,0.000132
2,2018,N09000003,14586.819593,1,3,2.0,15120.0,0.000132
3,2018,N09000003,14586.819593,2,5,2.0,15120.0,0.000132
4,2018,N09000003,14586.819593,2,6,2.0,15120.0,0.000132


In [50]:
ni_disagg['gva_lad_division'] = ni_disagg.gva * ni_disagg.gva15_lad_division_proportion

In [51]:
# Re-aggregate division-level projections to ITRC sectors. Only the projected
# value column is summed, so the remaining columns are never aggregated.
# NOTE: this cell overwrites `ni_disagg`; re-running it on its own fails
# because `gva_lad_division` no longer exists afterwards.
ni_disagg = (
    ni_disagg
    .groupby(['timestep', 'lad16cd', 'itrc_sector'])[['gva_lad_division']]
    .sum()
    .reset_index()
    .dropna()
    .rename(columns={'gva_lad_division': 'gva_per_sector'})
)
ni_disagg = ni_disagg[ni_disagg.itrc_sector != 46]  # drop unallocated/household

In [52]:
# Shape check: every NI LAD must have all 45 sectors at both timesteps.
assert len(ni_disagg) == 2 * 11 * 45  # timestep * NI LADs * 45 sectors
# Placeholder value so the NI rows have the same columns as the GB rows.
ni_disagg['employment'] = 0  # no estimate for NI employment
ni_disagg.sort_values(by=['timestep', 'lad16cd', 'itrc_sector']).tail()

Unnamed: 0,timestep,lad16cd,itrc_sector,gva_per_sector,employment
1006,2050,N09000011,41,49.743468,0
1007,2050,N09000011,42,106.593147,0
1008,2050,N09000011,43,15.633662,0
1009,2050,N09000011,44,32.688565,0
1010,2050,N09000011,45,51.16471,0


In [53]:
base.head()

Unnamed: 0,year,sector,employment,gva,lad11nm,lad11cd,lad16nm,lad16cd
0,2018,Agriculture etc,0.281729,11.2521,Darlington,E06000005,Darlington,E06000005
1,2018,Mining & quarrying,0.0992628,9.70036,Darlington,E06000005,Darlington,E06000005
2,2018,"Food, drink & tobacco",0.233751,11.0349,Darlington,E06000005,Darlington,E06000005
3,2018,Textiles etc,0.181865,6.24571,Darlington,E06000005,Darlington,E06000005
4,2018,Wood & paper,0.146105,13.7693,Darlington,E06000005,Darlington,E06000005


In [54]:
# add itrc sector codes to GB base
# Normalise ', etc' to ' etc' on both sides so the sector names match.
base = base.assign(
    sector=base['sector'].map(lambda d: d.replace(', etc', ' etc'))
)
sector_ids = sector_map[['itrc_sector', 'itrc_sector_description']].drop_duplicates()
sector_ids = sector_ids.assign(
    itrc_sector_description=sector_ids['itrc_sector_description'].map(
        lambda d: d.replace(', etc', ' etc')
    )
)
gb_disagg = base.merge(
    sector_ids, left_on='sector', right_on='itrc_sector_description', how='left'
)[['year', 'lad16cd', 'itrc_sector', 'gva', 'employment']].rename(
    columns={'year': 'timestep', 'gva': 'gva_per_sector'}
)
assert len(gb_disagg) == 2 * 380 * 45
# The left join keeps rows whose sector name found no match, so the length
# check above cannot detect them - check the codes explicitly.
assert gb_disagg.itrc_sector.notna().all(), 'some GB sector names have no ITRC sector code'
gb_disagg.head()

Unnamed: 0,timestep,lad16cd,itrc_sector,gva_per_sector,employment
0,2018,E06000005,1,11.2521,0.281729
1,2018,E06000005,2,9.70036,0.0992628
2,2018,E06000005,3,11.0349,0.233751
3,2018,E06000005,4,6.24571,0.181865
4,2018,E06000005,5,13.7693,0.146105


In [55]:
# Stack the GB rows and the projected NI rows into one UK-wide baseline.
full_base = pandas.concat([gb_disagg, ni_disagg], axis=0)
# 391 = 380 GB LADs + 11 NI LADs, per the earlier shape checks.
assert len(full_base) == 391 * 45 * 2
full_base.to_csv(os.path.join(output_path, 'uk_baseline.csv'), index=False)

### Stitch scenario projections with baseline UK (GVA by LAD)

In [56]:
# Baseline 2018/2050 for all non-Arc LADs
# i.e. every baseline row whose LAD code does not appear in the Arc variants.
scenario_base = full_base[~full_base.lad16cd.isin(variants_wlad.lad16cd.unique())]
assert len(scenario_base) == (391 - 29) * 45 * 2
scenario_base.head()

Unnamed: 0,timestep,lad16cd,itrc_sector,gva_per_sector,employment
0,2018,E06000005,1,11.2521,0.281729
1,2018,E06000005,2,9.70036,0.0992628
2,2018,E06000005,3,11.0349,0.233751
3,2018,E06000005,4,6.24571,0.181865
4,2018,E06000005,5,13.7693,0.146105


In [57]:
variants_wlad.scenario.unique()

array(['Baseline', 'Unplanned', 'City Focused', 'New Developments'],
      dtype=object)

In [58]:
# add itrc sector codes to the Arc scenario variants
# NOTE: this cell overwrites `variants_wlad` and drops its `sector` column,
# so re-running it on its own fails.
variants_wlad.sector = variants_wlad.sector.apply(lambda d: d.replace(', etc', ' etc'))
variants_wlad = variants_wlad.merge(
    sector_ids, left_on='sector', right_on='itrc_sector_description', how='left'
)[
    ['year', 'lad16cd', 'itrc_sector', 'GVA', 'Employment', 'scenario']
].rename(
    columns={'year': 'timestep', 'GVA': 'gva_per_sector', 'Employment': 'employment'}
)

In [59]:
variants_wlad.head()

Unnamed: 0,timestep,lad16cd,itrc_sector,gva_per_sector,employment,scenario
0,2018,E06000031,1,33.762,0.401362,Baseline
1,2018,E06000031,2,4.24017,0.118696,Baseline
2,2018,E06000031,3,80.1415,0.979734,Baseline
3,2018,E06000031,4,14.1783,0.195852,Baseline
4,2018,E06000031,5,12.2622,0.243796,Baseline


In [60]:
# One independent frame per scenario, keyed by scenario name.
vard = {
    scenario: variants_wlad.loc[
        variants_wlad['scenario'] == scenario,
        ['timestep', 'employment', 'gva_per_sector', 'lad16cd', 'itrc_sector'],
    ].copy()
    for scenario in ('Baseline', 'Unplanned', 'City Focused', 'New Developments')
}

In [74]:
# Build two views of the Arc LADs' baseline - one from the scenario workbook,
# one from the GB baseline - with identical column order, sort order and a
# fresh integer index, so they can be compared directly.
arc_baseline_from_scenario = vard['Baseline'][
    ['timestep','lad16cd','itrc_sector','gva_per_sector','employment']
].sort_values(['timestep','lad16cd','itrc_sector']).reset_index(drop=True)
arc_baseline_from_gb = full_base[full_base.lad16cd.isin(variants_wlad.lad16cd.unique())][
    ['timestep','lad16cd','itrc_sector','gva_per_sector','employment']
].sort_values(['timestep','lad16cd','itrc_sector']).reset_index(drop=True)

In [76]:
# NOTE(review): this import sits mid-notebook; consider moving it to the import cell.
import pandas.testing
# Fails with AssertionError unless the two frames compared here are equal.
pandas.testing.assert_frame_equal(arc_baseline_from_scenario, arc_baseline_from_gb)

In [61]:
# Scenario label -> key used in the output file names, in write order.
scenario_output_keys = (
    ('Baseline', 'baseline'),
    ('Unplanned', '0-unplanned'),
    ('New Developments', '1-new-cities'),
    ('City Focused', '2-expansion'),
)

for scenario, key in scenario_output_keys:
    # Combine the rows outside the variant LADs with this scenario's rows.
    stitch = pandas.concat([scenario_base, vard[scenario]], axis=0, sort=False)
    stitch = stitch.sort_values(['timestep', 'lad16cd'])
    stitch = stitch.rename(columns={'lad16cd': 'lad_uk_2016'})

    # output by LAD/sector
    sector_csv = os.path.join(output_path, 'arc_gva_employment_by_sector__{}.csv'.format(key))
    stitch.to_csv(sector_csv, index=False)
    print(len(stitch), list(stitch.columns))

    # output by LAD: drop the sector code, then total per timestep and LAD
    by_lad = (
        stitch
        .drop(columns='itrc_sector')
        .groupby(['timestep', 'lad_uk_2016'])
        .sum()
        .reset_index()
    )
    lad_csv = os.path.join(output_path, 'arc_gva_employment__{}.csv'.format(key))
    by_lad.to_csv(lad_csv, index=False)
    print(len(by_lad), list(by_lad.columns))

35190 ['timestep', 'lad_uk_2016', 'itrc_sector', 'gva_per_sector', 'employment']
782 ['timestep', 'lad_uk_2016', 'gva_per_sector', 'employment']
35190 ['timestep', 'lad_uk_2016', 'itrc_sector', 'gva_per_sector', 'employment']
782 ['timestep', 'lad_uk_2016', 'gva_per_sector', 'employment']
35190 ['timestep', 'lad_uk_2016', 'itrc_sector', 'gva_per_sector', 'employment']
782 ['timestep', 'lad_uk_2016', 'gva_per_sector', 'employment']
35190 ['timestep', 'lad_uk_2016', 'itrc_sector', 'gva_per_sector', 'employment']
782 ['timestep', 'lad_uk_2016', 'gva_per_sector', 'employment']


## Get UK / LAD / division 2015 data

### Regional gross value added (balanced) by local authority in the UK (released December 2017)
- see https://www.ons.gov.uk/economy/grossvalueaddedgva/datasets/regionalgrossvalueaddedbalancedlocalauthoritiesbynuts1region


In [77]:
# Each entry is (path on the ONS site, local file name) for one regional workbook.
# Every path follows the same pattern, so the list is generated from the region
# slugs, in the original order.
ons_dataset_path = (
    '/file?uri=/economy/grossvalueaddedgva/datasets/'
    'regionalgrossvalueaddedbalancedlocalauthoritiesbynuts1region'
)
ons_region_slugs = (
    'ukcnortheast',
    'ukdnorthwest',
    'ukeyorkshireandthehumber',
    'ukfeastmidlands',
    'ukgwestmidlands',
    'ukheastofengland',
    'ukilondon',
    'ukjsoutheast',
    'ukksouthwest',
    'uklwales',
    'ukmscotland',
    'uknnorthernireland',
)
urls = [
    (
        '{0}/{1}/regionalgvabbyla{1}.xlsx'.format(ons_dataset_path, slug),
        'regionalgvabbyla{}.xlsx'.format(slug),
    )
    for slug in ons_region_slugs
]
base_url = 'https://www.ons.gov.uk'

In [78]:
# Request every regional workbook from the ONS site.
# NOTE(review): `download` is defined outside this section; presumably it saves
# the response under `filename`, since the workbooks are later read back by
# that name — confirm.
for url_part, filename in urls:
    download(base_url + url_part, filename)

In [230]:
# SIC07 codes / code ranges used to select rows from the regional GVA sheets
# (rows whose SIC07 value is not listed here are discarded when the sheets are read).
sic07groups = [
    '1-9; 35-39', '10-15', '16-23', '24-30', '31-33', '41', '42', '43', '45', '46', '47', '49-51', '52-53', 
    '55-56', '58-63', '64-66', '68', '68IMP', '69', '70', '71', '72-75', '77', '78-80', '81', '82', '84', '85', '86-87', 
    '88', '90-93', '94-95', '96', '97-98'
]

In [231]:
# Read the 'Current Price' sheet of every regional workbook and keep only the
# rows whose SIC07 code (compared as text) is one of the selected groups.
dfs = []
for _, filename in urls:
    df = pandas.read_excel(filename, sheet_name='Current Price', header=1)
    df['SIC07'] = df['SIC07'].astype('str')
    is_selected_group = df['SIC07'].isin(sic07groups)
    dfs.append(df[is_selected_group])

In [232]:
# Stack the regional tables into one frame, drop the descriptive columns and
# switch to the column names used in the rest of the notebook.
uk_hist = pandas.concat(
    dfs, axis=0
).drop(
    ['Region', 'SIC07 description'], axis=1
).rename(columns={
    'LAD code': 'lad16cd',
    'LA name': 'lad16nm',
    'SIC07': 'sic07_division_broad',
    # NOTE(review): presumably the 2017 header read with a footnote marker "3" attached — confirm
    '20173': 2017
})
# Spell ranges as "a to b" instead of "a-b" (plain-text replacement, no regex).
uk_hist['sic07_division_broad'] = uk_hist['sic07_division_broad'].str.replace(
    '-', ' to ', regex=False
)
# Recode two LAD codes. The result is assigned back to the column: calling
# replace(inplace=True) on a column accessor is a chained in-place modification,
# which does not reach the parent frame under pandas Copy-on-Write.
uk_hist['lad16cd'] = uk_hist['lad16cd'].replace({
    'S12000047': 'S12000015',  # Fife
    'S12000048': 'S12000024',  # Perth and Kinross
})

In [233]:
# Sector lookup table; located through data_path like the other provided inputs
# rather than through a separately spelled directory name.
sector_map = pandas.read_csv(os.path.join(data_path, 'map_sectors.csv'))

In [234]:
# Join the GVA rows to the sector lookup on the broad SIC07 group. With
# how='outer', rows lacking a partner on either side are retained, with NaN in
# the columns coming from the other table.
uk_hist_s = uk_hist.merge(sector_map, on='sic07_division_broad', how='outer')

In [235]:
# For every (LAD, broad SIC07 group) pair, count the joined rows that have a
# non-null LAD name; the count is later used as the divisor when a group's
# GVA is spread over its rows.
count_for_disagg = (
    uk_hist_s
    .groupby(['lad16cd', 'sic07_division_broad'])['lad16nm']
    .count()
    .rename('division_group_count')
    .reset_index()
)
count_for_disagg.head()

Unnamed: 0,lad16cd,sic07_division_broad,division_group_count
0,E06000001,1 to 9; 35 to 39,13
1,E06000001,10 to 15,6
2,E06000001,16 to 23,8
3,E06000001,24 to 30,7
4,E06000001,31 to 33,3


In [236]:
# Columns that are not needed once the per-group row counts have been attached.
descriptive_columns = [
    'lad16nm', 'sector', 'sector_description', 'sic07_division_group',
    'sic07_division_group_description', 'itrc_sector_description',
    'is_knowledge_based', 'sic07_division_description',
]
uk_hist_sd = (
    uk_hist_s
    .merge(count_for_disagg, how='left', on=['lad16cd', 'sic07_division_broad'])
    .drop(columns=descriptive_columns)
)
uk_hist_sd.columns

Index([             'lad16cd', 'sic07_division_broad',                   1998,
                         1999,                   2000,                   2001,
                         2002,                   2003,                   2004,
                         2005,                   2006,                   2007,
                         2008,                   2009,                   2010,
                         2011,                   2012,                   2013,
                         2014,                   2015,                   2016,
                         2017,          'itrc_sector',       'sic07_division',
       'division_group_count'],
      dtype='object')

In [237]:
# Reshape from one column per year to long form: one row per identifier
# combination and year, with the year in 'timestep' and the value in 'gva'.
melt_id_columns = [
    'lad16cd', 'sic07_division_broad', 'itrc_sector', 'sic07_division', 'division_group_count'
]
uk_hist_sdm = uk_hist_sd.melt(
    id_vars=melt_id_columns, var_name='timestep', value_name='gva'
)
uk_hist_sdm.head()

Unnamed: 0,lad16cd,sic07_division_broad,itrc_sector,sic07_division,division_group_count,timestep,gva
0,E06000001,1 to 9; 35 to 39,1,1,13.0,1998,56
1,E06000001,1 to 9; 35 to 39,1,2,13.0,1998,56
2,E06000001,1 to 9; 35 to 39,1,3,13.0,1998,56
3,E06000001,1 to 9; 35 to 39,2,5,13.0,1998,56
4,E06000001,1 to 9; 35 to 39,2,6,13.0,1998,56


In [238]:
# 'c' indicates a figure that has been suppressed to avoid identification of individual company information
# e.g. in Cornwall; such cells are treated as zero here.
# The result is assigned back to the column: replace(inplace=True) on a column
# accessor is a chained in-place modification, which does not reach the parent
# frame under pandas Copy-on-Write.
uk_hist_sdm['gva'] = uk_hist_sdm['gva'].replace('c', 0)

In [242]:
# Divide each row's GVA by the row count of its (LAD, broad group) pair, then
# total the shares per LAD, ITRC sector and timestep.
uk_hist_sdm['gva_per_sector'] = (
    uk_hist_sdm['gva'].astype(float) / uk_hist_sdm['division_group_count'].astype(float)
)
gva_by_sector = (
    uk_hist_sdm
    .drop(columns=['sic07_division_broad', 'sic07_division', 'division_group_count', 'gva'])
    .groupby(['lad16cd', 'itrc_sector', 'timestep'])
    .sum()
    .reset_index()
)
# Only timesteps from 2015 onwards are kept.
uk_hist_g = gva_by_sector[gva_by_sector.timestep >= 2015]
assert len(uk_hist_g) == 3 * 391 * 46  # timestep * UK LADs * sectors
uk_hist_g.head()

Unnamed: 0,lad16cd,itrc_sector,timestep,gva_per_sector
17,E06000001,1,2015,27.692308
18,E06000001,1,2016,45.0
19,E06000001,1,2017,63.0
37,E06000001,2,2015,46.153846
38,E06000001,2,2016,75.0


### UK Business Register and Employment Survey
- see https://www.ons.gov.uk/employmentandlabourmarket/peopleinwork/employmentandemployeetypes/bulletins/businessregisterandemploymentsurveybresprovisionalresults/provisionalresults2017revisedresults2016
- from https://www.nomisweb.co.uk/

In [245]:
# Load the BRES employment table (located through data_path like the other
# provided inputs), reshape it to one row per timestep / LAD / SIC07 division,
# map divisions to ITRC sectors and total employment per timestep, LAD and sector.
bres_emp = pandas.read_csv(
    os.path.join(data_path, 'bres-employment-by-lad-sic07division-2015-2017.csv'), header=6
).melt(
    id_vars=['timestep','lad16nm','lad16cd'],
    var_name='sic07_division',
    value_name='employment'
)
# Division codes come from the column headers; cast them to integers before
# joining on the sector lookup's sic07_division column.
bres_emp.sic07_division = bres_emp.sic07_division.astype('int64')
bres_emp = bres_emp.merge(
    sector_map, on='sic07_division', how='left'
).drop(
    ['sector','sector_description','sic07_division_broad','sic07_division_group','sic07_division_group_description',
     'itrc_sector_description','is_knowledge_based','sic07_division','sic07_division_description', 'lad16nm'], axis=1
).sort_values(
    ['lad16cd','timestep','itrc_sector']
).groupby(
    ['timestep','lad16cd','itrc_sector']
).sum().reset_index()
assert len(bres_emp) == 3 * 380 * 46 # timesteps * GB LADs * sectors
bres_emp.head()

Unnamed: 0,timestep,lad16cd,itrc_sector,employment
0,2015,E06000001,1,180
1,2015,E06000001,2,20
2,2015,E06000001,3,250
3,2015,E06000001,4,75
4,2015,E06000001,5,325


### Join historical, projections; interpolate

In [249]:
# Left join keeps every GVA row; rows with no matching employment record get
# NaN in the employment column.
full_hist = uk_hist_g.merge(bres_emp, on=['timestep','lad16cd','itrc_sector'], how='left')
# Drop sector 46 so that 45 sectors remain.
full_hist = full_hist[full_hist.itrc_sector != 46]
assert len(full_hist) == 3 * 391 * 45 # timesteps * UK LADs * sectors
full_hist.head()

Unnamed: 0,lad16cd,itrc_sector,timestep,gva_per_sector,employment
0,E06000001,1,2015,27.692308,180.0
1,E06000001,1,2016,45.0,190.0
2,E06000001,1,2017,63.0,150.0
3,E06000001,2,2015,46.153846,20.0
4,E06000001,2,2016,75.0,20.0


In [250]:
# Sanity-check the size of the projections table before it is combined with
# the historical data, then preview it ordered by LAD, sector and timestep.
assert len(full_base) == 2 * 391 * 45  # timesteps * UK LADs * sectors
full_base.sort_values(['lad16cd','itrc_sector','timestep']).head()

Unnamed: 0,timestep,lad16cd,itrc_sector,gva_per_sector,employment
90,2018,E06000001,1,7.23615,0.181177
135,2050,E06000001,1,11.6295,0.157776
91,2018,E06000001,2,1.43417,0.0146748
136,2050,E06000001,2,0.881672,0.0116352
92,2018,E06000001,3,14.3223,0.303389


In [318]:
# Stack the projection rows (full_base) and the historical rows (full_hist)
# into a single long table; sort=False leaves the columns unsorted.
all_base = pandas.concat(
    [full_base, full_hist], axis=0, sort=False
)
all_base.head()

Unnamed: 0,timestep,lad16cd,itrc_sector,gva_per_sector,employment
0,2018,E06000005,1,11.2521,0.281729
1,2018,E06000005,2,9.70036,0.0992628
2,2018,E06000005,3,11.0349,0.233751
3,2018,E06000005,4,6.24571,0.181865
4,2018,E06000005,5,13.7693,0.146105


In [319]:
# Replace every missing value with 0.
# NOTE(review): this presumably covers employment for rows without a BRES match — confirm.
all_base = all_base.fillna(0)

In [320]:
# Total per (lad16cd, itrc_sector, timestep), then pivot timestep into the
# columns so each row is one LAD/sector with one column per measure and year.
all_base = all_base.groupby(['lad16cd', 'itrc_sector', 'timestep']).sum().unstack('timestep')

In [321]:
# Multiply the 2018 and 2050 employment columns by 1000.
# NOTE(review): presumably converts the projections from thousands to head
# counts so they match the 2015-2017 columns — confirm.
# The column is addressed by its full (measure, year) key in a single .loc
# call; assigning through `all_base.employment.loc[...]` is chained assignment
# and may modify a temporary sub-frame instead of all_base.
# Re-running this cell multiplies the values again.
for year in (2018, 2050):
    all_base.loc[:, ('employment', year)] = all_base[('employment', year)] * 1000

In [322]:
# Preview the wide table after the employment columns were rescaled.
all_base.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,gva_per_sector,gva_per_sector,gva_per_sector,gva_per_sector,gva_per_sector,employment,employment,employment,employment,employment
Unnamed: 0_level_1,timestep,2015,2016,2017,2018,2050,2015,2016,2017,2018,2050
lad16cd,itrc_sector,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
E06000001,1,27.692308,45.0,63.0,7.236152,11.629481,180.0,190.0,150.0,181.177092,157.775544
E06000001,2,46.153846,75.0,105.0,1.434168,0.881672,20.0,20.0,10.0,14.674831,11.635233
E06000001,3,12.0,12.0,10.5,14.322333,14.986256,250.0,160.0,300.0,303.3888,244.473272
E06000001,4,12.0,12.0,10.5,2.555429,2.614616,75.0,150.0,30.0,74.409348,45.094134
E06000001,5,22.0,17.75,21.25,29.664682,28.886498,325.0,250.0,500.0,314.77023,222.64168


In [332]:
# Print total GVA per year. Totals for 2018 and 2050 are multiplied by the
# CPI ratio, following:
# (CPI in 2018 / CPI in 2011) × 2011 GBP value = 2018 GBP value
# Earlier years are printed unchanged.
cpi18 = 106
cpi11 = 93.6

for year in (2015, 2016, 2017, 2018, 2050):
    year_total = all_base.gva_per_sector[year].sum()
    if year >= 2018:
        year_total = year_total * (cpi18/cpi11)
    print(year_total)



1663859.0
1729317.0
1790700.0
1830529.7400814062
2696720.8210618705


# Output subset for energy demand
Filter on ITRC sectors

In [329]:
# ITRC sector codes kept for the energy-demand outputs; the length is shown
# as a quick check on the list.
energy_demand_sector_subset = [2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 14, 15, 17, 19, 23, 27, 28, 29, 35, 40, 41]
len(energy_demand_sector_subset)

21

In [None]:
# Restrict the disaggregated baseline to the energy-demand sectors and rename
# the columns for output.
in_energy_subset = full_base_disagg.itrc_sector.isin(energy_demand_sector_subset)
ed_base_disagg = full_base_disagg[in_energy_subset].rename(columns={
    'itrc_sector': 'sectors',
    'lad16cd': 'lad_uk_2016',
    'gva': 'gva_per_sector',
})
len(ed_base_disagg)

In [None]:
# Reference row count to compare with the length shown above.
# NOTE(review): presumably timesteps * UK LADs * energy-demand sectors — confirm,
# and consider turning this into an assert.
36 * 391 * 21

In [None]:
# Write the energy-demand baseline subset to the output directory, without the index.
ed_base_disagg.to_csv(
    os.path.join(output_path, 'arc_gva_sector__baseline.csv'), index=False
)

In [None]:
# For every frame in disagg_d: keep the energy-demand sectors, write them to a
# CSV named after the key, and print the number of rows kept.
for key, df in disagg_d.items():
    df = df.loc[df.itrc_sector.isin(energy_demand_sector_subset)]
    scenario_csv = os.path.join(output_path, 'arc_gva_sector__{}.csv'.format(key))
    renamed = df.rename(columns={'itrc_sector': 'sectors', 'lad16cd': 'lad_uk_2016'})
    renamed.to_csv(scenario_csv, index=False)
    print(len(df))