In [None]:
import os

import pandas

In [None]:
# Input and output folders, both relative to the current working directory.
output_path = os.path.join('.', 'data_processed')
data_path = os.path.join('.', 'data_as_provided')

# Source workbooks read from the input folder.
gb_baseline_file = os.path.join(data_path, 'GVA-Employment-Productivity-All LAs.xlsx')
arc_scenarios_file = os.path.join(data_path, 'ARC Employment Scenarios.xlsx')

In [None]:
# Read the three baseline sheets in a single call. Passing a list of sheet
# names makes read_excel return a dict of DataFrames keyed by sheet name.
baseline = pandas.read_excel(
    gb_baseline_file,
    index_col=0,  # first column becomes the row index
    header=6,     # column labels come from row 6 (0-indexed) of each sheet
    sheet_name=['GVA', 'Employment', 'Productivity'],
)

In [None]:
# Reshape every baseline sheet from wide (one column per year) to long format
# and join the three measures side by side on a shared (year, lad_nm) index.
dfs = []
label_lookup = {
    'GVA': 'GVA (GBP2016m)',
    'Employment': 'Employment (000s)',
    'Productivity': 'Productivity (GBP2016 thousands per person in employment)'
}
for label, df in baseline.items():
    df = df.dropna(  # discard rows containing any missing value
    ).reset_index(
    ).rename(
        columns={'index':'lad_nm'}
    ).melt(
        id_vars=['lad_nm'],
        var_name='year',
        value_name=label_lookup[label]
    )
    # Trim stray whitespace so that names line up across sheets and lookups.
    df['lad_nm'] = df['lad_nm'].str.strip()
    df = df.set_index(
        ['year', 'lad_nm']
    )
    dfs.append(df)

# The frames already share the (year, lad_nm) index, so a plain column-wise
# concat aligns them. `levels` is only meaningful together with `keys`, so it
# is not passed here.
baseline_all = pandas.concat(dfs, axis=1)
baseline_all

In [None]:
# Sanity check: presumably the row count expected for baseline_all shown above.
40 * 380 # years * Great Britain LADs

In [None]:
# Read the four scenario sheets in a single call; the result is a dict of
# DataFrames keyed by sheet name, each indexed by its first column.
variants = pandas.read_excel(
    arc_scenarios_file,
    index_col=0,
    sheet_name=['baseline', 'scenario 0', 'scenario 1', 'scenario 2'],
)

In [None]:
# The code below treats each scenario sheet as a vertical stack of tables, one
# per entry in var_names, and slices each table out by row offset before
# reshaping it to long format.
dfs = []
var_names = (
    'KBS Employment', 
    'Non-KBS employment (000s)', 
    'Employment in Total (000s)', 
    'GVA in KBS', 
    'GVA in non-KBS (GBP2011m)', 
    'GVA in Total (GBP2011m)', 
    'Prod in KBS', 
    'Prod in non-KBS (GBP2011m)', 
    'Prod in Total (GBP2011m)'
)
step = 27  # row offset between the starts of consecutive tables in a sheet
for scenario, s_df in variants.items():
    s_df.index.name = 'lad_nm'
    s_dfs = []
    for i, var in enumerate(var_names):
        from_row, to_row = i*step + 1, i*step + 25
        # First 37 columns of this table's rows; copy so the sheet is not modified.
        df = s_df.iloc[from_row:to_row, 0:37].copy()
        # The first sliced row supplies the column labels (melted into 'year' below).
        df.columns = df.iloc[0]
        # Skip the label row and the row after it, then go from wide to long.
        df = df[2:].reset_index(
        ).melt(
            id_vars=['lad_nm'],
            var_name='year',
            value_name=var
        )
        df['lad_nm'] = df['lad_nm'].str.strip()
        df['year'] = df['year'].astype('int')
        df['scenario'] = scenario
        df = df.set_index(
            ['scenario', 'year', 'lad_nm']
        )
        s_dfs.append(df)
    # All per-variable frames share the same index, so a plain column-wise
    # concat aligns them. `levels` is only meaningful together with `keys`, so
    # it is not passed here.
    s_df_all = pandas.concat(s_dfs, axis=1)
    dfs.append(s_df_all)

# Stack the scenarios on top of each other.
variants_all = pandas.concat(dfs, axis=0)
variants_all

In [None]:
# Sanity check: presumably the row count expected for variants_all shown above.
37*22*4  # years * Arc LADs * scenarios

## Convert between 2011 GBP and 2016 GBP

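(CPIH in 2016 / CPIH in 2011) × 2011 GBP value = 2016 GBP value. Conversely, (CPIH in 2011 / CPIH in 2016) × 2016 GBP value = 2011 GBP value; this is the direction applied below, to express the 2016-price baseline in 2011 prices.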

CPIH in 2011 (index all items, 2015=100): 93.6

CPIH in 2016 (index all items, 2015=100): 101

Source: https://www.ons.gov.uk/economy/inflationandpriceindices/timeseries/l522/mm23

In [None]:
# CPIH index values (all items, 2015=100) for 2011 and 2016.
cpi11, cpi16 = 93.6, 101
# Ratio used below to scale 2016-price values down to 2011 prices.
cpi11 / cpi16

In [None]:
# Spot check for Cambridge every five years: deflate the GB baseline GVA to
# 2011 prices and compare it with the Arc 'baseline' scenario value.
# Printed columns: year, Arc GVA, GB GVA, deflated GB GVA, abs diff, relative diff.
for y in range(2015, 2051, 5):
    arc_row = variants_all.loc[('baseline', y, 'Cambridge')]
    gb_row = baseline_all.loc[(y, 'Cambridge')]
    gva11 = arc_row['GVA in Total (GBP2011m)']
    gva16 = gb_row['GVA (GBP2016m)']  # £2016m
    est = gva16 * (cpi11/cpi16)
    abs_diff = abs(gva11 - est)
    print("%d, %.3f, %.3f, %.3f, %.3f, %.3f" % (y, gva11, gva16, est, abs_diff, abs_diff/gva11))

In [None]:
# Add 2011-price versions of the monetary baseline measures by scaling the
# 2016-price columns with the CPIH ratio.
baseline_all['GVA (GBP2011m)'] = baseline_all['GVA (GBP2016m)'].mul(cpi11 / cpi16)
baseline_all['Productivity (GBP2011 thousands per person in employment)'] = (
    baseline_all['Productivity (GBP2016 thousands per person in employment)'].mul(cpi11 / cpi16)
)

In [None]:
# Preview the first rows, which now include the derived 2011-price columns.
baseline_all.head()

### Add LAD codes

In [None]:
# Load the LAD name/code lookup from the input folder.
lad_nmcds = pandas.read_csv(os.path.join(data_path, 'lad_nmcd_changes.csv'))

In [None]:
# Keep only the name and code columns needed for the merges below.
lad_nmcds = lad_nmcds.loc[
    :, ['lad11nm', 'lad11cd', 'lad18nm', 'lad18cd']
]

In [None]:
# Distinct LAD names on each side, for comparing spellings before merging.
all_lad_nms = set(lad_nmcds['lad11nm'])
baseline_all_lad_nms = set(baseline_all.reset_index()['lad_nm'])

In [None]:
# Names present in the LAD lookup but absent from the baseline data.
all_lad_nms.difference(baseline_all_lad_nms)

In [None]:
# Names present in the baseline data but absent from the LAD lookup.
baseline_all_lad_nms.difference(all_lad_nms)

In [None]:
# Move the index levels into ordinary columns, but only if that has not been
# done yet: re-running this cell must not add a spurious 'index' column that
# would end up in the CSV outputs.
if 'lad_nm' in baseline_all.index.names:
    baseline_all = baseline_all.reset_index()

# Replace baseline spellings with the ones expected when matching on lad11nm.
baseline_all['lad_nm'] = baseline_all['lad_nm'].replace({
    'Anglesey': 'Isle of Anglesey',
    'Dumfries & Galloway': 'Dumfries and Galloway',
    'King`s Lynn and West Norfolk': "King's Lynn and West Norfolk",
    'Perth and Kinross': 'Perth & Kinross',
    'Rhondda, Cynon, Taff': 'Rhondda Cynon Taf'
})

In [None]:
# Attach LAD names and codes to the baseline by matching on the lad11nm name;
# the original lad_nm column is then redundant and is dropped.
baseline_wlad = baseline_all.merge(
    lad_nmcds,
    left_on='lad_nm',
    right_on='lad11nm',
).drop(columns='lad_nm')

In [None]:
# Merge check: distinct matched LAD names, rows after the merge, rows before it.
len(baseline_wlad.lad11nm.unique()), len(baseline_wlad), len(baseline_all)

In [None]:
# Attach LAD names and codes to the scenario data by matching on the lad11nm
# name; the original lad_nm column is then redundant and is dropped.
variants_wlad = variants_all.reset_index().merge(
    lad_nmcds,
    left_on='lad_nm',
    right_on='lad11nm',
).drop(columns='lad_nm')

In [None]:
# Merge check: distinct matched LAD names, rows after the merge, rows before it.
len(variants_wlad.lad11nm.unique()), len(variants_wlad), len(variants_all)

In [None]:
# LADs whose name or code differs between the lad11* and lad18* columns.
baseline_wlad.loc[
    (baseline_wlad.lad11nm != baseline_wlad.lad18nm)
    | (baseline_wlad.lad11cd != baseline_wlad.lad18cd),
    'lad18nm'
].unique()

## Output data

In [None]:
# to_csv does not create missing directories, so make sure the output folder
# exists before writing the GB-wide baseline.
os.makedirs(output_path, exist_ok=True)
baseline_wlad.to_csv(os.path.join(output_path, 'gb_baseline.csv'), index=False)

In [None]:
# Write all scenario rows to a single CSV, without the row index.
variants_wlad.to_csv(os.path.join(output_path, 'arc_variants.csv'), index=False)

### Merged, separate file-per-scenario

In [None]:
# Baseline employment and GVA (2011 prices) for every LAD that is NOT covered
# by the scenario data, restricted to the years 2014-2050.
base = (
    baseline_wlad[
        ['year', 'Employment (000s)', 'GVA (GBP2011m)', 'lad11nm', 'lad11cd', 'lad18nm', 'lad18cd']
    ]
    .rename(columns={
        'Employment (000s)': 'employment',
        'GVA (GBP2011m)': 'gva'
    })
    .loc[lambda d: ~d.lad11cd.isin(variants_wlad.lad11cd.unique())]
    .loc[lambda d: d.year.isin(range(2014, 2051))]
)
len(base)

In [None]:
# Presumably the expected len(base): (380 GB LADs - 22 Arc LADs) * 37 years (2014-2050).
358 * 37

In [None]:
# Preview the non-Arc baseline rows.
base.head()

In [None]:
# Per-scenario employment and GVA totals for the LADs in the scenario data,
# renamed to match `base` and restricted to the years 2014-2050.
vard = {}

for scenario in ('baseline', 'scenario 0', 'scenario 1', 'scenario 2'):
    var = variants_wlad.loc[
        variants_wlad.scenario == scenario,
        ['year', 'Employment in Total (000s)', 'GVA in Total (GBP2011m)', 'lad11nm', 'lad11cd', 'lad18nm', 'lad18cd']
    ].rename(columns={
        'Employment in Total (000s)': 'employment',
        'GVA in Total (GBP2011m)': 'gva'
    })
    # Copy so each stored frame is independent of variants_wlad.
    var = var.loc[var.year.isin(range(2014, 2051))].copy()
    vard[scenario] = var

    print(len(var))

In [None]:
# Presumably the expected rows per scenario: 22 Arc LADs * 37 years (2014-2050).
22 * 37

In [None]:
# Preview the rows kept for the 'baseline' scenario.
vard['baseline'].head()

In [None]:
# to_csv does not create missing directories, so make sure the output folder
# exists before writing.
os.makedirs(output_path, exist_ok=True)

# One file per scenario: the non-Arc baseline rows followed by that scenario's
# Arc rows. Spaces are removed from the scenario name to build the file name.
for scenario in ('baseline', 'scenario 0', 'scenario 1', 'scenario 2'):
    stitch = pandas.concat([base, vard[scenario]])
    stitch.to_csv(os.path.join(output_path, 'arc_gva_employment__{}.csv'.format(scenario.replace(" ", ""))), index=False)
    print(len(stitch))

In [None]:
# Presumably the expected rows per stitched file: 37 years (2014-2050) * 380 GB LADs.
37 * 380