In [54]:
import pandas as pd
import numpy as np 
import os
import glob
import warnings

# Suppress every Python warning for the rest of the session.
# NOTE(review): this is a blanket filter, so any pandas deprecation or dtype warning
# emitted by later cells is hidden as well — consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

* Create lookup tables that bring every dataset onto one spatial scale: 2021 LSOAs for England and Wales, and 2011 Data Zones (DZs) for Scotland

In [None]:
# Read the postcode-level lookup table from ONSPD UK 2023.11.
# Only the geography-code columns below are used, so load just those rather than
# parsing every column of the file.
LOOKUP_COLUMNS = ['pcd','oa21','lsoa21','msoa21', 'oslaua','rgn','oa11','lsoa11','msoa11']
lookup = pd.read_csv('ONSPD_NOV_2023_UK.csv', usecols=LOOKUP_COLUMNS)
# `usecols` returns columns in file order; re-select to get the order listed above,
# and replace missing codes with the literal 'NA' so the string tests below never see NaN.
lookup = lookup[LOOKUP_COLUMNS].fillna('NA')

# England and Wales: keep rows whose 2021 LSOA code *starts with* 'E' or 'W'.
# A prefix test is used (as for Scotland below) instead of a "contains" regex, which
# would also match any other code that merely has an E or W somewhere inside it.
lookup_EW = lookup[lookup['lsoa21'].str[:1].isin(['E', 'W'])]
# Scotland only: 2011 codes beginning with 'S0'
lookup_S = lookup[lookup['lsoa11'].str.startswith('S0')]
# Stack both parts into the GB-wide lookup
lookup_gb = pd.concat([lookup_EW, lookup_S])

In [58]:
# --- LSOA11 -> LSOA21 correspondence -------------------------------------------
# One row per distinct (lsoa21, lsoa11) pair, indexed by the 2011 code.
lsoa_pairs = lookup_gb[['lsoa21','lsoa11']].drop_duplicates().set_index('lsoa11')
# weight = 1 / (number of lsoa21 rows paired with the same lsoa11 code)
pairs_per_lsoa11 = lsoa_pairs.groupby('lsoa11').count().rename(columns = {'lsoa21':'weight'})
lsoa11_21 = lsoa_pairs.join(1/pairs_per_lsoa11).sort_values('weight')

# --- area name -> 2011 code lookups ----------------------------------------------
# England and Wales: index on the file's second column, keep the first remaining column.
ew_lookup_file = os.getcwd() + '/LSOA_(2011)_to_LSOA_(2021)_to_Local_Authority_District_(2022)_Lookup_for_England_and_Wales.csv'
lsoaCD_NM = pd.read_csv(ew_lookup_file, index_col = 1).iloc[:,[0]].drop_duplicates()

# Scotland: same idea from the 'DataZone2011Lookup' sheet; the column is relabelled
# so that it lines up with the England and Wales frame when the two are stacked.
dz_lookup_file = os.getcwd() + '/OA_DZ_IZ_2011_lookup.xlsx'
dzCD_NM = pd.read_excel(dz_lookup_file, sheet_name = 'DataZone2011Lookup', index_col = 1)
dzCD_NM.columns = ['F_LSOA11CD']

# GB-wide name -> code table
GB_NM_CD = pd.concat([lsoaCD_NM, dzCD_NM])

* Households with electricity pre-payment meters

In [62]:
# Column labels of the two meter counts that form the prepayment share:
# the share is PREPAY_METERS / ALL_METERS.
PREPAY_METERS = 'Total meters'
ALL_METERS = 'Number\nof meters\n'

#2017 electricity prepayment meters (first 41729 rows only; anything after them is dropped)
pre_elec_meter = pd.read_csv('LSOA-prepayment-electricity-2017.csv',header = 1,index_col = 5).iloc[0:41729,[6]]
#2017 electricity meters (same row window)
elec_meter_2017 = pd.read_excel('LSOA_domestic_elec_2010-21.xlsx', sheet_name = '2017',header = 4, index_col = 4).iloc[0:41729,[5]]

# Join the two counts, then attach the LSOA11 -> LSOA21 lookup. The right join keeps
# every lookup row, so areas without meter data still appear in the output.
meters_by_lsoa11 = elec_meter_2017.join(pre_elec_meter).join(lsoa11_21, how = 'right')

# The weight (1/n for an LSOA11 that maps to n LSOA21s) apportions *counts*.
# Applying it to the percentage itself would give each of n successor areas only 1/n
# of the true share, so weight the two counts, pool them per LSOA21, and divide afterwards.
usable = meters_by_lsoa11.dropna(subset = [PREPAY_METERS, ALL_METERS])
apportioned = usable[[PREPAY_METERS, ALL_METERS]].mul(usable['weight'], axis = 0)
pooled = apportioned.groupby(usable['lsoa21']).sum()
prepay_share = pooled[PREPAY_METERS] / pooled[ALL_METERS] * 100

# Re-expand to every LSOA21 in the lookup; areas with no usable data get 0,
# matching the previous fillna(0) behaviour.
all_lsoa21 = pd.Index(sorted(lsoa11_21['lsoa21'].unique()), name = 'lsoa21')
elec_pre_meter = prepay_share.reindex(all_lsoa21).fillna(0).to_frame('prepay electric meter')

* Properties not connected to the gas grid

In [63]:
# Properties not connected to the gas network, read from the '2022' sheet
gas_grid_raw = pd.read_excel(
    'LSOA_estimates_of_properties_not_connected_to_the_gas_network_2015-2022.xlsx',
    sheet_name = '2022',
    header = 3,
    index_col = 4,
)
# Scale the source column by 100 and keep it as a single-column frame
off_grid_scaled = gas_grid_raw['Estimated percentage\nof properties not \non the gas grid'] * 100
no_mains_gas = off_grid_scaled.to_frame('not connect to gas grid')

* People on Universal Credit (per 100 residents)

In [64]:
# Population estimates, England and Wales: first 35672 rows, '2021' column only
pop_ew_raw = pd.read_csv('All population_LSOA21_EW.csv', header = 5, index_col = 1)
population_2021_EW = pop_ew_raw.iloc[0:35672][['2021']].rename(columns = {'2021': 'Total population'})

# Population estimates, Scotland: strip thousands separators before casting to int
pop_s_raw = pd.read_csv('2021Mid-year DZ population_age band.csv',index_col = 0)
population_2021_S = (
    pop_s_raw['Total population']
    .str.replace(',','')
    .astype('int')
    .to_frame()
)

# Population estimates for GB (England and Wales stacked on Scotland)
population_2021 = pd.concat([population_2021_EW, population_2021_S])

# Universal credit table for GB, 2021 (cleaned in the next cell)
universal_credit_GB = pd.read_csv('universal credit people 2021 GB.csv',header = 6, index_col = 0)

In [66]:
# Drop columns, then rows, that are entirely null
universal_credit_GB = universal_credit_GB.dropna(axis=1, how='all').dropna(axis=0, how='all')
# Coerce EVERY column to numeric (non-numeric cells become NaN), not only 'April 2021'.
# A column left as text would otherwise be silently skipped by the row mean on older
# pandas, or make it raise TypeError on pandas >= 2.
universal_credit_GB = universal_credit_GB.apply(pd.to_numeric, errors='coerce')
# Drop rows whose 'April 2021' value was missing or non-numeric (e.g. notes/footer rows)
universal_credit_GB = universal_credit_GB.dropna(subset=['April 2021'])
# Drop the summary row; tolerate it being absent already
universal_credit_GB = universal_credit_GB.drop('Total', axis= 0, errors='ignore')
# Row-wise average over all remaining columns (NaN cells are skipped)
universal_credit_GB['2021 universal credit-avg'] = universal_credit_GB.mean(axis= 1)
# Relabel four area names so they match the names used by the name -> code lookup
universal_credit_GB = universal_credit_GB.rename(index = {'Shepway 015A': 'Shepway 014E', 'Shepway 015B':'Shepway 014F','Shepway 015C':'Shepway 014G','Shepway 015D':'Shepway 014H'})
# Keep only the averaged column, without empty rows
universal_credit_GB = universal_credit_GB[['2021 universal credit-avg']].dropna()

In [68]:
# Attach 2011 area codes by name, re-index on that code, then attach the
# LSOA11 -> LSOA21 lookup (which carries the apportioning weight).
uc_by_lsoa11 = universal_credit_GB.join(GB_NM_CD).set_index('F_LSOA11CD').join(lsoa11_21)
# Apportion each 2011 count across the 2021 areas it maps to
uc_by_lsoa11['2021 universal credit-avg'] = uc_by_lsoa11['2021 universal credit-avg'] * uc_by_lsoa11['weight']
# These are counts, so the pieces landing in one LSOA21 must be ADDED: averaging them
# would halve the count wherever two 2011 areas merged into one 2021 area, while the
# population it is divided by below is the full merged total.
# min_count=1 keeps an area as NaN (not 0) when none of its pieces has data.
uc_GB = (
    uc_by_lsoa11
    .groupby('lsoa21')[['2021 universal credit-avg']]
    .sum(min_count=1)
    .join(population_2021)
)
# Count per 100 residents
uc_GB['2021 uc-mean'] = uc_GB['2021 universal credit-avg']/uc_GB['Total population']*100
uc_GB = uc_GB[['2021 uc-mean']]

* Average domestic electricity consumption (kWh per meter)
* Average domestic gas consumption (kWh per meter)

In [69]:
# Both workbooks share the same layout: read the '2022' sheet with identical options
# and keep only the column at position 8 (after the index column is removed).
sheet_opts = dict(sheet_name = '2022', header = 4, index_col = 4)
gas_consumption = pd.read_excel('LSOA_domestic_gas_2010-2022.xlsx', **sheet_opts).iloc[:,[8]]
elc_consumption = pd.read_excel('LSOA_domestic_elec_2010-22.xlsx', **sheet_opts).iloc[:,[8]]

In [70]:
# Give the single column of each consumption frame a short, unambiguous label
for frame, label in ((gas_consumption, 'gas_consumption'), (elc_consumption, 'elc_consumption')):
    frame.columns = [label]

* Gross Disposable Household Income (GDHI) 

In [71]:
# Read 'Table 3' from every workbook in Income/, keeping the first column as the index
# plus the column at position 25
income_path = glob.glob('Income/*.xlsx')
income_df = []
for workbook in income_path:
    income_df.append(
        pd.read_excel(workbook, sheet_name= 'Table 3', header= 1, index_col=0, usecols= [0, 25])
    )

# Stack the workbooks for GB and attach the LSOA11 -> LSOA21 lookup
income_gb = pd.concat(income_df).join(lsoa11_21)

# Re-weight the 2021 column and average per LSOA21.
# NOTE(review): weighting and then taking the mean mixes two conventions — a total
# would need weight + sum, a per-head value would need mean without the weight.
# Confirm which kind of quantity this column holds before relying on split/merged areas.
income_gb[2021] = income_gb[2021]  * income_gb['weight']
income_gb = income_gb.groupby('lsoa21').mean().drop('weight', axis= 1)

In [79]:
# Align all indicator frames on their area-code index (one column block each)
# and write the combined table to disk
gov_indicator_frames = [gas_consumption, elc_consumption, elec_pre_meter, no_mains_gas, uc_GB, income_gb]
pd.concat(gov_indicator_frames, axis = 1).to_csv('Vars_gov.csv')