In [1]:
import requests
from bs4 import BeautifulSoup
import json
import pandas as pd
import pickle

# WMCA LA Code

In [2]:
# Get all local authority codes and corresponding council names
response = requests.get(
    "https://epc.opendatacommunities.org/docs/api/domestic#domestic-local-authority",
    timeout=30,  # never hang the notebook on an unresponsive server
)
# Fail here with a clear HTTP error instead of an IndexError while parsing
response.raise_for_status()
page = response.text
# Name the parser explicitly so the parse tree does not depend on which
# optional parsers happen to be installed (and to avoid bs4's parser warning).
soup = BeautifulSoup(page, "html.parser")

# The fourth <table> on the page is taken as the code / council-name listing
table = soup.find_all('table')[3]
la_code_dict = {}

# Skip the first row; every remaining row must hold exactly two cells
for tr in table.find_all('tr')[1:]:
    code, local_auth = tr.find_all('td')
    la_code_dict[local_auth.text] = code.text

# Get local authority codes for councils in WMCA
# The context manager closes the file handle deterministically.
with open("../data/raw/WMCA_council.txt") as f:
    # The last comma-separated piece is discarded
    # (presumably the file ends with a trailing comma - TODO confirm).
    WMCA_councils = f.read().split(",")[:-1]
WMCA_code = [la_code_dict[i] for i in WMCA_councils]
# Maps local authority code -> council name
WMCA = dict(zip(WMCA_code, WMCA_councils))

# Save codes for future use
with open('../data/raw/WMCA_council_code.pkl', 'wb') as f:
    pickle.dump(WMCA, f)

# Electricity consumption

In [2]:
# Electricity consumption data: "2020" sheet, header taken from row index 4
# NOTE: 'mean_counsumption' (sic) keeps its spelling because a later cell
# selects the column under that exact name.
elec_columns = [
    'la_code', 'la', 'msoa_code', 'msoa', 'lsoa_code', 'lsoa',
    'num_meter', 'total_consumption', 'mean_counsumption', 'median_consumption',
]
elec_consump_df = pd.read_excel(
    '../data/external/LSOA_domestic_elec_2010-20.xlsx',
    sheet_name="2020",
    header=4,
)
elec_consump_df.columns = elec_columns

# Keep only the rows whose local authority code is one of the WMCA codes
in_wmca = elec_consump_df['la_code'].isin(WMCA_code)
elec_consump_df = elec_consump_df[in_wmca]

FileNotFoundError: [Errno 2] No such file or directory: '../data/external/LSOA_domestic_elec_2010-20.xlsx'

In [3]:
# Group the consumption rows by local authority. All columns were renamed when
# the sheet was loaded, so the local authority key on this frame is 'la_code'
# (there is no 'local-authority' column here).
elec_consump_df.groupby('la_code')

NameError: name 'elec_consump_df' is not defined

In [7]:
# Postcode -> LSOA -> MSOA conversion table
postcode_df = pd.read_csv(
    "../data/external/PCD_OA_LSOA_MSOA_LAD_AUG19_UK_LU.csv",
    low_memory=False,
    encoding='latin-1',
)

# Reload the WMCA codes pickled earlier. The pickle holds the
# code -> council-name dict, so WMCA_code is a dict from here on and the
# membership test below runs against its keys.
with open('../data/raw/WMCA_council_code.pkl', 'rb') as f:
    WMCA_code = pickle.load(f)

# Restrict the lookup to local authorities in WMCA
is_wmca_lad = postcode_df['ladcd'].isin(WMCA_code)
postcode_df = postcode_df[is_wmca_lad]

In [13]:
# Attach electricity consumption to each postcode via its LSOA code.
# Left join: postcodes with no matching consumption row are kept.
merged_on_lsoa = pd.merge(
    postcode_df,
    elec_consump_df,
    how="left",
    left_on="lsoa11cd",
    right_on="lsoa_code",
)
keep_cols = [
    'pcds', 'la', 'la_code', 'msoa_code', 'msoa', 'lsoa_code', 'lsoa',
    'num_meter', 'total_consumption', 'mean_counsumption', 'median_consumption',
]
postcode_elec_consump_df = merged_on_lsoa[keep_cols]

# EPC Rating Data

In [16]:
# Make private
import os
from dotenv import load_dotenv, find_dotenv

# Locate the .env file by walking up the directory tree until one is found
dotenv_path = find_dotenv()

# Export the entries of that file as environment variables
load_dotenv(dotenv_path=dotenv_path)

# Token for the EPC API; None when EPC_AUTH_TOKEN is not set
AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")

In [17]:
def get_epc_data(postcode, num_rows=5000, timeout=30):
    """
    Pull data from Domestic Energy Performance Certificates API.

    Input:
    postcode(str): Postcode to search for
    num_rows(int): Number of rows to pull. Max 5000 allowed at one time
    timeout(float): Seconds to wait for the server before giving up

    Output:
    (str): Raw response body returned by the API

    Raises:
    requests.exceptions.Timeout: If the server does not answer within `timeout`

    """
    headers = {
        'Authorization': f'Basic {AUTH_TOKEN}',
        'Accept': 'application/json'
    }
    params = {
        'postcode': postcode,
        'size': num_rows
    }
    url = 'https://epc.opendatacommunities.org/api/v1/domestic/search'
    # Without a timeout a stalled connection would block the caller forever.
    res = requests.get(url, headers=headers, params=params, timeout=timeout)
    return res.text

In [18]:
# Pull EPC rows for the first 100 unique WMCA postcodes
result = []

sample_postcodes = postcode_elec_consump_df.pcds.unique()[:100]
for pcd in sample_postcodes:
    payload = get_epc_data(pcd)
    if len(payload) == 0:
        # Empty response body: nothing to parse for this postcode
        continue
    result.extend(json.loads(payload)['rows'])

In [21]:
EPC_data = pd.DataFrame(result)

# Left-join the electricity consumption columns onto every EPC record by
# postcode, then drop the duplicate join key and the address-level fields.
unwanted_cols = ["pcds", "address1", "address2", "address3", 'uprn-source']
EPC_postcode_elec_consump = pd.merge(
    EPC_data,
    postcode_elec_consump_df,
    left_on="postcode",
    right_on="pcds",
    how="left",
).drop(columns=unwanted_cols)

In [39]:
# Export the distinct postcodes present in the merged EPC data
unique_postcodes = EPC_postcode_elec_consump.postcode.unique()
with open('../data/processed/WMCA_postcodes.pkl', 'wb') as fp:
    pickle.dump(unique_postcodes, fp)

# Fuel Poverty

In [21]:
# Load "Table 3" of the fuel poverty workbook (header on row index 2) and
# discard the descriptive columns, leaving four columns to rename.
fuel_poverty_df = pd.read_excel(
    "../data/external/sub-regional-fuel-poverty-2022-tables.xlsx",
    sheet_name="Table 3",
    header=2,
).drop(columns=["LSOA Name", "LA Code", "LA Name", "Region"])
fuel_poverty_df.columns = [
    "lsoa_code",
    "num_households",
    "num_households_fuel_poverty",
    "prop_households_fuel_poor",
]

In [22]:
# Index labels of the rows whose household count is missing
missing_households = fuel_poverty_df['num_households'].isna()
idx = fuel_poverty_df[missing_households].index

In [28]:
# Per-LSOA mean of every remaining column
grouped_df = fuel_poverty_df.groupby('lsoa_code').mean()
# Placeholder mapping: every LSOA code points at the constant 'A'
mapping = dict.fromkeys(fuel_poverty_df['lsoa_code'], 'A')

In [23]:
import numpy as np
# Map each LSOA code to a row position (0..n-1); when a code occurs more than
# once, the position of its last occurrence wins.
row_positions = np.arange(len(fuel_poverty_df))
mapping = dict(zip(fuel_poverty_df['lsoa_code'], row_positions))

In [24]:
# Fill the rows that had no household count with whatever `mapping` holds for
# that row's LSOA code.
# NOTE(review): `mapping` does not appear to contain household counts, so this
# looks like placeholder data - confirm it is intended rather than dropping
# or properly imputing these rows.
filled_values = fuel_poverty_df.loc[idx, 'lsoa_code'].map(mapping)
fuel_poverty_df.loc[idx, 'num_households'] = filled_values

In [25]:
# Count the rows whose num_households is still missing after the fill step
fuel_poverty_df['num_households'].isna().sum()

0

In [27]:
# Show the num_households values now held by the rows selected in `idx`
fuel_poverty_df.loc[idx, 'num_households']

32844    32850.0
32845    32850.0
32846    32846.0
32847    32847.0
32848    32848.0
32849    32849.0
32850    32850.0
32851    32851.0
Name: num_households, dtype: float64

In [27]:
# Left-join the fuel poverty figures onto the EPC/electricity frame by LSOA code
EPC_postcode_elec_consump_fuel_poverty = EPC_postcode_elec_consump.merge(
    fuel_poverty_df,
    how="left",
    on="lsoa_code",
)

# UPRN

In [7]:
# Load UPRN coordinates.
# Forward slashes keep the path portable (the backslash form only resolved on
# Windows and relied on invalid string escapes) and match every other path in
# this notebook.
uprn_columns = ['UPRN', 'LATITUDE', 'LONGITUDE']
uprn_df = pd.read_csv(
    "../data/external/osopenuprn_202207_csv/osopenuprn_202206.csv",
    usecols=uprn_columns,  # parse only the columns that are kept
)
# usecols does not guarantee column order, so select explicitly
uprn_df = uprn_df[uprn_columns]

In [37]:
# Cast UPRN to object dtype so it can be merged against the EPC 'uprn' column
# NOTE(review): the cast changes the dtype only, not the values - confirm the
# EPC 'uprn' values are of the same Python type, otherwise the merge will
# match nothing.
uprn_df['UPRN'] = uprn_df['UPRN'].astype('object')

In [45]:
# Left-join latitude/longitude by UPRN, then drop the duplicate key and the
# local authority columns.
EPC_postcode_elec_consump_fuel_poverty_uprn = pd.merge(
    EPC_postcode_elec_consump_fuel_poverty,
    uprn_df,
    left_on="uprn",
    right_on="UPRN",
    how="left",
).drop(columns=["UPRN", 'la', 'la_code'])

In [9]:
# Write the final frame to CSV; the file name carries the number of rows
n_rows = len(EPC_postcode_elec_consump_fuel_poverty_uprn)
out_path = f"../data/processed/data_{n_rows}.csv"
EPC_postcode_elec_consump_fuel_poverty_uprn.to_csv(out_path, index=False)