In [1]:
from pathlib import Path
import pandas as pd
# Project root, two levels up from the working directory the notebook runs in.
ROOT = Path('..', '..')
# Echo the absolute location so the reader can confirm where ROOT points.
ROOT.resolve()

PosixPath('/Users/lukestrange/Code/housing')

In [2]:
# Load the raw affordable homes open data.
# NOTE(review): the file name carries no extension - confirm it matches the file on disk.
source_file = ROOT / 'raw/affordable-homes/affordable_homes_open_data_202223'
data = pd.read_csv(source_file)

In [3]:
# Drop the columns that are not used in the rest of the notebook
unused_columns = [
    'LA code',
    'LA name',
    'District name',
    'Metropolitan code',
    'Metropolitan name',
    'Region code',
    'Region name',
    'LA type 202223',
]
data = data.drop(columns=unused_columns)

In [4]:
# Rename the 'LA code 202223' / 'LA name 202223' columns to the
# geography_* names used by the grouping steps below
column_names = {
    'LA code 202223': 'geography_code',
    'LA name 202223': 'geography_name',
}
data = data.rename(columns=column_names)

In [5]:
# Keep only the rows whose Completions value is 'Completion';
# anything else is out of scope for now
is_completion = data['Completions'] == 'Completion'
data = data.loc[is_completion]

In [6]:
# Total the numeric columns for every local authority / year / tenure combination
la_year_keys = ['geography_code', 'geography_name', 'Year']
grouped_by_tenure = data.groupby(la_year_keys + ['Tenure'])
local_authority = grouped_by_tenure.sum(numeric_only=True).reset_index()

# Reshape to wide format: one row per local authority and year, one column per tenure
local_authority_wide = local_authority.pivot(
    index=la_year_keys,
    columns='Tenure',
    values='Units',
)

# The row-wise total across the tenure columns is the figure for all affordable homes.
# NOTE(review): 'All afforable' looks like a typo for 'All affordable'. It is kept
# unchanged because the same label is used for the England rows and ends up in the
# written output files.
local_authority_wide['All afforable'] = local_authority_wide.sum(axis=1)

In [7]:
# National figures: total the local authority rows for each year and tenure
england_totals = local_authority.groupby(['Year', 'Tenure']).sum(numeric_only=True)
all_england = england_totals.reset_index()

# Build the wide England table in one chain so it lines up with the
# local authority table (same columns, same three index levels).
all_england_wide = (
    all_england
    # One row per year, one column per tenure
    .pivot(index='Year', columns='Tenure', values='Units')
    # Total across the tenure columns; computed before the geography
    # columns are added so only tenure values are summed
    .assign(**{'All afforable': lambda wide: wide.sum(axis=1)})
    # Label every row with the geo code and name for England
    .assign(geography_code='E92000001', geography_name='England')
    # Move the labels into the index and put the levels in the same order
    # as the local authority table
    .set_index(['geography_code', 'geography_name'], append=True)
    .reorder_levels(['geography_code', 'geography_name', 'Year'])
)

In [8]:
# Stack the local authority rows and the England rows into a single table
frames = [local_authority_wide, all_england_wide]
combined = pd.concat(frames)

In [9]:
# Write the combined table to CSV and Parquet
output_dir = ROOT / 'data/affordable-homes'
# Create the output directory if it is missing so the writes below do not
# fail on a fresh checkout
output_dir.mkdir(parents=True, exist_ok=True)
combined.to_csv(output_dir / 'by_tenure.csv')
combined.to_parquet(output_dir / 'by_tenure.parquet')

In [10]:
# Display the combined table (pandas abbreviates long frames in the rendered output)
combined

Unnamed: 0_level_0,Unnamed: 1_level_0,Tenure,Affordable Home Ownership,Affordable Rent,First Homes,Intermediate Rent,London Affordable Rent,Shared Ownership,Social Rent,Unknown,All afforable
geography_code,geography_name,Year,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
E06000001,Hartlepool,1991-92,13.0,,,,,,134.0,,147.0
E06000001,Hartlepool,1992-93,29.0,,,,,,204.0,,233.0
E06000001,Hartlepool,1993-94,20.0,,,,,,123.0,,143.0
E06000001,Hartlepool,1994-95,22.0,,,,,,149.0,,171.0
E06000001,Hartlepool,1995-96,23.0,,,,,,134.0,,157.0
...,...,...,...,...,...,...,...,...,...,...,...
E92000001,England,2018-19,2460.0,28957.0,,1383.0,1002.0,17028.0,6363.0,33.0,57226.0
E92000001,England,2019-20,2108.0,28259.0,,1748.0,1797.0,18239.0,6766.0,43.0,58960.0
E92000001,England,2020-21,1134.0,23786.0,,2026.0,2102.0,16796.0,6051.0,28.0,51923.0
E92000001,England,2021-22,1047.0,26398.0,35.0,1477.0,3101.0,19338.0,7659.0,42.0,59097.0


# TODO
Use a lookup table to map local authorities (LAs) to metropolitan counties and regions.