# Dwellings Started and Completed 1980-2021 UK by Tenure and District

In [1]:
import pandas as pd
import numpy as np

The dataset is taken from the ONS publication on Housing, Live Table 253: Housebuilding: permanent dwellings started and completed, by tenure and district, 1980-2021.

The Excel table contains some irregular formatting and empty columns. We import the dataset and remove unnecessary columns, such as the National Totals, since we are only interested in the data at the Local Authority level. The abbreviations used in the column names are:
* DS: dwellings started
* DC: dwellings completed
* P: private enterprise
* HA: housing association
* LA: local authority


The data wrangling is divided into four sections (1980-1998, 1999-2010, 2011, and 2012-2021) because the formatting of the sheets changes between these periods.

## Importing and cleaning data from 1980-1998

In [2]:
# Sheets 0-17 share one layout. pandas labels the columns 'Unnamed: N' when
# header=1 is used, so they are renamed by position here. The mapping and the
# drop list do not change between sheets, so they are built once.
rename_map = {
    'Unnamed: 0': 'DLUHC Code', 'Unnamed: 1': 'Former ONS', 'Unnamed: 2': 'Current ONS',
    'Unnamed: 3': 'National Total', 'Unnamed: 4': 'LA Total', 'Unnamed: 5': 'remove1',
    'Unnamed: 6': 'DS_P', 'Unnamed: 7': 'DS_HA', 'Unnamed: 8': 'DS_LA', 'Unnamed: 9': 'DS_total',
    'Unnamed: 10': 'remove2',
    'Unnamed: 11': 'DC_P', 'Unnamed: 12': 'DC_HA', 'Unnamed: 13': 'DC_LA', 'Unnamed: 14': 'DC_total',
}
unwanted_columns = ['remove1', 'remove2', 'National Total']

data1 = []
# The sheet index gets its own name so it is no longer shadowed by the row loop.
for sheet_idx in range(18):
    df = (
        pd.read_excel('LiveTable253.xlsx', sheet_name=sheet_idx, header=1)
        .rename(columns=rename_map)
        # Row label 2 is dropped along with the filler columns.
        # NOTE(review): presumably a leftover header/units row - confirm against the workbook.
        .drop(columns=unwanted_columns, index=2)
        # Keep only rows that have an entry in the 'LA Total' column.
        .loc[lambda frame: frame['LA Total'].notna()]
        # .assign returns a new frame, so no SettingWithCopyWarning is raised.
        # Sheet index i is labelled as year 1981 + i.
        .assign(year=1981 + sheet_idx)
        .reset_index(drop=True)
    )
    # Collect the rows as plain lists; the column names are re-attached after merging.
    data1.extend(df.values.tolist())
    
    
    

In [3]:
# Turn the accumulated row lists into a frame; columns get positional labels 0..12.
df1 = pd.DataFrame(data=data1)
# Preview the first five rows.
df1.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,P0105,,,Bath,230,0,10,240,160,0,0,170,1981
1,U0110,,,Bristol,300,100,130,530,210,180,590,980,1981
2,C0115,,,Kingswood,280,90,40,410,560,30,60,650,1981
3,G0120,,,Northavon,280,10,70,370,350,80,120,550,1981
4,Q0125,,,Wansdyke,210,0,10,220,200,50,0,250,1981


## Importing and cleaning data from 1999-2010

In [4]:
# Sheets 18-29 carry an extra 'Met Total' column, which shifts the remaining
# columns one position to the right compared with the earlier sheets. The
# mapping and the drop list are built once, outside the loop.
rename_map = {
    'Unnamed: 0': 'DLUHC Code', 'Unnamed: 1': 'Former ONS', 'Unnamed: 2': 'Current ONS',
    'Unnamed: 3': 'National Total', 'Unnamed: 4': 'Met Total', 'Unnamed: 5': 'LA Total',
    'Unnamed: 6': 'remove1',
    'Unnamed: 7': 'DS_P', 'Unnamed: 8': 'DS_HA', 'Unnamed: 9': 'DS_LA', 'Unnamed: 10': 'DS_total',
    'Unnamed: 11': 'remove2',
    'Unnamed: 12': 'DC_P', 'Unnamed: 13': 'DC_HA', 'Unnamed: 14': 'DC_LA', 'Unnamed: 15': 'DC_total',
}
unwanted_columns = ['remove1', 'remove2', 'National Total', 'Met Total']

data2 = []
# The sheet index gets its own name so it is no longer shadowed by the row loop.
for sheet_idx in range(18, 30):
    df = (
        pd.read_excel('LiveTable253.xlsx', sheet_name=sheet_idx, header=1)
        .rename(columns=rename_map)
        # Row label 2 is dropped along with the filler/total columns.
        # NOTE(review): presumably a leftover header/units row - confirm against the workbook.
        .drop(columns=unwanted_columns, index=2)
        # Keep only rows that have an entry in the 'LA Total' column.
        .loc[lambda frame: frame['LA Total'].notna()]
        # .assign returns a new frame, so no SettingWithCopyWarning is raised.
        # Sheet index i is labelled as year 1981 + i (sheet 18 -> 1999).
        .assign(year=1981 + sheet_idx)
        .reset_index(drop=True)
    )
    # Collect the rows as plain lists; the column names are re-attached after merging.
    data2.extend(df.values.tolist())
    

In [5]:
# Turn the accumulated row lists into a frame; columns get positional labels.
df2 = pd.DataFrame(data=data2)
# Report (rows, columns) as a sanity check, then preview the first five rows.
print(df2.shape)
df2.head(n=5)


(4220, 13)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,N1350,00EH,E06000005,Darlington UA,480,..,..,..,480,..,..,..,1999
1,H0724,00EB,E06000001,Hartlepool UA,420,0,0,420,380,20,0,400,1999
2,W0734,00EC,E06000002,Middlesbrough UA,230,90,0,320,340,70,0,410,1999
3,V0728,00EE,E06000003,Redcar and Cleveland UA,130,70,0,200,90,50,0,140,1999
4,H0738,00EF,E06000004,Stockton-on-Tees UA,880,80,0,960,970,20,0,990,1999


## Importing and cleaning data for 2011

In [6]:
# Sheet 30 is the only sheet with a third filler column ('remove3'), so it is
# handled on its own instead of through a one-element loop.
sheet_idx = 30
rename_map = {
    'Unnamed: 0': 'DLUHC Code', 'Unnamed: 1': 'Former ONS', 'Unnamed: 2': 'Current ONS',
    'Unnamed: 3': 'National Total', 'Unnamed: 4': 'Met Total', 'Unnamed: 5': 'LA Total',
    'Unnamed: 6': 'remove1',
    'Unnamed: 7': 'DS_P', 'Unnamed: 8': 'DS_HA', 'Unnamed: 9': 'DS_LA', 'Unnamed: 10': 'DS_total',
    'Unnamed: 11': 'remove2', 'Unnamed: 12': 'remove3',
    'Unnamed: 13': 'DC_P', 'Unnamed: 14': 'DC_HA', 'Unnamed: 15': 'DC_LA', 'Unnamed: 16': 'DC_total',
}
unwanted_columns = ['remove1', 'remove2', 'remove3', 'National Total', 'Met Total']

df = (
    pd.read_excel('LiveTable253.xlsx', sheet_name=sheet_idx, header=1)
    .rename(columns=rename_map)
    # Row label 2 is dropped along with the filler/total columns.
    # NOTE(review): presumably a leftover header/units row - confirm against the workbook.
    .drop(columns=unwanted_columns, index=2)
    # Keep only rows that have an entry in the 'LA Total' column.
    .loc[lambda frame: frame['LA Total'].notna()]
    # .assign returns a new frame, so no SettingWithCopyWarning is raised.
    # Sheet index i is labelled as year 1981 + i (sheet 30 -> 2011).
    .assign(year=1981 + sheet_idx)
    .reset_index(drop=True)
)
# Collect the rows as plain lists; the column names are re-attached after merging.
data3 = df.values.tolist()


In [7]:
# Build the 2011 frame, turn the '..' placeholder into NaN, and discard every
# row that then contains a missing value in any column. The surviving rows keep
# their original positional labels, so the index has gaps.
# NOTE(review): the other period frames are not cleaned this way, so '..'
# placeholders may survive elsewhere - confirm this asymmetry is intended.
df3 = (
    pd.DataFrame(data3)
    .replace('..', np.nan)
    .dropna()
)
df3.head()


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,X1355,00EJ,E06000047,County Durham UA,800.0,300.0,0.0,1090.0,930.0,190.0,0.0,1110.0,2011
1,N1350,00EH,E06000005,Darlington UA,230.0,0.0,0.0,230.0,120.0,0.0,0.0,120.0,2011
2,H0724,00EB,E06000001,Hartlepool UA,140.0,90.0,0.0,230.0,140.0,0.0,0.0,150.0,2011
4,P2935,00EM,E06000048,Northumberland UA,400.0,80.0,0.0,480.0,520.0,30.0,0.0,550.0,2011
5,V0728,00EE,E06000003,Redcar and Cleveland UA,110.0,100.0,0.0,210.0,150.0,100.0,0.0,250.0,2011


## Importing and cleaning data from 2012-2021

In [8]:
# Sheets 31-40 start with an extra filler column ('remove4'), have no
# 'National Total' column, and their last column arrives with the header
# 'Number of dwellings' rather than an 'Unnamed: N' label. The mapping and the
# drop list are built once, outside the loop.
rename_map = {
    'Unnamed: 0': 'remove4',
    'Unnamed: 1': 'DLUHC Code', 'Unnamed: 2': 'Former ONS', 'Unnamed: 3': 'Current ONS',
    'Unnamed: 4': 'Met Total', 'Unnamed: 5': 'LA Total', 'Unnamed: 6': 'remove1',
    'Unnamed: 7': 'DS_P', 'Unnamed: 8': 'DS_HA', 'Unnamed: 9': 'DS_LA', 'Unnamed: 10': 'DS_total',
    'Unnamed: 11': 'remove2',
    'Unnamed: 12': 'DC_P', 'Unnamed: 13': 'DC_HA', 'Unnamed: 14': 'DC_LA',
    'Number of dwellings': 'DC_total',
}
unwanted_columns = ['remove1', 'remove2', 'remove4', 'Met Total']

data4 = []
# The sheet index gets its own name so it is no longer shadowed by the row loop.
for sheet_idx in range(31, 41):
    df = (
        pd.read_excel('LiveTable253.xlsx', sheet_name=sheet_idx, header=1)
        .rename(columns=rename_map)
        # In this layout row label 1 (not 2) is dropped with the filler columns.
        # NOTE(review): presumably a leftover header/units row - confirm against the workbook.
        .drop(columns=unwanted_columns, index=1)
        # Keep only rows that have an entry in the 'LA Total' column.
        .loc[lambda frame: frame['LA Total'].notna()]
        # .assign returns a new frame, so no SettingWithCopyWarning is raised.
        # Sheet index i is labelled as year 1981 + i (sheet 31 -> 2012).
        .assign(year=1981 + sheet_idx)
        .reset_index(drop=True)
    )
    # Collect the rows as plain lists; the column names are re-attached after merging.
    data4.extend(df.values.tolist())


In [9]:
# Turn the accumulated row lists into a frame; columns get positional labels.
df4 = pd.DataFrame(data=data4)
# Report (rows, columns) as a sanity check, then preview the first five rows.
print(df4.shape)
df4.head(n=5)


(3240, 13)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,F0114,00HA,E06000022,Bath and North East Somerset UA,330,200,0,530,100,180,0,280,2012
1,K0235,00KB,E06000055,Bedford UA,490,160,0,650,520,290,0,810,2012
2,M2372,00EX,E06000008,Blackburn with Darwen UA,180,40,0,220,70,0,0,70,2012
3,J2373,00EY,E06000009,Blackpool UA,10,30,0,40,20,20,0,40,2012
4,G1250,00HN,E06000028,Bournemouth UA,270,50,60,380,370,10,120,490,2012


## Merging all datasets and exporting to excel sheet

In [10]:
# Final column names, in the positional order produced by the per-period cells.
final_columns = [
    'DLUHC Code', 'Former ONS Code', 'ONS Code', 'Local Authority',
    'DS_P', 'DS_HA', 'DS_LA', 'DS_Total',
    'DC_P', 'DC_HA', 'DC_LA', 'DC_Total',
    'year',
]

# Stack the four period frames. ignore_index=True gives the result one
# continuous 0..n-1 row index; without it each period restarts at 0 and the
# duplicate labels are carried into any later export.
# NOTE(review): df2 still shows the '..' placeholder in its preview, so the
# numeric columns of the merged frame may hold strings - confirm and convert
# before doing arithmetic on them.
df_main = (
    pd.concat([df1, df2, df3, df4], ignore_index=True)
    # Map positional labels 0..12 onto the final names.
    .rename(columns=dict(enumerate(final_columns)))
)
df_main.head()

Unnamed: 0,DLUHC Code,Former ONS Code,ONS Code,Local Authority,DS_P,DS_HA,DS_LA,DS_Total,DC_P,DC_HA,DC_LA,DC_Total,year
0,P0105,,,Bath,230,0,10,240,160,0,0,170,1981
1,U0110,,,Bristol,300,100,130,530,210,180,590,980,1981
2,C0115,,,Kingswood,280,90,40,410,560,30,60,650,1981
3,G0120,,,Northavon,280,10,70,370,350,80,120,550,1981
4,Q0125,,,Wansdyke,210,0,10,220,200,50,0,250,1981


In [11]:
# Spot-check: show every row whose 'Local Authority' is exactly 'Coventry'.
df_main.loc[df_main['Local Authority'].eq('Coventry')]

Unnamed: 0,DLUHC Code,Former ONS Code,ONS Code,Local Authority,DS_P,DS_HA,DS_LA,DS_Total,DC_P,DC_HA,DC_LA,DC_Total,year
352,U4610,00CQ,E08000026,Coventry,230,170,10,420,290,80,350,730,1981
727,U4610,00CQ,E08000026,Coventry,330,160,130,610,300,230,100,630,1982
1102,U4610,00CQ,E08000026,Coventry,440,110,80,630,300,80,80,460,1983
1477,U4610,00CQ,E08000026,Coventry,530,120,30,680,490,190,150,830,1984
1852,U4610,00CQ,E08000026,Coventry,440,90,110,640,490,120,10,610,1985
2227,U4610,00CQ,E08000026,Coventry,390,20,60,470,430,100,40,570,1986
2601,U4610,00CQ,E08000026,Coventry,200,90,0,300,280,60,100,440,1987
2975,U4610,00CQ,E08000026,Coventry,300,210,0,510,230,60,30,320,1988
3349,U4610,00CQ,E08000026,Coventry,230,100,0,330,380,130,0,510,1989
3723,U4610,00CQ,E08000026,Coventry,270,80,0,350,220,100,0,320,1990


In [49]:
# Write the merged frame to an Excel workbook at this relative path
# (the row index is written as the first column, which is the pandas default).
output_path = 'New Dwellings by LA 1980-2021.xlsx'
df_main.to_excel(output_path)