This script reads in three datasets downloaded from the Australian Bureau of Statistics (ABS) website.  
It filters and pivots each dataset by postcode, then assembles the results into a single working DataFrame for further analysis.

The CSV datasets were found at https://explore.data.abs.gov.au/.  
Census 2021, G36 Dwelling structure, Postal Areas (POA)  
Census 2021, G41 Dwelling structure by number of bedrooms, Postal Areas (POA)  
Census 2021, G42 Dwelling structure by household composition and family composition, Postal Areas (POA)



In [36]:
# Dependencies
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import gmaps
import json
import os

In [37]:
# Read in the 3 Census 2021 CSVs (Postal Area level)
# File locations, one per ABS table:
#   G36 Dwelling structure
dwell_path = "Resources/ABS_C21_G36_POA_1.0.0.csv"
#   G41 Dwelling structure by number of bedrooms
bedrooms_path = "Resources/ABS_C21_G41_POA_1.0.0.csv"
#   G42 Dwelling structure by household composition and family composition
comp_path = "Resources/ABS_C21_G42_POA_1.0.0.csv"

# Load each file into its own DataFrame, in the same order as the paths above
dwell_df, bedrooms_df, comp_df = map(pd.read_csv, (dwell_path, bedrooms_path, comp_path))


# Dwelling Type by Postcode

In [44]:
# Convert region column to postcode (first 4 characters of the region label)
dwell_df["Postcode"] = dwell_df['REGION: Region'].astype(str).str[0:4]

# Dwelling structure categories to keep: the separate-house row plus the
# "Total" row of each of the other dwelling types
dwelling_types = ["11: Separate house",
                  "2: Semi-detached, row or terrace house, townhouse etc with: Total",
                  "3: Flat or apartment: Total",
                  "9: Other dwelling: Total"]

# Filter to Persons only, totals only for dwelling types and remove the non-postcode region 9797
filt_dwell_df = dwell_df.loc[(dwell_df['SUM: Summation'] == "P: Persons") &
                (dwell_df["DWTSTRD: Dwelling structure"].isin(dwelling_types)) &
                (dwell_df["Postcode"] != "9797"), :]

# Filter to required columns only, rename columns and return dwelling type count value as integers
# NOTE: the new names look swapped, which appears deliberate: after the pivot below
# "Dwelling Type" is the top column header and "Count" the name of the category level
filt_dwell_df = filt_dwell_df[["DWTSTRD: Dwelling structure", "Postcode", "STATE: State", "OBS_VALUE"]]
filt_dwell_df = filt_dwell_df.rename(columns={"OBS_VALUE": "Dwelling Type", "DWTSTRD: Dwelling structure": "Count"})
filt_dwell_df["Dwelling Type"] = filt_dwell_df["Dwelling Type"].astype(int)

# Pivot dataframe by Postcode
# Only the numeric "Dwelling Type" column is summed. Summing the whole frame would also
# aggregate the text column "STATE: State" on pandas >= 2.0, which no longer drops
# non-numeric columns in groupby sums, and would add unwanted columns to the pivot.
# The double brackets keep a DataFrame so the pivot still has two column levels.
dwell_poa_df = filt_dwell_df.groupby(["Postcode", "Count"])[["Dwelling Type"]].sum().unstack()

# Rename dwelling type columns
dwell_poa_df = dwell_poa_df.rename(columns={"11: Separate house": "House",
                                "2: Semi-detached, row or terrace house, townhouse etc with: Total": "Semi-detached",
                                "3: Flat or apartment: Total": "Apartment",
                                "9: Other dwelling: Total": "Other dwelling"})
dwell_poa_df

Unnamed: 0_level_0,Dwelling Type,Dwelling Type,Dwelling Type,Dwelling Type
Count,House,Semi-detached,Apartment,Other dwelling
Postcode,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
0800,43,5,5948,3
0810,22676,2704,5969,84
0812,14511,1087,1525,121
0820,7317,3466,7171,96
0822,20976,1426,194,426
...,...,...,...,...
7466,29,0,0,0
7467,1576,20,27,12
7468,573,5,11,3
7469,621,41,4,6


# Number of Bedrooms by Postcode

In [45]:
# Derive the postcode from the first 4 characters of the region label
bedrooms_df["Postcode"] = bedrooms_df['REGION: Region'].astype(str).str[0:4]

# Keep only rows that carry an actual bedroom count (drop "not stated" and "Total")
# and drop the non-postcode region 9797
# NOTE(review): G41 is "Dwelling structure by number of bedrooms", yet no filter is
# applied on a dwelling-structure column here. If the CSV holds one row per dwelling
# structure (including sub-totals/totals), the sums below count dwellings more than
# once - confirm against the file.
bedroom_labels = bedrooms_df["BEDD: Number of bedrooms in private dwelling"]
is_bedroom_count = ~bedroom_labels.isin(['_N: Number of bedrooms not stated', '_T: Total'])
is_postcode = bedrooms_df["Postcode"] != "9797"
filt_bedrooms_df = bedrooms_df.loc[is_bedroom_count & is_postcode, :]

# Keep the three columns needed for the pivot and give them their display names
filt_bedrooms_df = (
    filt_bedrooms_df[["BEDD: Number of bedrooms in private dwelling", "Postcode", "OBS_VALUE"]]
    .rename(columns={"OBS_VALUE": "Number of Bedrooms",
                     "BEDD: Number of bedrooms in private dwelling": "Count"})
)

# Total per (postcode, bedroom category), then spread the categories across columns
bedroom_sums = filt_bedrooms_df.groupby(["Postcode", "Count"]).sum()
bedrooms_poa_df = bedroom_sums.unstack()

# Replace the coded ABS labels with short bedroom-count column names
bedroom_names = {
    "0: None (includes studio apartments or bedsitters)": "No bedrooms (studios, etc)",
    "1: 1 bedroom": "1 bedroom",
    "2: 2 bedrooms": "2 bedrooms",
    "3: 3 bedrooms": "3 bedrooms",
    "4: 4 bedrooms": "4 bedrooms",
    "5: 5 bedrooms": "5 bedrooms",
    "6: 6 or more bedrooms": "6 or more bedrooms",
}
bedrooms_poa_df = bedrooms_poa_df.rename(columns=bedroom_names)
bedrooms_poa_df


Unnamed: 0_level_0,Number of Bedrooms,Number of Bedrooms,Number of Bedrooms,Number of Bedrooms,Number of Bedrooms,Number of Bedrooms,Number of Bedrooms
Count,"No bedrooms (studios, etc)",1 bedroom,2 bedrooms,3 bedrooms,4 bedrooms,5 bedrooms,6 or more bedrooms
Postcode,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
0800,172,1856,3749,2805,168,12,6
0810,207,1768,8552,9360,5484,1260,311
0812,67,448,3389,6590,2353,543,95
0820,167,1463,7566,7580,2372,592,168
0822,216,873,3083,4392,1811,344,183
...,...,...,...,...,...,...,...
7466,0,0,10,12,0,0,0
7467,5,84,319,987,218,26,10
7468,0,33,145,251,116,6,14
7469,3,62,126,412,80,14,0


# Household Composition by Postcode

In [46]:
# List the distinct household/family composition labels present in the G42 data,
# to see which categories and total rows need filtering
comp_df["HHCFMCD: Household and Family composition"].unique()

array(['5: Family household: Total', '_T: Total',
       '3: Family household: One parent family', '7: Group household',
       '4: Family household: Other family', '6: Lone person household',
       '2: Family household: Couple family with children',
       '1: Family household: Couple family with no children'],
      dtype=object)

In [47]:
# Derive the postcode from the first 4 characters of the region label
comp_df["Postcode"] = comp_df['REGION: Region'].astype(str).str[0:4]

# Keep only rows for individual household compositions (drop the family-household
# sub-total and the overall total) and drop the non-postcode region 9797
# NOTE(review): G42 is "Dwelling structure by household composition and family
# composition", yet no filter is applied on a dwelling-structure column here. If the
# CSV holds one row per dwelling structure (including sub-totals/totals), the sums
# below count households more than once - confirm against the file.
comp_labels = comp_df["HHCFMCD: Household and Family composition"]
is_single_composition = ~comp_labels.isin(['5: Family household: Total', '_T: Total'])
is_postcode = comp_df["Postcode"] != "9797"
filt_comp_df = comp_df.loc[is_single_composition & is_postcode, :]

# Keep the three columns needed for the pivot and give them their display names
filt_comp_df = (
    filt_comp_df[["HHCFMCD: Household and Family composition", "Postcode", "OBS_VALUE"]]
    .rename(columns={"OBS_VALUE": "Household Composition",
                     "HHCFMCD: Household and Family composition": "Count"})
)

# Total per (postcode, composition category), then spread the categories across columns
comp_sums = filt_comp_df.groupby(["Postcode", "Count"]).sum()
comp_poa_df = comp_sums.unstack()

# Replace the coded ABS labels with short household composition column names
comp_names = {
    "1: Family household: Couple family with no children": "Couple with no children",
    "2: Family household: Couple family with children": "Couple with children",
    "3: Family household: One parent family": "One parent with children",
    "4: Family household: Other family": "Other family",
    "6: Lone person household": "Sole person household",
    "7: Group household": "Group household",
}
comp_poa_df = comp_poa_df.rename(columns=comp_names)
comp_poa_df


Unnamed: 0_level_0,Household Composition,Household Composition,Household Composition,Household Composition,Household Composition,Household Composition
Count,Couple with no children,Couple with children,One parent with children,Other family,Sole person household,Group household
Postcode,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
0800,2874,1181,395,134,3420,910
0810,6553,8881,2893,425,6996,1737
0812,3237,4486,2032,204,3249,592
0820,5792,4755,1644,220,5987,1843
0822,2456,4369,1989,404,1921,268
...,...,...,...,...,...,...
7466,6,10,0,0,6,8
7467,450,272,170,8,731,48
7468,181,114,38,0,229,18
7469,188,110,74,0,305,15


# Output

In [50]:
# Create master dataframe: place the three postcode-indexed pivots side by side
master_df = pd.concat([dwell_poa_df, bedrooms_poa_df, comp_poa_df], axis=1)

# Rename axes: label the index and blank out the names of both column levels
master_df.index.names = ["Postcode"]
master_df.columns.names = ["", ""]

# Write master dataframe to CSV
# to_csv does not create missing folders, so make sure the output directory exists first
os.makedirs("Output", exist_ok=True)
master_df.to_csv("Output/master_data.csv")
