In [5]:
import pandas as pd
import numpy as np
import re

In [6]:
# CMA_UID codes of the metropolitan areas kept by get_locations():
# Toronto = 35535
# Calgary = 48825
# Edmonton = 48835
# Montreal = 24462
# Vancouver = 59933

In [13]:
# Load the raw DA extract for every census year.
# NOTE: da2021 must come from the 2021 file; reading 2001_da.csv here would
# leave the frame without the v_CA21_* columns selected further down.
da2021 = pd.read_csv("../../data_raw/DA_data/2021_da.csv")
da2016 = pd.read_csv("../../data_raw/DA_data/2016_da.csv")
da2011 = pd.read_csv("../../data_raw/DA_data/2011_da.csv")
da2006 = pd.read_csv("../../data_raw/DA_data/2006_da.csv")
da2001 = pd.read_csv("../../data_raw/DA_data/2001_da.csv")

In [4]:
def get_locations(df, cma_uids=(35535, 48825, 48835, 24462, 59933)):
    """
    Keep only the rows that belong to the metropolitan areas of interest.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing a "CMA_UID" column.
    cma_uids : iterable of int, optional
        CMA_UID values to keep. Defaults to Toronto (35535), Calgary (48825),
        Edmonton (48835), Montreal (24462) and Vancouver (59933).

    Returns
    -------
    pandas.DataFrame
        An independent copy of the matching rows; ``df`` itself is not modified.
    """
    keep = df["CMA_UID"].isin(list(cma_uids))
    # Filter first, then copy: only the retained rows are duplicated, and the
    # result can be modified freely without touching ``df``.
    return df[keep].copy()

In [5]:
def simplify_col_names(data):
    """
    Strip the leading "<code>: " prefix from the column labels of ``data``.

    Only the text up to and including the first ": " is removed; labels
    without ": " are kept unchanged. The columns are reassigned on ``data``
    itself and nothing is returned.
    """
    renamed = []
    for label in data.columns:
        if ": " in label:
            label = label.split(": ", 1)[1]
        renamed.append(label)
    data.columns = renamed

In [6]:
# Restrict every census year to the metropolitan areas selected by get_locations.
da2021, da2016, da2011, da2006, da2001 = [
    get_locations(frame) for frame in (da2021, da2016, da2011, da2006, da2001)
]

## 2021 Census Data

In [7]:
# da2021.columns

In [8]:
# Columns kept from the 2021 table (inspect da2021.columns for the full list).
col2021 = [
    # Identifiers and totals
    "GeoUID",
    "CMA_UID",
    "v_CA21_6: Population density per square kilometre",
    "Dwellings",
    # Structural type of dwelling
    "v_CA21_434: Occupied private dwellings by structural type of dwelling data",
    "v_CA21_435: Single-detached house",
    "v_CA21_436: Semi-detached house",
    "v_CA21_437: Row house",
    "v_CA21_438: Apartment or flat in a duplex",
    "v_CA21_439: Apartment in a building that has fewer than five storeys",
    "v_CA21_440: Apartment in a building that has five or more storeys",
    "v_CA21_441: Other single-attached house",
    "v_CA21_442: Movable dwelling",
    # Bedrooms
    "v_CA21_4245: No bedrooms",
    "v_CA21_4246: 1 bedroom",
    "v_CA21_4247: 2 bedrooms",
    "v_CA21_4248: 3 bedrooms",
    "v_CA21_4249: 4 or more bedrooms",
    # Owner / renter
    "v_CA21_4238: Owner",
    "v_CA21_4239: Renter",
]

In [9]:
# Keep only the columns listed in col2021, in that order.
da2021 = da2021.loc[:, col2021]

## 2016 Census Data

In [10]:
# da2016.columns

In [11]:
# Columns kept from the 2016 table.
col2016 = [
    # Identifiers and totals
    "GeoUID",
    "CMA_UID",
    "v_CA16_406: Population density per square kilometre",
    "Dwellings",
    # Structural type of dwelling
    "v_CA16_408: Occupied private dwellings by structural type of dwelling data",
    "v_CA16_409: Single-detached house",
    "v_CA16_412: Semi-detached house",
    "v_CA16_413: Row house",
    "v_CA16_414: Apartment or flat in a duplex",
    "v_CA16_415: Apartment in a building that has fewer than five storeys",
    "v_CA16_410: Apartment in a building that has five or more storeys",
    "v_CA16_411: Other attached dwelling",
    "v_CA16_416: Other single-attached house",
    "v_CA16_417: Movable dwelling",
    # Bedrooms
    "v_CA16_4844: No bedrooms",
    "v_CA16_4845: 1 bedroom",
    "v_CA16_4846: 2 bedrooms",
    "v_CA16_4847: 3 bedrooms",
    "v_CA16_4848: 4 or more bedrooms",
    # Owner / renter
    "v_CA16_4837: Owner",
    "v_CA16_4838: Renter",
]

In [12]:
# Keep only the columns listed in col2016, in that order.
da2016 = da2016.loc[:, col2016]

## 2011 Census Data

In [13]:
# da2011.columns

In [14]:
# Derive population density for 2011 from the Population and Area (sq km) columns.
da2011 = da2011.assign(
    **{"Population Density per square kilometre": da2011["Population"] / da2011["Area (sq km)"]}
)

In [15]:
# Columns kept from the 2011 table.
col2011 = [
    # Identifiers and totals
    "GeoUID",
    "CMA_UID",
    "Population Density per square kilometre",
    "Dwellings",
    # Structural type of dwelling
    "v_CA11F_199: Total number of occupied private dwellings by structural type of dwelling",
    "v_CA11F_200: Single-detached house",
    "v_CA11F_204: Semi-detached house",
    "v_CA11F_205: Row house",
    "v_CA11F_206: Apartment, duplex",
    "v_CA11F_207: Apartment, building that has fewer than five storeys",
    "v_CA11F_201: Apartment, building that has five or more storeys",
    "v_CA11F_203: Other dwelling",
    "v_CA11F_208: Other single-attached house",
    "v_CA11F_202: Movable dwelling",
    # Bedrooms
    "v_CA11N_2248: 0 to 1 bedroom",
    "v_CA11N_2249: 2 bedrooms",
    "v_CA11N_2250: 3 bedrooms",
    "v_CA11N_2251: 4 or more bedrooms",
    # Owner / renter
    "v_CA11N_2253: Owner",
    "v_CA11N_2254: Renter",
]

In [16]:
# Keep only the columns listed in col2011, in that order.
da2011 = da2011.loc[:, col2011]

## 2006 Census Data

In [17]:
# Derive population density for 2006 from the Population and Area (sq km) columns.
da2006 = da2006.assign(
    **{"Population Density per square kilometre": da2006["Population"] / da2006["Area (sq km)"]}
)

In [18]:
# da2006.columns

In [19]:
# Columns kept from the 2006 table.
col2006 = [
    # Identifiers and totals
    "GeoUID",
    "CMA_UID",
    "Population Density per square kilometre",
    "Dwellings",
    # Structural type of dwelling
    "v_CA06_119: Total number of occupied private dwellings by structural type of dwelling - 100% data",
    "v_CA06_120: Single-detached house",
    "v_CA06_121: Semi-detached house",
    "v_CA06_122: Row house",
    "v_CA06_123: Apartment, duplex",
    "v_CA06_124: Apartment, building that has five or more storeys",
    "v_CA06_125: Apartment, building that has fewer than five storeys",
    "v_CA06_126: Other single-attached house",
    "v_CA06_127: Movable dwelling",
    # Bedrooms (only an average is selected for this year)
    "v_CA06_100: Average number of bedrooms per dwelling",
    # Owned / rented
    "v_CA06_102: Owned",
    "v_CA06_103: Rented",
]

In [20]:
# Keep only the columns listed in col2006, in that order.
da2006 = da2006.loc[:, col2006]

## 2001 Census Data

In [21]:
# Derive population density for 2001 from the Population and Area (sq km) columns.
da2001 = da2001.assign(
    **{"Population Density per square kilometre": da2001["Population"] / da2001["Area (sq km)"]}
)

In [22]:
# da2001.columns

In [23]:
# Columns kept from the 2001 table.
col2001 = [
    # Identifiers and totals
    "GeoUID",
    "CMA_UID",
    "Population Density per square kilometre",
    "Dwellings",
    # Structural type of dwelling
    "v_CA01_112: Total number of occupied private dwellings by structural type of dwelling",
    "v_CA01_113: Single-detached house",
    "v_CA01_114: Semi-detached house",
    "v_CA01_115: Row house",
    "v_CA01_116: Apartment, detached duplex",
    "v_CA01_118: Apartment, building that has fewer than five storeys",
    "v_CA01_117: Apartment, building that has five or more storeys",
    "v_CA01_119: Other single-attached house",
    # Bedrooms (only an average is selected for this year)
    "v_CA01_98: Average number of bedrooms per dwelling",
    "v_CA01_120: Movable dwelling",
    # Owned / rented
    "v_CA01_99: Owned",
    "v_CA01_100: Rented",
]

In [24]:
# Keep only the columns listed in col2001, in that order.
da2001 = da2001.loc[:, col2001]

## Rename Columns

In [25]:
# Drop the "<code>: " prefix from the column labels of every year's table.
for frame in (da2001, da2006, da2011, da2016, da2021):
    simplify_col_names(frame)

In [26]:
# Map the 2001 labels onto the names used for the other census years.
labels_2001 = {
    "Total number of occupied private dwellings by structural type of dwelling": "Total Occupied Private Dwellings",
    "Apartment, detached duplex": "Apartment, duplex",
}
da2001 = da2001.rename(columns=labels_2001)

In [27]:
# Map the 2006 total-dwellings label onto the name used for the other census years.
labels_2006 = {
    "Total number of occupied private dwellings by structural type of dwelling - 100% data": "Total Occupied Private Dwellings",
}
da2006 = da2006.rename(columns=labels_2006)

In [28]:
# Map the 2011 labels onto the names used for the other census years.
labels_2011 = {
    "Total number of occupied private dwellings by structural type of dwelling": "Total Occupied Private Dwellings",
    "Owner": "Owned",
    "Renter": "Rented",
}
da2011 = da2011.rename(columns=labels_2011)

In [29]:
# Map the 2016 labels onto the names used for the other census years.
labels_2016 = {
    "Population density per square kilometre": "Population Density per square kilometre",
    "Occupied private dwellings by structural type of dwelling data": "Total Occupied Private Dwellings",
    "Apartment in a building that has fewer than five storeys": "Apartment, building that has fewer than five storeys",
    "Apartment in a building that has five or more storeys": "Apartment, building that has five or more storeys",
    "Apartment or flat in a duplex": "Apartment, duplex",
    "Owner": "Owned",
    "Renter": "Rented",
}
da2016 = da2016.rename(columns=labels_2016)

In [30]:
# Map the 2021 labels onto the names used for the other census years.
labels_2021 = {
    "Population density per square kilometre": "Population Density per square kilometre",
    "Occupied private dwellings by structural type of dwelling data": "Total Occupied Private Dwellings",
    "Apartment in a building that has fewer than five storeys": "Apartment, building that has fewer than five storeys",
    "Apartment in a building that has five or more storeys": "Apartment, building that has five or more storeys",
    "Apartment or flat in a duplex": "Apartment, duplex",
    "Owner": "Owned",
    "Renter": "Rented",
}
da2021 = da2021.rename(columns=labels_2021)

## Add Columns

In [31]:
# 2011 carries a combined "0 to 1 bedroom" column; build the same column for
# 2021 and 2016 by summing their "No bedrooms" and "1 bedroom" columns.
for frame in (da2021, da2016):
    frame["0 to 1 bedroom"] = frame["No bedrooms"] + frame["1 bedroom"]

## Export all the data

In [32]:
# Write one cleaned CSV per census year, without the row index.
for year, frame in (
    (2001, da2001),
    (2006, da2006),
    (2011, da2011),
    (2016, da2016),
    (2021, da2021),
):
    frame.to_csv(f'../../data_raw/cleaned_data/{year}_census_data.csv', index=False)