In [1]:
import pandas as pd
import os
from pathlib import Path

In [4]:
# Base directory used to locate the input files. Despite the variable name,
# this is the process's current working directory, not its parent.
parent_dir = Path.cwd()

In [22]:
def clean_numeric_str(curr_str):
    """Strip currency/percent formatting characters from a string.

    Removes every '$', ',' and '%' character from ``curr_str`` when it is a
    ``str``. Any non-string value (numbers, None, NaN, ...) is returned
    unchanged, so the function can be applied to mixed-type columns.
    """
    # Non-strings pass straight through untouched.
    if not isinstance(curr_str, str):
        return curr_str

    cleaned = curr_str
    for symbol in ('$', ',', '%'):
        cleaned = cleaned.replace(symbol, '')
    return cleaned

In [23]:
# Rental listings come from a Kaggle dataset:
# https://www.kaggle.com/rajacsp/toronto-apartment-price
rental_csv = os.path.join(parent_dir, 'Resources', 'Toronto_apartment_rentals_2018.csv')

# Load the listings and normalise the Price column in one chain: strip
# currency formatting from any string values, then cast the column to float.
rental_df = (
    pd.read_csv(rental_csv, thousands=',')
    .assign(Price=lambda listings: listings['Price'].apply(clean_numeric_str).astype('float'))
)

# preview the first rows
rental_df.head()

Unnamed: 0,Bedroom,Bathroom,Den,Address,Lat,Long,Price
0,2,2.0,0,"3985 Grand Park Drive, 3985 Grand Park Dr, Mis...",43.581639,-79.648193,2450.0
1,1,1.0,1,"361 Front St W, Toronto, ON M5V 3R5, Canada",43.643051,-79.391643,2150.0
2,1,1.0,0,"89 McGill Street, Toronto, ON, M5B 0B1",43.660605,-79.378635,1950.0
3,2,2.0,0,"10 York Street, Toronto, ON, M5J 0E1",43.641087,-79.381405,2900.0
4,1,1.0,0,"80 St Patrick St, Toronto, ON M5T 2X6, Canada",43.652487,-79.389622,1800.0


In [30]:
# 2016 census data by Toronto neighbourhood, available in the City of Toronto Open Data Portal
census_csv = os.path.join(parent_dir, 'Resources', 'neighbourhoods-profiles-2016-csv-transposed.csv')
census_df = pd.read_csv(census_csv)

# Best-effort numeric conversion: strip '$', ',' and '%' from string values
# and cast each column to float. Columns that cannot be converted are left
# exactly as they were read.
for column in census_df:
    try:
        census_df[column] = census_df[column].apply(clean_numeric_str).astype('float')
    except (ValueError, TypeError):
        # The column holds non-numeric values (e.g. neighbourhood names), so
        # keep it unchanged. Only conversion failures are caught here; a bare
        # `except:` would also swallow KeyboardInterrupt and genuine bugs.
        continue

census_df

Unnamed: 0,Neighbourhood,Neighbourhood Number,TSNS2020 Designation,"Population, 2016","Population, 2011",Population Change 2011-2016,Total private dwellings,Private dwellings occupied by usual residents,Population density per square kilometre,Land area in square kilometres,...,External migrants,Total - Mobility status 5 years ago - 25% sample data,Non-movers.1,Movers.1,Non-migrants.1,Migrants.1,Internal migrants.1,Intraprovincial migrants.1,Interprovincial migrants.1,External migrants.1
0,City of Toronto,,,2731571.0,2615060.0,4.5,1179057.0,1112929.0,4334.0,630.20,...,59945.0,2556120.0,1516110.0,1040015.0,639060.0,400950.0,184120.0,141135.0,42985.0,216835.0
1,Agincourt North,129.0,No Designation,29113.0,30279.0,-3.9,9371.0,9120.0,3929.0,7.41,...,605.0,27490.0,18865.0,8610.0,5445.0,3170.0,880.0,735.0,135.0,2280.0
2,Agincourt South-Malvern West,128.0,No Designation,23757.0,21988.0,8.0,8535.0,8136.0,3034.0,7.83,...,490.0,22325.0,13565.0,8775.0,5610.0,3145.0,980.0,760.0,220.0,2170.0
3,Alderwood,20.0,No Designation,12054.0,11904.0,1.3,4732.0,4616.0,2435.0,4.95,...,70.0,11370.0,8235.0,3130.0,2200.0,925.0,680.0,615.0,70.0,245.0
4,Annex,95.0,No Designation,30526.0,29177.0,4.6,18109.0,15934.0,10863.0,2.81,...,835.0,27715.0,12980.0,14735.0,8340.0,6390.0,3930.0,2630.0,1310.0,2460.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
136,Wychwood,94.0,No Designation,14349.0,13986.0,2.6,6185.0,5887.0,8541.0,1.68,...,225.0,12630.0,7930.0,4700.0,3355.0,1350.0,780.0,570.0,210.0,575.0
137,Yonge-Eglinton,100.0,No Designation,11817.0,10578.0,11.7,6103.0,5676.0,7162.0,1.65,...,325.0,11230.0,5785.0,5450.0,3020.0,2425.0,1260.0,970.0,290.0,1160.0
138,Yonge-St.Clair,97.0,No Designation,12528.0,11652.0,7.5,7475.0,7012.0,10708.0,1.17,...,265.0,11865.0,5660.0,6195.0,3895.0,2310.0,1355.0,1025.0,325.0,955.0
139,York University Heights,27.0,NIA,27593.0,27713.0,-0.4,11051.0,10170.0,2086.0,13.23,...,680.0,26110.0,14720.0,11400.0,6435.0,4965.0,1700.0,1490.0,195.0,3285.0
