## Visualization Preparation

In [81]:
import numpy as np
import pandas as pd

from geopy.geocoders import Nominatim

### Read Data

In [82]:
# Load the processed listings table and preview five randomly chosen rows.
listings_path = '../data/processed/processed_listings.csv'
listings_df = pd.read_csv(listings_path)
listings_df.sample(n=5)

Unnamed: 0,Subtype,Style,Living Area,Lot Dimensions,Bedrooms,Bathrooms,Levels,Listing Date,Listing Year,Year of Construction,Location,RCM,Region,Price
13840,2 Storey,Detached,1855,7200,5,2,2,2017-09-01,2017,2005,Thérèse-De Blainville,Thérèse-De Blainville,Laurentides,395000
43920,Semi-detached,2 storey,2400,4471,4,2,2,2021-12-01,2021,1989,Gatineau,Gatineau,Outaouais,360000
102779,Bungalow,Detached,2184,17089,3,2,1,2012-01-01,2012,2008,Les Pays-d'en-Haut,Les Pays-d'en-Haut,Laurentides,381000
9241,2 Storey,2 storey,1191,4076,3,1,2,2020-12-01,2020,2004,Beauport,Québec,Capitale-Nationale,332500
98732,Bungalow,Detached,1008,12900,4,2,1,2017-03-01,2017,1973,Le Haut-Richelieu,Le Haut-Richelieu,Montérégie,224000


In [83]:
# Load the hand-made population-centre reference table, keep the five columns
# used downstream, drop exact duplicate rows, and index the result by the
# display name (renamed to "Location") so it can be joined on Location later.
population_centers_df = (
    pd.read_csv('../data/references/handmade/qc-population-centers.csv')
    .loc[:, ["Region", "Bounding Territory", "Display Name", "Bounding Population", "GeoPy Index"]]
    .drop_duplicates()
    .rename(columns={"Display Name": "Location", "Bounding Population": "Population"})
    .set_index("Location")
)
population_centers_df.sample(n=10)

Unnamed: 0_level_0,Region,Bounding Territory,Population,GeoPy Index
Location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Brome-Missisquoi,Montérégie,Brome-Missisquoi,60000,0
Bellechasse,Chaudière-Appalaches,Bellechasse,40000,0
Ville de Mont-Royal,Montréal,Ville de Mont-Royal,20000,0
L'Île-Bizard-Sainte-Geneviève,Montréal,L'Île-Bizard-Sainte-Geneviève,20000,0
Saint-Lambert,Montérégie,Saint-Lambert,20000,0
Saint-Augustin-de-Desmaures,Capitale-Nationale,Saint-Augustin-de-Desmaures,20000,0
Beauce & Les Etchemins,Chaudière-Appalaches,"[La Nouvelle-Beauce,Robert-Cliché,Beauce-Sarti...",120000,0
Bécancour & Nicolet,Centre-du-Québec,"[Nicolet-Yamaska,Bécancour (MRC)]",40000,0
Pontiac & La Vallée-de-la-Gatineau,Outaouais,"[La Vallée-de-la-Gatineau,Pontiac]",35000,0
"Duvernay, Saint-Vincent-de-Paul & Saint-François",Laval,"[Duvernay,Saint-Vincent-de-Paul,Saint-François...",65000,0


### Location

Group the listings by Location and compute the mean of every numeric column.

In [84]:
# Average every numeric column per Location, then truncate the means to
# integers and turn the Location index back into a regular column.
# numeric_only=True is required: listings_df also carries text columns
# (Subtype, Style, Listing Date, RCM, Region) and pandas >= 2.0 raises a
# TypeError on them instead of silently dropping them from the mean.
location_df = (
    listings_df
    .groupby('Location')
    .mean(numeric_only=True)
    .astype(int)
    .reset_index()
)

Find most common Subtype for each Location

In [85]:
def _first_mode(subtype_values):
    """Return the first entry of the mode() result for one Location's Subtype values."""
    return subtype_values.mode().iloc[0]


# One Subtype per Location (Series indexed by Location, named "Subtype").
subtypes = listings_df.groupby('Location')['Subtype'].apply(_first_mode)

# Attach it to the per-Location means: the Series index is matched against the
# "Location" column of location_df.
location_df = subtypes.to_frame().merge(location_df, left_index=True, right_on="Location")

Number of listings per Location

In [86]:
# Count the listings of each Location and place the counts in a new first column.
location_value_counts = listings_df['Location'].value_counts()
listing_counts = location_df['Location'].map(location_value_counts).astype(int)
location_df.insert(0, 'Nb of listings', listing_counts)

Price Range for each Location

In [87]:
# Label each Location's mean Price with a price bracket. Brackets are half-open
# [lower, upper): a price sitting exactly on an edge belongs to the higher bracket.
# Deriving every condition from one list of edges keeps neighbouring brackets
# from overlapping or leaving gaps.
price_edges = [210000, 250000, 275000, 300000, 320000, 350000, 400000]
values = ['0-210k', '210k-250k', '250k-275k', '275k-300k', '300k-320k', '320k-350k', '350k-400k', '>400k']

mean_price = location_df['Price']
conditions = (
    [mean_price < price_edges[0]]
    + [(mean_price >= low) & (mean_price < high) for low, high in zip(price_edges, price_edges[1:])]
    + [mean_price >= price_edges[-1]]
)

# The fallback must be a string like the labels: mixing an integer default with
# string choices is rejected by recent NumPy releases ("Choicelist and default
# value do not have a common dtype"). '0' is what older NumPy produced from
# default=0; the brackets cover every non-NaN price, so it is only a safety net.
location_df['Price Range'] = np.select(conditions, values, default='0')

Merge with Population Centers DataFrame

In [88]:
# Index the per-Location statistics by Location and join them onto the
# population-centre table; the default inner merge keeps only Locations present
# in both frames.
location_df = population_centers_df.merge(
    location_df.set_index("Location"),
    left_index=True,
    right_index=True,
)

In [89]:
# Preview five randomly chosen rows of the merged table.
location_df.sample(n=5)

Unnamed: 0_level_0,Region,Bounding Territory,Population,GeoPy Index,Nb of listings,Subtype,Living Area,Lot Dimensions,Bedrooms,Bathrooms,Levels,Listing Year,Year of Construction,Price,Price Range
Location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Fabreville,Laval,Fabreville,45000,0,4565,Bungalow,1015,6126,3,1,1,2020,1989,385857,350k-400k
Bellechasse,Chaudière-Appalaches,Bellechasse,40000,0,709,Bungalow,1238,10566,3,1,1,2015,1996,244871,210k-250k
Antoine-Labelle,Laurentides,Antoine-Labelle,35000,2,162,Bungalow,1339,21105,3,1,1,2016,1989,230425,210k-250k
Bas-Saint-Laurent,Bas-Saint-Laurent,Bas-Saint-Laurent,200000,0,1031,Bungalow,1258,10586,3,1,1,2016,1985,207676,0-210k
Le Plateau-Mont-Royal,Montréal,Le Plateau-Mont-Royal,100000,0,884,Condominium,1011,6834,2,1,1,2014,1958,389243,350k-400k


### Save Data

In [90]:
# Write the per-Location table (indexed by Location) to the visualization data folder.
locations_csv_path = '../data/processed/visualization/locations.csv'
location_df.to_csv(locations_csv_path)