## Visualization Preparation

In [61]:
import numpy as np
import pandas as pd

from geopy.geocoders import Nominatim

### Read Data

In [62]:
# Load the processed listings and preview five randomly chosen rows.
listings_path = '../data/processed/processed_listings.csv'
listings_df = pd.read_csv(listings_path)
listings_df.sample(n=5)

Unnamed: 0,Subtype,Style,Living Area,Lot Dimensions,Bedrooms,Bathrooms,Levels,Listing Date,Listing Year,Year of Construction,Location,RCM,Region,Price
87882,Condominium,Divided Co-Ownership,541,9084,1,1,1,2017-01-01,2017,2014,Rosemont-La Petite-Patrie,Montréal,Montréal,210000
24663,Bungalow,Open area,1261,9500,2,1,1,2021-12-01,2021,1957,Portneuf,Portneuf,Capitale-Nationale,265000
11801,2 Storey,2 storey,1191,4076,3,1,2,2020-12-01,2020,2004,Beauport,Québec,Capitale-Nationale,332500
23134,Semi-detached,Unknown,881,5086,4,1,1,2021-12-01,2021,2018,Portneuf,Portneuf,Capitale-Nationale,242500
25485,Bungalow,Open area,1261,9500,2,1,1,2021-12-01,2021,1957,Portneuf,Portneuf,Capitale-Nationale,265000


In [63]:
# Load the hand-made population-center reference table, keep only the columns
# used below, drop duplicate rows, and index it by the display name (renamed to
# "Location" so it lines up with the listings' Location values).
population_centers_df = (
    pd.read_csv('../data/references/handmade/qc-population-centers.csv')
    .loc[:, ["Region", "Bounding Territory", "Display Name", "Bounding Population", "GeoPy Index"]]
    .drop_duplicates()
    .rename(columns={"Display Name": "Location", "Bounding Population": "Population"})
    .set_index("Location")
)
population_centers_df.sample(n=10)

Unnamed: 0_level_0,Region,Bounding Territory,Population,GeoPy Index
Location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Saint-Lambert,Montérégie,Saint-Lambert,20000,0
Dorval,Montréal,Dorval,20000,0
L'Ancienne-Lorette,Capitale-Nationale,L'Ancienne-Lorette,15000,0
Kirkland,Montréal,Kirkland,20000,0
Pierre-de-Saurel,Montérégie,Pierre-de-Saurel,50000,0
Roussillon,Montérégie,Roussillon,170000,0
Rosemont-La Petite-Patrie,Montréal,Rosemont-La Petite-Patrie,140000,0
La Cité-Limoilou,Capitale-Nationale,La Cité-Limoilou,100000,0
Deux-Montagnes,Laurentides,Deux-Montagnes,100000,1
L'Assomption,Lanaudière,L'Assomption (MRC),125000,0


### Location

Group the listings by Location and compute the mean of each numeric column

In [64]:
# Average every numeric listing attribute per Location.
# `numeric_only=True` is required on pandas >= 2.0: without it, the text columns
# of `listings_df` (Subtype, Style, Listing Date, ...) make `mean()` raise a
# TypeError instead of being silently dropped as in older pandas releases.
# `astype(int)` truncates the means toward zero (it does not round).
location_df = (
    listings_df
    .groupby('Location')
    .mean(numeric_only=True)
    .astype(int)
    .reset_index()
)

Find most common Subtype for each Location

In [65]:
def _first_mode(column):
    """Return the first modal value of ``column`` (ties resolve to the first mode returned)."""
    return column.mode().iloc[0]


# Most frequent Subtype for each Location, indexed by Location.
subtypes = listings_df.groupby('Location')['Subtype'].apply(_first_mode)

# Attach the Subtype column to the per-Location averages; the Series index is
# matched against the "Location" column of `location_df`.
location_df = pd.merge(
    left=subtypes,
    right=location_df,
    left_index=True,
    right_on="Location",
)

Number of listings per Location

In [66]:
# Count how many listings each Location has and expose it as the first column.
location_value_counts = listings_df['Location'].value_counts()
nb_of_listings = location_df['Location'].map(location_value_counts).astype(int)

# Insert the finished column directly instead of a '' placeholder, and overwrite
# it in place when it already exists so that re-running this cell does not raise
# "ValueError: cannot insert Nb of listings, already exists".
if 'Nb of listings' in location_df.columns:
    location_df['Nb of listings'] = nb_of_listings
else:
    location_df.insert(0, 'Nb of listings', nb_of_listings)

Price Range for each Location

In [67]:
# Price thresholds separating the buckets. Each bucket is lower-inclusive and
# upper-exclusive; the first and last buckets are open-ended.
price_edges = [210000, 250000, 275000, 300000, 320000, 350000, 400000]
mean_price = location_df['Price']

conditions = (
    [mean_price < price_edges[0]]
    + [
        (mean_price >= lower) & (mean_price < upper)
        for lower, upper in zip(price_edges[:-1], price_edges[1:])
    ]
    + [mean_price >= price_edges[-1]]
)

# One label per condition, in the same order.
values = ['0-210k', '210k-250k', '250k-275k', '275k-300k', '300k-320k', '320k-350k', '350k-400k', '>400k']

# The default must be a string like the labels: an integer default (0) has no
# common dtype with string choices and makes np.select raise a TypeError on
# NumPy 2.x. '0' is the value older NumPy produced by coercing the integer.
# It is only used for rows matching no condition (e.g. a missing Price).
location_df['Price Range'] = np.select(conditions, values, default='0')

Merge with Population Centers DataFrame

In [68]:
# Inner-join the population-center reference data (left) with the per-Location
# aggregates (right) on their shared "Location" index; Locations missing from
# either table are dropped.
location_df = population_centers_df.merge(
    location_df.set_index("Location"),
    how="inner",
    left_index=True,
    right_index=True,
)

In [69]:
# Preview five randomly chosen rows of the merged per-Location table.
location_df.sample(n=5)

Unnamed: 0_level_0,Region,Bounding Territory,Population,GeoPy Index,Nb of listings,Subtype,Living Area,Lot Dimensions,Bedrooms,Bathrooms,Levels,Listing Year,Year of Construction,Price,Price Range
Location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Lac-Saint-Jean-Est,Saguenay-Lac-Saint-Jean,Lac-Saint-Jean-Est,50000,0,4535,2 Storey,1708,15232,4,1,1,2020,1989,271050,250k-275k
Vaudreuil-Soulanges,Montérégie,Vaudreuil-Soulanges,150000,0,822,2 Storey,1422,9815,3,1,1,2015,1999,327637,320k-350k
"Duvernay, Saint-Vincent-de-Paul & Saint-François",Laval,"[Duvernay,Saint-Vincent-de-Paul,Saint-François...",65000,0,351,Bungalow,1319,6105,3,1,1,2015,1983,344749,320k-350k
Le Haut-Richelieu,Montérégie,Le Haut-Richelieu,120000,0,1360,Bungalow,1305,8789,3,1,1,2015,1993,282129,275k-300k
Saint-Augustin-de-Desmaures,Capitale-Nationale,Saint-Augustin-de-Desmaures,20000,0,1441,2 Storey,1356,9535,3,1,1,2014,1995,293335,275k-300k


### Save Data

In [70]:
# Persist the per-Location table (the Location index is written as the first column).
locations_csv_path = '../data/processed/visualization/locations.csv'
location_df.to_csv(locations_csv_path)