## Visualization Preparation

In [254]:
import numpy as np
import pandas as pd

from geopy.geocoders import Nominatim

### Read Data

In [255]:
# Load the processed listings CSV and preview five random rows.
listings_df = pd.read_csv(filepath_or_buffer='../data/processed/processed_listings.csv')
listings_df.sample(n=5)

Unnamed: 0,Subtype,Style,Living Area,Lot Dimensions,Bedrooms,Bathrooms,Levels,Listing Date,Listing Year,Year of Construction,Location,RCM,Region,Price
104131,2 Storey,2 storey,1800,9522,3,2,2,2011-03-01,2011,1990,Les Pays-d'en-Haut,Les Pays-d'en-Haut,Laurentides,200000
17730,Condominium,Divided Co-Ownership,1247,9522,2,2,1,2015-02-01,2015,1988,Brossard,Longueuil,Montérégie,305000
36470,Bungalow,Detached,960,6157,3,2,1,2021-11-01,2021,1990,Fabreville,Laval,Laval,400000
98652,Semi-detached,Semi-detached,724,1425,3,2,2,2019-04-01,2019,2007,Les Maskoutains & Acton,Les Maskoutains,Montérégie,230000
44997,Semi-detached,2 storey,2400,4471,4,2,2,2021-12-01,2021,1989,Gatineau,Gatineau,Outaouais,360000


In [256]:
# Columns kept from the population-centers reference file.
center_columns = ["Region", "Bounding Territory", "Display Name", "Bounding Population", "GeoPy Index"]

# Read the reference file, keep only the columns above, drop exact duplicate
# rows, shorten two column names and index the result by Location.
population_centers_df = (
    pd.read_csv('../data/references/handmade/qc-population-centers.csv')[center_columns]
    .drop_duplicates()
    .rename(columns={"Display Name": "Location", "Bounding Population": "Population"})
    .set_index("Location")
)
population_centers_df.sample(n=10)

Unnamed: 0_level_0,Region,Bounding Territory,Population,GeoPy Index
Location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
L'Assomption,Lanaudière,L'Assomption (MRC),125000,0
Marguerite-D'Youville,Montérégie,Marguerite-D'Youville,80000,0
Papineau & Les Collines-de-l'Outaouais,Outaouais,"[Les Collines-de-l'Outaouais,Papineau]",60000,0
Beauport,Capitale-Nationale,Beauport,75000,0
Coaticook,Estrie,Coaticook (MRC),20000,0
Lachine,Montréal,Arrondissement de Lachine,45000,0
Deux-Montagnes,Laurentides,Deux-Montagnes,100000,1
Nord-du-Québec,Nord-du-Québec,Nord-du-Québec,45000,0
Portneuf,Capitale-Nationale,Portneuf,55000,1
Vimont,Laval,Vimont,30000,1


### Location

Group the listings by Location and calculate the mean of each numeric column (truncated to integers).

In [257]:
# Average every numeric column per Location.
# `numeric_only=True` is passed explicitly because the listings also contain
# text columns (Subtype, Style, Listing Date, RCM, Region); on pandas >= 2.0
# a bare `.mean()` raises TypeError on those instead of silently skipping them.
location_df = (
    listings_df
    .groupby('Location')
    .mean(numeric_only=True)
    .astype(int)  # truncates the fractional part of each mean
    .reset_index()
)

Find most common Subtype for each Location

In [258]:
# For each Location, take the first value returned by `mode()` on its Subtype
# column, then attach that Subtype to the per-location averages.
subtype_by_location = listings_df.groupby('Location')['Subtype']
subtypes = subtype_by_location.apply(lambda group: group.mode().iloc[0])
location_df = pd.merge(
    left=subtypes,
    right=location_df,
    left_index=True,
    right_on="Location",
)

Number of listings per Location

In [259]:
# Count how many listings each Location has and store it as the first column.
location_value_counts = listings_df['Location'].value_counts()
nb_of_listings = location_df['Location'].map(location_value_counts).astype(int)

# `DataFrame.insert` raises ValueError when the column already exists, so only
# insert on the first run; on a re-run of this cell just refresh the values.
if 'Nb of listings' in location_df.columns:
    location_df['Nb of listings'] = nb_of_listings
else:
    location_df.insert(0, 'Nb of listings', nb_of_listings)

Price Range for each Location

In [260]:
# Bucket each Location's mean Price into a labelled range.
price = location_df['Price']

# Interior boundaries between consecutive ranges; every range is
# lower-inclusive / upper-exclusive, e.g. 210000 falls in '210k-250k'.
price_bin_edges = [210000, 250000, 275000, 300000, 320000, 350000, 400000]

values = ['0-210k', '210k-250k', '250k-275k', '275k-300k', '300k-320k', '320k-350k', '350k-400k', '>400k']

# One boolean mask per label: the first range is open below, the last is open above.
lower_bounds = [-np.inf] + price_bin_edges
upper_bounds = price_bin_edges + [np.inf]
conditions = [(price >= low) & (price < high) for low, high in zip(lower_bounds, upper_bounds)]

# The default must be a string like the choices: recent NumPy versions refuse
# to combine string choices with an integer default (no common dtype).
# '0' is the same fallback text older NumPy produced from `default=0`.
location_df['Price Range'] = np.select(conditions, values, default='0')

Merge with Population Centers DataFrame

In [261]:
# Index the per-location statistics by Location and merge them with the
# population-centers table on that index (pd.merge's default join type).
location_df = pd.merge(
    left=population_centers_df,
    right=location_df.set_index("Location"),
    left_index=True,
    right_index=True,
)

In [262]:
# Preview five random rows of the merged table.
location_df.sample(n=5)

Unnamed: 0_level_0,Region,Bounding Territory,Population,GeoPy Index,Nb of listings,Subtype,Living Area,Lot Dimensions,Bedrooms,Bathrooms,Levels,Listing Year,Year of Construction,Price,Price Range
Location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Côte Saint-Luc,Montréal,"[Côte Saint-Luc,Montréal-Ouest]",40000,0,29,Condominium,1476,5209,3,1,1,2015,1960,499482,>400k
Kirkland,Montréal,Kirkland,20000,0,37,2 Storey,1651,6518,3,1,1,2014,1986,446991,>400k
Côte-Nord,Côte-Nord,Côte-Nord,90000,0,262,Bungalow,1313,12534,3,1,1,2016,1977,198099,0-210k
Thérèse-De Blainville,Laurentides,Thérèse-De Blainville,160000,0,961,2 Storey,1402,7593,3,1,1,2016,1997,336314,320k-350k
Saint-Augustin-de-Desmaures,Capitale-Nationale,Saint-Augustin-de-Desmaures,20000,0,1443,2 Storey,1355,9759,3,1,1,2014,1995,293248,275k-300k


### Save Data

In [263]:
# Write the per-location table (indexed by Location) to the visualization folder.
location_df.to_csv(path_or_buf='../data/processed/visualization/locations.csv')