## Visualization Preparation

In [41]:
import numpy as np
import pandas as pd

from geopy.geocoders import Nominatim

### Read Data

In [42]:
# Load the processed listings table and preview five random rows.
listings_df = pd.read_csv(
    filepath_or_buffer='../data/processed/processed_listings.csv'
)
listings_df.sample(n=5)

Unnamed: 0,Subtype,Style,Living Area,Lot Dimensions,Bedrooms,Bathrooms,Levels,Listing Date,Listing Year,Year of Construction,Location,RCM,Region,Price
38053,Townhouse,2 storey,1220,2575,3,1,2,2018-05-01,2018,1975,Gatineau,Gatineau,Outaouais,140000
89451,1 1/2 Storey,Waterfront,1460,9084,4,1,2,2018-05-01,2018,1978,Ville de Saguenay,Saguenay,Saguenay-Lac-Saint-Jean,291500
80351,Bungalow,Open area,1020,9084,2,1,1,2011-05-01,2011,2001,Antoine-Labelle,Antoine-Labelle,Laurentides,277500
90137,Bungalow,Detached,984,7644,4,2,1,2013-03-01,2013,1974,Ville de Saguenay,Saguenay,Saguenay-Lac-Saint-Jean,212000
104354,2 Storey,Townhouse,900,3224,3,1,2,2019-02-01,2019,2003,La Côte-de-Beaupré & L'Île-d'Orléans,La Côte-de-Beaupré,Capitale-Nationale,313000


In [43]:
# Load the population-centre reference table, keep only the columns used below,
# drop repeated rows, and key the result by the display name (renamed to
# "Location" so it lines up with the listings data).
population_centers_df = (
    pd.read_csv('../data/references/handmade/qc-population-centers.csv')
    .loc[:, ["Region", "Bounding Territory", "Display Name", "Bounding Population", "GeoPy Index"]]
    .drop_duplicates()
    .rename(columns={"Display Name": "Location", "Bounding Population": "Population"})
    .set_index("Location")
)
population_centers_df.sample(n=10)

Unnamed: 0_level_0,Region,Bounding Territory,Population,GeoPy Index
Location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Côte-des-Neiges-Notre-Dame-de-Grâce,Montréal,"[Côte-des-Neiges-Notre-Dame-de-Grâce,Hampstead]",170000,0
Montréal-Nord,Montréal,Montréal-Nord,85000,0
Laval-des-Rapides,Laval,"[Laval-des-Rapides,Pont-Viau Laval]",50000,0
"Senneville, Baie-D'Urfé & Saint-Anne-de-Bellevue",Montréal,"[Senneville,Baie-D'Urfé,Sainte-Anne-de-Bellevue]",10000,0
Le Domaine-du-Roy,Saguenay-Lac-Saint-Jean,Le Domaine-du-Roy,30000,0
La Haute-Saint-Charles,Capitale-Nationale,La Haute-Saint-Charles,85000,0
Ville de Saguenay,Saguenay-Lac-Saint-Jean,Saguenay,150000,1
Trois-Rivières,Mauricie,Trois-Rivières,135000,0
Abitibi-Témiscamingue,Abitibi-Témiscamingue,Abitibi-Témiscamingue,150000,0
D'Autray,Lanaudière,D'Autray,40000,0


### Location

Group the listings by Location and compute the mean of each numeric column (means are truncated to integers).

In [44]:
# Average every numeric column per Location.
# `numeric_only=True` is needed on pandas >= 2.0: without it the non-numeric
# columns (e.g. Subtype, Style, Listing Date) make `.mean()` raise a TypeError
# instead of being dropped silently as older pandas versions did.
# `astype(int)` truncates the means toward zero (it does not round).
location_df = (
    listings_df
    .groupby('Location')
    .mean(numeric_only=True)
    .astype(int)
    .reset_index()
)

Find most common Subtype for each Location

In [45]:
# Most frequent Subtype per Location; when several subtypes tie, the first
# value returned by `mode()` is kept.
subtypes = (
    listings_df
    .groupby('Location')['Subtype']
    .apply(lambda group: group.mode().iloc[0])
)
# Attach the per-Location averages to the dominant subtype, matching the
# subtype index (Location) against the "Location" column of `location_df`.
location_df = subtypes.to_frame().merge(
    location_df,
    left_index=True,
    right_on="Location",
)

Number of listings per Location

In [46]:
# Count the listings of each Location and store the count as the first column.
location_value_counts = listings_df['Location'].value_counts()
location_df.insert(
    0,
    'Nb of listings',
    location_df['Location'].map(location_value_counts).astype(int),
)

Price Range for each Location

In [47]:
# Bucket each Location's average price into a labelled price range.
# Ranges are half-open: a price equal to an edge falls in the higher bucket.
price = location_df['Price']
price_edges = [210000, 250000, 275000, 300000, 320000, 350000, 400000]

values = ['0-210k', '210k-250k', '250k-275k', '275k-300k', '300k-320k', '320k-350k', '350k-400k', '>400k']

# One boolean mask per label: below the first edge, between each pair of
# consecutive edges, and at or above the last edge.
conditions = (
    [price < price_edges[0]]
    + [(price >= low) & (price < high) for low, high in zip(price_edges[:-1], price_edges[1:])]
    + [price >= price_edges[-1]]
)

# The default must be a string like the choices: recent NumPy versions refuse to
# promote str and int to a common dtype and raise a TypeError for `default=0`.
# '0' is the value older NumPy produced by stringifying the integer default.
location_df['Price Range'] = np.select(conditions, values, default='0')

Merge with Population Centers DataFrame

In [48]:
# Inner-join the population-centre attributes with the per-Location statistics
# on their shared Location index; Locations missing from either side are dropped.
location_df = population_centers_df.merge(
    location_df.set_index("Location"),
    how="inner",
    left_index=True,
    right_index=True,
)

In [49]:
# Preview five random rows of the merged table (unseeded, so rows vary per run).
location_df.sample(n=5)

Unnamed: 0_level_0,Region,Bounding Territory,Population,GeoPy Index,Nb of listings,Subtype,Living Area,Lot Dimensions,Bedrooms,Bathrooms,Levels,Listing Year,Year of Construction,Price,Price Range
Location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Ville-Marie,Montréal,Ville-Marie,90000,1,443,Condominium,956,7933,1,1,1,2014,1977,342211,320k-350k
Drummond,Centre-du-Québec,Drummond,100000,0,1733,Bungalow,1270,9988,3,1,1,2016,1996,232144,210k-250k
"Senneville, Baie-D'Urfé & Saint-Anne-de-Bellevue",Montréal,"[Senneville,Baie-D'Urfé,Sainte-Anne-de-Bellevue]",10000,0,285,2 Storey,1334,7778,3,1,1,2016,1990,303445,300k-320k
Laval-des-Rapides,Laval,"[Laval-des-Rapides,Pont-Viau Laval]",50000,0,295,Condominium,1117,6457,2,1,1,2015,1980,308004,300k-320k
LaSalle,Montréal,LaSalle,75000,0,180,Condominium,1172,6909,2,1,1,2014,1990,321952,320k-350k


### Save Data

In [50]:
# Persist the per-Location table; the Location index is written as the first column.
location_df.to_csv(
    '../data/processed/visualization/locations.csv',
    index=True,
)