## Visualization Preparation

In [77]:
import numpy as np
import pandas as pd

from geopy.geocoders import Nominatim

### Read Data

In [78]:
# Load the processed listings table (path is relative to the notebook's folder).
listings_path = '../data/processed/processed_listings.csv'
listings_df = pd.read_csv(listings_path)

# Show five randomly chosen rows as a quick sanity check of the load.
listings_df.sample(n=5)

Unnamed: 0,Subtype,Style,Living Area,Lot Dimensions,Bedrooms,Bathrooms,Levels,Listing Date,Listing Year,Year of Construction,Location,RCM,Region,Price
108986,2 Storey,Custom Home,2840,5000,4,2,2,2016-10-01,2016,2015,"Senneville, Baie-D'Urfé & Saint-Anne-de-Bellevue",Montréal,Montréal,550000
102321,Bungalow,New construction,1415,6175,4,2,1,2018-01-01,2018,2014,Saint-Lambert,Longueuil,Montérégie,798750
107209,Semi-detached,2 storey,1560,3172,5,2,2,2021-12-01,2021,2016,La Côte-de-Beaupré & L'Île-d'Orléans,La Côte-de-Beaupré,Capitale-Nationale,610000
98481,Bungalow,Cathedral roof,1025,7474,3,2,1,2018-03-01,2018,1990,Le Fjord-du-Saguenay & Maria-Chapdelaine,Le Fjord-du-Saguenay,Saguenay-Lac-Saint-Jean,140000
40831,Bungalow,Open area,1090,9522,3,2,1,2011-02-01,2011,1997,Gatineau,Gatineau,Outaouais,222000


In [79]:
# Columns retained from the hand-made population-centers reference file.
center_columns = ["RCM", "Region", "Bounding Territory", "Display Name", "Bounding Population"]

# Load the reference table, keep only those columns, drop exact duplicate rows,
# then rename to the names used downstream and index the frame by "Location".
population_centers_df = (
    pd.read_csv('../data/references/handmade/qc-population-centers.csv')
    .loc[:, center_columns]
    .drop_duplicates()
    .rename(columns={"Display Name": "Location", "Bounding Population": "Population"})
    .set_index("Location")
)

# Preview ten random rows.
population_centers_df.sample(n=10)

Unnamed: 0_level_0,RCM,Region,Bounding Territory,Population
Location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
La Haute-Yamaska,La Haute-Yamaska,Montérégie,La Haute-Yamaska,90000
Bécancour & Nicolet,Bécancour,Centre-du-Québec,"[Nicolet-Yamaska,Bécancour (MRC)]",40000
Beauport,Québec,Capitale-Nationale,Beauport,75000
Mirabel,Mirabel,Laurentides,Mirabel,60000
Boucherville,Longueuil,Montérégie,Boucherville,40000
Les Jardins-de-Napierville & Le Haut-Saint-Laurent,Le Haut-Saint-Laurent,Montérégie,"[Les Jardins-de-Napierville,Le Haut-Saint-Laur...",50000
L'Islet & Montmagny,L'Islet,Chaudière-Appalaches,"[L'Islet,Montmagny]",40000
Montréal-Nord,Montréal,Montréal,Montréal-Nord,85000
Ville-Marie,Montréal,Montréal,Ville-Marie,90000
Fabreville,Laval,Laval,Fabreville,45000


### Location

Group the listings by Location and compute the mean of each numeric column (truncated to integers).

In [80]:
# Average every numeric column per Location.
# `numeric_only=True` is required on pandas >= 2.0: text columns (Subtype, Style,
# Listing Date, ...) are no longer silently dropped there, and `.mean()` raises
# a TypeError on them. On older pandas the result is unchanged.
# `.astype(int)` truncates the averages to whole numbers.
location_df = (
    listings_df
    .groupby('Location')
    .mean(numeric_only=True)
    .astype(int)
    .reset_index()
)

Find most common Subtype for each Location

In [81]:
def _first_mode(values):
    """Return the first entry of ``values.mode()`` (the most frequent value)."""
    return values.mode().iloc[0]


# One Subtype per Location: the most frequent one among that Location's listings.
subtypes = listings_df.groupby('Location')['Subtype'].apply(_first_mode)

# Attach the Subtype column to the per-Location averages; `subtypes` is indexed
# by Location, while `location_df` still carries Location as a regular column.
location_df = pd.merge(
    left=subtypes,
    right=location_df,
    left_index=True,
    right_on="Location",
)

Number of listings per Location

In [82]:
# Count how many listings each Location has in the raw listings table.
location_value_counts = listings_df['Location'].value_counts()
nb_of_listings = location_df['Location'].map(location_value_counts).astype(int)

# Put the count in the first column. The previous version inserted an empty-string
# placeholder first, which made the cell fail with ValueError when run a second
# time (column already exists); overwrite in that case instead.
if 'Nb of listings' in location_df.columns:
    location_df['Nb of listings'] = nb_of_listings
else:
    location_df.insert(0, 'Nb of listings', nb_of_listings)

Price Range for each Location

In [83]:
# Bucket each Location's average price into a labelled range.
# Each condition is a half-open interval [low, high); the last bucket is open-ended.
avg_price = location_df['Price']
conditions = [
    (avg_price >= 0) & (avg_price < 200000),
    (avg_price >= 200000) & (avg_price < 300000),
    (avg_price >= 300000) & (avg_price < 400000),
    avg_price >= 400000,
]
values = ['0-200k', '200k-300k', '300k-400k', '>400k']

# The fallback must be a string: with string choices, an integer default has no
# common dtype and np.select raises a TypeError on recent NumPy versions.
# '0' matches what the integer default was rendered as under the old promotion rules.
location_df['Price Range'] = np.select(conditions, values, default='0')

Merge with Population Centers DataFrame

In [84]:
# Index the per-Location statistics by Location so both frames share the same key.
location_stats = location_df.set_index("Location")

# Index-on-index merge (pandas' default inner join): only Locations present in
# both the population-centers table and the listing statistics are kept.
location_df = population_centers_df.merge(
    location_stats,
    left_index=True,
    right_index=True,
)

In [85]:
# Preview five random rows of the merged per-Location table.
location_df.sample(n=5)

Unnamed: 0_level_0,RCM,Region,Bounding Territory,Population,Nb of listings,Subtype,Living Area,Lot Dimensions,Bedrooms,Bathrooms,Levels,Listing Year,Year of Construction,Price,Price Range
Location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Pontiac & La Vallée-de-la-Gatineau,La Vallée-de-la-Gatineau,Outaouais,"[La Vallée-de-la-Gatineau,Pontiac]",35000,182,Bungalow,1432,24328,3,1,1,2015,1986,238759,200k-300k
Trois-Rivières,Trois-Rivières,Mauricie,Trois-Rivières,135000,2149,Bungalow,1304,8711,3,1,1,2016,1990,203470,200k-300k
Le Haut-Richelieu,Le Haut-Richelieu,Montérégie,Le Haut-Richelieu,120000,1365,Bungalow,1304,9067,3,1,1,2015,1993,281648,200k-300k
Villeray-Saint-Michel-Parc-Extension,Montréal,Montréal,Villeray-Saint-Michel-Parc-Extension,145000,454,Condominium,951,6941,2,1,1,2015,1984,339271,300k-400k
LaSalle,Montréal,Montréal,LaSalle,75000,182,Condominium,1174,7092,2,1,1,2014,1990,323040,300k-400k


### Save Data

In [86]:
# Write the per-Location table (indexed by Location) for the visualization step.
locations_csv_path = '../data/processed/visualization/locations.csv'
location_df.to_csv(locations_csv_path)