# Visualization Prep
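Builds a per-location summary of the processed listings, merges it with the bounding-territory reference data, and saves the result for use in visualizations.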

In [1]:
import numpy as np
import pandas as pd

from geopy.geocoders import Nominatim

In [2]:
# Load the processed listings and preview a random handful of rows.
listings_path = '../data/processed/processed-listings.csv'
listings_df = pd.read_csv(listings_path)
listings_df.sample(n=5)

Unnamed: 0,Subtype,Style,Living Area,Lot Dimensions,Bedrooms,Bathrooms,Levels,Listing Date,Listing Year,Year of Construction,Age,Location,Price
44207,Bungalow,Multi-generation,1100,32880,3,2,1,2015-05-01,2015,2004,11,Antoine-Labelle,225000
15840,Townhouse,Divided Co-Ownership,2098,2116,2,1,3,2020-01-01,2020,2016,4,Deux-Montagnes,374900
49415,2 Storey,Unknown,966,4033,5,2,2,2014-07-01,2014,1930,84,Trois-Rivières,126500
64318,2 Storey,2 storey,1955,7110,3,1,2,2010-10-01,2010,1994,16,L'Île-Bizard-Sainte-Geneviève,358000
84622,Condominium,Mezzanine,1298,1703,2,1,2,2016-11-01,2016,1999,17,L'Assomption,224900


In [3]:
# Load the hand-made bounding-territory reference table, rename its columns to
# the names used in the rest of this notebook, and index it by Location.
bounding_path = '../data/references/handmade/bounding-territories.csv'
bounding_column_names = {"Display Name": "Location", "Bounding Population": "Population"}
bounding_df = (
    pd.read_csv(bounding_path)
    .rename(columns=bounding_column_names)
    .set_index("Location")
)
bounding_df.sample(n=10)

Unnamed: 0_level_0,Bounding Territory,Bounding Type,Population,GeoPy Index
Location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
D'Autray,D'Autray,Regional County Municipality (RCM),40000,0
Papineau & Les Collines-de-l'Outaouais,"[Les Collines-de-l'Outaouais,Papineau]",Regional County Municipality (RCM),60000,0
Laval-des-Rapides,"[Laval-des-Rapides,Pont-Viau Laval]",Laval Borough,50000,0
Beauce & Les Etchemins,"[La Nouvelle-Beauce,Robert-Cliché,Beauce-Sarti...",Regional County Municipality (RCM),120000,0
Memphrémagog,Memphrémagog,Regional County Municipality (RCM),50000,0
La Haute-Yamaska,La Haute-Yamaska,Regional County Municipality (RCM),90000,0
Shawinigan,Shawinigan,Municipality,50000,1
Sainte-Rose,Sainte-Rose,Laval Borough,35000,1
Sainte-Dorothée & Laval-Ouest,"[Sainte-Dorothée,Laval-Ouest,Îles Laval,Laval-...",Laval Borough,45000,0
Hull,Hull,Municipality,70000,0


Group the listings by Location and calculate the mean of each numeric column

In [4]:
# Average every numeric column per Location.
# `numeric_only=True` is spelled out because pandas >= 2.0 no longer drops
# non-numeric columns silently: with the text columns present in listings_df
# (e.g. Subtype, Style) a bare `.mean()` raises a TypeError there.
# `astype(int)` truncates the means toward zero; it does not round.
location_df = (
    listings_df
    .groupby('Location')
    .mean(numeric_only=True)
    .astype(int)
    .reset_index()
)

Find most common Subtype for each Location

In [5]:
# For each Location keep the most frequent Subtype; when several values tie,
# the first mode returned by pandas is used.
subtypes = (
    listings_df
    .groupby('Location')['Subtype']
    .apply(lambda group: group.mode().iloc[0])
)
# Attach the Subtype to the per-location means (Series index <-> "Location" column).
location_df = pd.merge(left=subtypes, right=location_df, left_index=True, right_on="Location")

Number of listings per Location

In [6]:
# Count how many listings each Location has and store it as the first column.
location_value_counts = listings_df['Location'].value_counts()
nb_of_listings = location_df['Location'].map(location_value_counts).astype(int)

# `DataFrame.insert` raises a ValueError when the column already exists, so
# only insert on the first run and overwrite in place when the cell is re-run.
if 'Nb of listings' in location_df.columns:
    location_df['Nb of listings'] = nb_of_listings
else:
    location_df.insert(0, 'Nb of listings', nb_of_listings)

Price Range for each Location

In [7]:
# Label each Location with the price bracket its mean Price falls into.
#
# The brackets are described once, as bin edges, instead of as eight hand-written
# boolean masks. This also avoids `np.select(..., default=0)` with string
# choices, which recent NumPy releases reject because the string labels and the
# integer default have no common dtype.
price_bin_edges = [-np.inf, 210000, 250000, 275000, 300000, 320000, 350000, 400000, np.inf]

values = ['0-210k', '210k-250k', '250k-275k', '275k-300k', '300k-320k', '320k-350k', '350k-400k', '>400k']

# right=False makes each bin include its lower edge and exclude its upper edge,
# i.e. [210000, 250000), matching the original `>= lower` and `< upper` tests.
# astype(str) stores plain string labels rather than a categorical column.
location_df['Price Range'] = pd.cut(
    location_df['Price'],
    bins=price_bin_edges,
    labels=values,
    right=False,
).astype(str)

Merge with Bounding

In [8]:
# Index the per-location summary by Location, then combine it with the bounding
# reference data. pd.merge defaults to an inner join, so only Locations present
# in both frames are kept; the bounding columns come first.
location_df = pd.merge(
    left=bounding_df,
    right=location_df.set_index("Location"),
    left_index=True,
    right_index=True,
)

In [9]:
# Preview a random handful of rows of the merged result.
location_df.sample(n=5)

Unnamed: 0_level_0,Bounding Territory,Bounding Type,Population,GeoPy Index,Nb of listings,Subtype,Living Area,Lot Dimensions,Bedrooms,Bathrooms,Levels,Listing Year,Year of Construction,Age,Price,Price Range
Location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Mercier-Hochelaga-Maisonneuve,Mercier-Hochelaga-Maisonneuve,Montréal Borough,135000,0,1451,Condominium,1084,6880,2,1,1,2015,1989,25,306592,300k-320k
L'Île-Bizard-Sainte-Geneviève,L'Île-Bizard-Sainte-Geneviève,Montréal Borough,20000,0,117,2 Storey,1526,7406,3,1,1,2014,1988,26,376597,350k-400k
Brome-Missisquoi,Brome-Missisquoi,Regional County Municipality (RCM),60000,0,923,Bungalow,1416,13165,3,1,1,2016,1994,21,300308,300k-320k
Verdun,Verdun,Montréal Borough,70000,0,516,Condominium,1112,7553,2,1,1,2014,1985,29,374386,350k-400k
Les Rivières,Les Rivières,Québec City Borough,75000,0,1407,Condominium,1171,6675,2,1,1,2015,1996,18,250244,250k-275k


In [10]:
# Save the per-location dataset (index = Location) for the visualization step.
locations_csv_path = '../data/processed/visualization/locations.csv'
location_df.to_csv(locations_csv_path)