# Visualization Prep
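Prepares the per-location dataset needed for visualization: listings are aggregated by Location and joined with the bounding-territory reference table.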

In [5]:
import numpy as np
import pandas as pd

from geopy.geocoders import Nominatim

In [6]:
# Load the processed listings and preview a few random rows.
listings_path = '../data/processed/processed_listings.csv'
listings_df = pd.read_csv(listings_path)
listings_df.sample(n=5)

Unnamed: 0,Subtype,Style,Living Area,Lot Dimensions,Bedrooms,Bathrooms,Levels,Listing Date,Listing Year,Year of Construction,Age,Location,Price
45809,Semi-detached,Unknown,1255,3270,4,1,2,2014-09-01,2014,1991,23,Gatineau,209000
44930,Semi-detached,Link,968,5589,4,1,1,2015-03-01,2015,1998,17,Trois-Rivières,145000
41037,2 Storey,Unknown,1600,5523,4,2,2,2015-12-01,2015,1997,18,Marguerite-D'Youville,385000
66911,2 Storey,Detached,1904,31821,3,2,2,2008-05-01,2008,2004,4,Le Haut-Richelieu,275000
70177,Condominium,Divided Co-Ownership,592,9283,1,1,1,2021-04-01,2021,2013,8,La Haute-Saint-Charles,122000


In [7]:
# Load the hand-made bounding-territory reference table, align its column
# names with the listings data, and key it by Location.
bounding_column_names = {"Display Name": "Location", "Bounding Population": "Population"}
bounding_df = (
    pd.read_csv('../data/references/handmade/bounding-territories.csv')
    .rename(columns=bounding_column_names)
    .set_index("Location")
)
bounding_df.sample(n=10)

Unnamed: 0_level_0,Bounding Territory,Bounding Type,Population,GeoPy Index
Location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Le Haut-Richelieu,Le Haut-Richelieu,Regional County Municipality (RCM),120000,0
La Vallée-du-Richelieu,La Vallée-du-Richelieu,Regional County Municipality (RCM),125000,0
L'Île-Bizard-Sainte-Geneviève,L'Île-Bizard-Sainte-Geneviève,Montréal Borough,20000,0
"La Tuque, Mékinac & Les Chenaux","[La Tuque,Mékinac,Les Chenaux]",Regional County Municipality (RCM),40000,0
Argenteuil,Argenteuil,Regional County Municipality (RCM),30000,0
Côte-Nord,Côte-Nord,Administative Region,90000,0
Bas-Saint-Laurent,Bas-Saint-Laurent,Administative Region,200000,0
L'Islet & Montmagny,"[L'Islet,Montmagny]",Regional County Municipality (RCM),40000,1
Saint-Bruno-de-Montarville,Saint-Bruno-de-Montarville,Municipality,25000,0
Nord-du-Québec,Nord-du-Québec,Administative Region,45000,0


Group by Location and calculate the mean of each numeric column

In [8]:
# Average every numeric column per Location.
# numeric_only=True is passed explicitly: listings_df also holds text columns
# (Subtype, Style, Listing Date), and on pandas >= 2.0 mean() raises a
# TypeError on them instead of silently dropping them.
location_df = (
    listings_df
    .groupby('Location')
    .mean(numeric_only=True)
    .astype(int)  # truncates the fractional part of each mean
    .reset_index()
)

Find most common Subtype for each Location

In [9]:
def first_mode(column):
    """Return the most frequent value of a Series (the first one on ties)."""
    return column.mode().iloc[0]


# Most common Subtype per Location, then attached to the per-location means.
subtypes = listings_df.groupby('Location')['Subtype'].apply(first_mode)
location_df = pd.merge(
    left=subtypes,
    right=location_df,
    left_index=True,
    right_on="Location",
)

Number of listings per Location

In [10]:
# Count the listings per Location and store the count as the first column.
location_value_counts = listings_df['Location'].value_counts()
listings_per_location = location_df['Location'].map(location_value_counts).astype(int)
location_df.insert(loc=0, column='Nb of listings', value=listings_per_location)

Price Range for each Location

In [11]:
# Bucket each Location's mean Price into a labelled range.
#
# The bin edges are listed once instead of being repeated in hand-written
# boolean masks, so a boundary cannot be changed on one side only.
# With right=False every bin is [lower, upper): 210000 falls in '210k-250k'
# and 400000 falls in '>400k', matching the previous >= / < comparisons.
#
# This also removes np.select(..., default=0), which mixed string labels with
# an integer default; recent NumPy versions reject that combination because
# the two have no common dtype.
price_bin_edges = [-np.inf, 210000, 250000, 275000, 300000, 320000, 350000, 400000, np.inf]

values = ['0-210k', '210k-250k', '250k-275k', '275k-300k', '300k-320k', '320k-350k', '350k-400k', '>400k']

location_df['Price Range'] = pd.cut(
    location_df['Price'],
    bins=price_bin_edges,
    labels=values,
    right=False,
).astype(str)  # keep plain string labels rather than a categorical column

Merge with Bounding

In [12]:
# Key the aggregated listings by Location and join them onto the bounding
# table; only Locations present in both frames are kept (default inner merge).
location_df = bounding_df.merge(
    location_df.set_index("Location"),
    left_index=True,
    right_index=True,
)

In [13]:
# Preview a few random rows of the merged per-location table.
location_df.sample(n=5)

Unnamed: 0_level_0,Bounding Territory,Bounding Type,Population,GeoPy Index,Nb of listings,Subtype,Living Area,Lot Dimensions,Bedrooms,Bathrooms,Levels,Listing Year,Year of Construction,Age,Price,Price Range
Location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Argenteuil,Argenteuil,Regional County Municipality (RCM),30000,0,184,Bungalow,1409,14824,2,1,1,2016,1990,26,255578,250k-275k
Westmount,Westmount,Montréal Borough,20000,0,10,Condominium,1749,4793,3,1,1,2011,1945,65,675950,>400k
Le Plateau-Mont-Royal,Le Plateau-Mont-Royal,Montréal Borough,100000,0,868,Condominium,1013,6923,2,1,1,2014,1958,55,391287,350k-400k
Antoine-Labelle,Antoine-Labelle,Regional County Municipality (RCM),35000,2,243,Bungalow,1325,20356,2,1,1,2016,1990,26,232753,210k-250k
Maskinongé,Maskinongé (MRC),Regional County Municipality (RCM),35000,0,314,Bungalow,1447,16808,3,1,1,2016,1984,32,203863,0-210k


In [14]:
# Write the per-location table (indexed by Location) to a CSV file.
locations_csv_path = '../data/processed/visualization/locations.csv'
location_df.to_csv(locations_csv_path)