# Visualization Preparation
To visualize the data, we first prepare DataFrames that hold aggregated values, such as the mean price per location, the mean price per listing date, and the number of listings per location.

In [10]:
from pathlib import Path

import numpy as np
import pandas as pd

### Read Data

In [11]:
# Load the processed listings; each row is one listing.
processed_listings_path = '../data/processed/processed_listings.csv'
listings_df = pd.read_csv(processed_listings_path)

# Preview the first 10 rows to check the columns that were read.
listings_df.head(10)

Unnamed: 0,subtype,style,living_area,lot_dimensions,bedrooms,bathrooms,levels,location,listing_year,age,yard_area,listing_date,year_of_construction,price
0,2 Storey,2 storey,1191,4076,3,1,2,Lévis,2020,16,2885,2020-12-01,2004,332500
1,Bungalow,Open area,1261,9500,2,1,1,Portneuf,2021,64,8239,2021-12-01,1957,265000
2,Townhouse,Unknown,1645,1360,3,1,3,Mercier,2021,15,0,2021-11-01,2006,612000
3,Bi-generation,Link,2024,17000,4,3,1,Stoneham-et-Tewkesbury,2021,2,14976,2021-12-01,2019,526500
4,Semi-detached,2 storey,2400,4471,4,2,2,Gatineau,2021,32,2071,2021-12-01,1989,360000
5,2 Storey,Unknown,1800,16090,5,2,2,Alma,2021,31,14290,2021-09-01,1990,284000
6,Bungalow,Detached,960,6157,3,2,1,Fabreville,2021,31,5197,2021-11-01,1990,400000
7,Semi-detached,2 storey,1560,3172,5,2,2,Saint-François,2021,5,1612,2021-12-01,2016,610000
8,Semi-detached,2 storey,1250,6322,4,2,2,Trois-Rivières,2021,16,5072,2021-12-01,2005,320000
9,Bungalow,Open area,1340,5500,4,2,1,Lévis,2021,34,4160,2021-11-01,1987,350000


### Location

In [12]:
# Build one row per location holding the mean of every numeric listing column.
#
# numeric_only=True is required: listings_df still contains text columns
# (subtype, style, listing_date) that cannot be averaged. pandas >= 2.0 raises a
# TypeError for such columns instead of silently dropping them, which breaks this
# cell on a fresh environment. Older pandas versions produce the same columns.
#
# astype(int) truncates the means to whole numbers, and reset_index() turns the
# `location` group key back into a regular column.
location_df = (
    listings_df
    .groupby('location')
    .mean(numeric_only=True)
    .astype(int)
    .reset_index()
)

In [13]:
# Reference table with a Name, Latitude and Longitude column per population center.
population_centers_coordinates = pd.read_csv('../data/processed/population_centers_coordinates.csv')

# Index the coordinates by name and export one name -> value lookup dict per axis.
coordinates_by_name = population_centers_coordinates.set_index('Name')
lat_dict = coordinates_by_name['Latitude'].to_dict()
lon_dict = coordinates_by_name['Longitude'].to_dict()

In [14]:
# Number of listings recorded for each location (indexed by location name).
location_value_counts = listings_df['location'].value_counts()

# Attach the count to the matching row of the per-location summary.
listings_per_location = location_df['location'].map(location_value_counts)
location_df['nb_of_listings'] = listings_per_location.astype(int)

In [15]:
# Look up each location's coordinates by name; names missing from a lookup dict
# are left as NaN by Series.map.
for coordinate_column, coordinate_lookup in (('latitude', lat_dict), ('longitude', lon_dict)):
    location_df[coordinate_column] = location_df['location'].map(coordinate_lookup)

In [16]:
# Hand-made reference list; its `Name` column holds the names to flag.
mtl_island_df = pd.read_csv('../data/references/handmade/mtl-island.csv')
mtl_island_names = mtl_island_df['Name']

# Boolean flag: True when the location name appears in the reference list.
location_df['mtl_island'] = location_df['location'].isin(mtl_island_names)

In [17]:
# Preview the first 20 rows of the per-location summary, including the listing
# count, coordinates and mtl_island flag added in the cells above.
location_df.head(20)

Unnamed: 0,location,living_area,lot_dimensions,bedrooms,bathrooms,levels,listing_year,age,yard_area,year_of_construction,price,nb_of_listings,latitude,longitude,mtl_island
0,Acton Vale,1399,11261,3,1,1,2016,36,9861,1980,225980,192,45.648177,-72.564952,False
1,Alma,1707,14974,4,1,1,2020,30,13267,1989,270800,4544,48.548887,-71.651459,False
2,Amos,1313,8831,3,1,1,2017,34,7517,1983,210140,25,48.571852,-78.116086,False
3,Anjou,1191,5533,2,1,1,2015,28,4351,1986,326092,175,45.604898,-73.546672,True
4,Auteuil,1313,6110,3,1,1,2015,21,4807,1993,311234,288,45.631013,-73.751998,False
5,Baie-Comeau,1208,9024,3,1,1,2016,39,7822,1976,176677,98,49.211837,-68.180141,False
6,Baie-Saint-Paul,1336,10658,3,1,1,2016,38,9322,1977,223198,146,47.444343,-70.505447,False
7,Beaconsfield,1911,9500,3,2,1,2013,43,7618,1970,495167,39,45.428977,-73.865439,True
8,Beauharnois,1241,8210,3,1,1,2017,29,6969,1987,263478,102,45.313978,-73.875834,False
9,Beloeil,1322,6812,3,1,1,2015,20,5496,1995,314847,554,45.564318,-73.204007,False


In [18]:
# Persist the per-location summary for the visualization step.
locations_csv_path = Path('../data/processed/visualization/locations.csv')

# to_csv does not create missing folders, so make sure the output directory
# exists first (e.g. on a fresh checkout); this is a no-op when it is already there.
locations_csv_path.parent.mkdir(parents=True, exist_ok=True)

# index=False: the row index is only a positional counter and is not worth saving.
location_df.to_csv(locations_csv_path, index=False)