# Visualization


In [322]:
import folium
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from geopy.geocoders import Nominatim
from matplotlib.colors import ListedColormap
from tqdm import tqdm

# Apply matplotlib's "dark_background" style sheet to the figures created in this notebook.
plt.style.use("dark_background")

### Coloring Methods


In [323]:
def get_alpha_blend_cmap(cmap: str, alpha: float):
    """Return a 256-entry colormap made by blending a named colormap towards 1.0.

    The named colormap is sampled at 256 evenly spaced points and every
    component ``c`` of every sample (all four RGBA columns) is replaced by
    ``(1 - alpha) + alpha * c``.

    Args:
        cmap: Name of a colormap known to ``plt.get_cmap``.
        alpha: Blend weight; 1 keeps the original colours, 0 yields all ones.

    Returns:
        A ``ListedColormap`` built from the blended samples.
    """
    sample_points = np.linspace(0, 1, 256)
    base_rgba = plt.get_cmap(cmap)(sample_points)
    blended_rgba = alpha * base_rgba + (1 - alpha)
    return ListedColormap(blended_rgba)


def get_color_palette_from_cmap(cmap_label: str, cmap_width: int, range_width: int, reversed: bool):
    """Pick ``range_width`` colours from a colormap resampled to ``cmap_width`` entries.

    Args:
        cmap_label: Name of the colormap passed to ``plt.get_cmap``.
        cmap_width: Number of entries requested from ``plt.get_cmap``.
        range_width: Number of colours to return.
        reversed: When true, walk indices downwards from ``cmap_width - 1``;
            otherwise walk upwards from 0.

    Returns:
        A list with one colour (the result of calling the colormap on an
        integer index) per selected index.
    """
    cmap = plt.get_cmap(cmap_label, cmap_width)

    if reversed:
        indices = range(cmap_width - 1, cmap_width - range_width - 1, -1)
    else:
        indices = range(range_width)

    return [cmap(index) for index in indices]


def get_plot_kws(scatter_kws: str, line_kws: str):
    """Build a ``plot_kws``-style dict from two colours.

    Args:
        scatter_kws: Colour stored under ``'scatter_kws'``.
        line_kws: Colour stored under ``'line_kws'``.

    Returns:
        ``{'line_kws': {'color': ...}, 'scatter_kws': {'color': ...}}``.
    """
    line_style = {'color': line_kws}
    scatter_style = {'color': scatter_kws}
    return dict(line_kws=line_style, scatter_kws=scatter_style)


### Read Processed Data


In [324]:
# Load the processed listings table (the path is relative to the notebook's working directory).
listings_df = pd.read_csv('../data/processed/processed_listings.csv')
# Preview the first ten rows.
listings_df.head(10)


Unnamed: 0,subtype,style,living_area,lot_dimensions,bedrooms,bathrooms,levels,location,listing_year,age,yard_area,listing_date,year_of_construction,price
0,2 Storey,2 storey,1191,4076,3,1,2,Lévis,2020,16,2885,2020-12-01,2004,332500
1,Bungalow,Open area,1261,9500,2,1,1,Portneuf,2021,64,8239,2021-12-01,1957,265000
2,Townhouse,Unknown,1645,1360,3,1,3,Hochelaga-Maisonneuve,2021,15,0,2021-11-01,2006,612000
3,Bi-generation,Link,2024,17000,4,3,1,Stoneham-et-Tewkesbury,2021,2,14976,2021-12-01,2019,526500
4,Semi-detached,2 storey,2400,4471,4,2,2,Gatineau,2021,32,2071,2021-12-01,1989,360000
5,2 Storey,Unknown,1800,16090,5,2,2,Alma,2021,31,14290,2021-09-01,1990,284000
6,Bungalow,Detached,960,6157,3,2,1,Fabreville,2021,31,5197,2021-11-01,1990,400000
7,Semi-detached,2 storey,1560,3172,5,2,2,Saint-François,2021,5,1612,2021-12-01,2016,610000
8,Semi-detached,2 storey,1250,6322,4,2,2,Trois-Rivières,2021,16,5072,2021-12-01,2005,320000
9,Bungalow,Open area,1340,5500,4,2,1,Lévis,2021,34,4160,2021-11-01,1987,350000


### Location


In [325]:
# Load the per-location table; the map code in this section reads its latitude,
# longitude, price_range and location columns.
locations_df = pd.read_csv('../data/processed/visualization/locations.csv')
# Show five randomly chosen rows (no random_state is set, so the rows differ between runs).
locations_df.sample(5)


Unnamed: 0,location,living_area,lot_dimensions,bedrooms,bathrooms,levels,listing_year,age,yard_area,year_of_construction,price,nb_of_listings,latitude,longitude,mtl_island,price_range
116,Terrebonne,1311,6840,3,1,1,2015,18,5535,1997,301486,1552,45.708101,-73.651516,False,300k-400k
36,Fabreville,990,6069,3,1,1,2020,30,5080,1989,390743,4237,45.562947,-73.857416,False,300k-400k
31,Deux-Montagnes,1265,9551,3,1,1,2020,41,8287,1978,503158,4552,45.537258,-73.904332,False,>400k
38,Gaspésie,1312,11858,3,1,1,2016,39,10546,1977,184555,226,48.658056,-65.752778,False,0-200k
108,Sainte-Rose,1377,4984,3,1,1,2014,22,3620,1992,350526,474,45.625262,-73.764434,False,300k-400k


In [326]:
# Map that receives one marker per location, initially centred on [46.0, -73.5].
qc_map = folium.Map(location=[46.0, -73.5], zoom_start=8)
folium.TileLayer('cartodbdark_matter').add_to(qc_map)  # Adds the dark 'cartodbdark_matter' tile layer

<folium.raster_layers.TileLayer at 0x23407d413c0>

In [327]:
# List of colour names. NOTE(review): `colors` is not referenced by any other cell visible here.
colors = ['red', 'blue', 'green', 'purple', 'orange', 'darkred', 'lightred', 'beige', 'darkblue', 'darkgreen',
          'cadetblue', 'darkpurple', 'white', 'pink', 'lightblue', 'lightgreen', 'gray', 'black', 'lightgray']

# Marker colour for each price bracket; looked up with a row's `price_range` value.
price_range_colors = {'0-200k': 'white', '200k-300k': 'pink', '300k-400k': 'orange', '>400k': 'darkred'}

In [328]:
def plot_coordinate(location: pd.Series, target_map=None):
    """Add one location to a folium map as a small circle marker.

    Intended for row-wise use, e.g. ``locations_df.apply(plot_coordinate, axis=1)``.

    Args:
        location: A row exposing ``latitude``, ``longitude``, ``price_range``
            (must be a key of ``price_range_colors``) and ``location``
            (used as the popup text).
        target_map: Map that receives the marker. When omitted, the
            notebook-level ``qc_map`` is used.

    Returns:
        None. The marker is attached to the map as a side effect.

    Raises:
        KeyError: If ``location.price_range`` is not in ``price_range_colors``.
    """
    if target_map is None:
        target_map = qc_map

    # The marker size is fixed; the price is conveyed by the colour only.
    folium.CircleMarker(
        location=[location.latitude, location.longitude],
        radius=2,
        color=price_range_colors[location.price_range],
        popup=location.location,
    ).add_to(target_map)


In [329]:
# Draw one marker per row. plot_coordinate is called only for its effect on the map
# and has no return statement, so the resulting Series holds only None.
locations_df.apply(plot_coordinate, axis=1)

0      None
1      None
2      None
3      None
4      None
       ... 
125    None
126    None
127    None
128    None
129    None
Length: 130, dtype: object

In [330]:
# Render the map with the location markers.
qc_map

In [None]:
#############################################

In [None]:
qc_chloro_map

In [None]:
geolocator = Nominatim(user_agent='housing-qc-viz')
qc_chloro_map = folium.Map(location=[47.0, -70.5], zoom_start=8)

In [None]:
def get_location_geotext(location: str):
    """Geocode a place name and return the geometry text of the match.

    Args:
        location: Query string passed to the notebook-level ``geolocator``.

    Returns:
        The ``"geotext"`` entry of the match's raw response (requested with
        ``geometry='wkt'``).

    Raises:
        ValueError: If the geocoder finds no match for ``location``.
    """
    match = geolocator.geocode(location, geometry='wkt')
    if match is None:
        # Name the failing query instead of surfacing an AttributeError on ``None.raw``.
        raise ValueError(f"No geocoding result for {location!r}")
    return match.raw["geotext"]


In [None]:
# Geometry type names recognised in a geometry string. Order matters: when several
# names match, the one listed last is chosen, so "Multi*" names follow their base name.
geometries = [
    "Topology",
    "Point",
    "MultiPoint",
    "LineString",
    "MultiLineString",
    "Polygon",
    "MultiPolygon",
    "GeometryCollection",
]


def get_geometry_type(geotext: str):
    """Guess the geometry type named in a geometry string.

    A name from ``geometries`` matches when its upper-case form occurs anywhere
    in ``geotext`` (case-sensitive substring test).

    Args:
        geotext: Geometry text such as ``"POLYGON((...))"``.

    Returns:
        The last matching name in ``geometries`` order, or ``"Polygon"`` when
        nothing matches.
    """
    matching = [name for name in geometries if name.upper() in geotext]
    if not matching:
        return "Polygon"
    return matching[-1]


def geotext_to_arcs(geotext: str):
    """Parse the coordinates of a geometry string into a flat list of points.

    The geometry keyword and every parenthesis are stripped, the remainder is
    split on commas, and each piece is read as whitespace-separated floats.
    Because all parentheses are removed, nested groups (e.g. several rings)
    end up in one flat list.

    Args:
        geotext: Geometry text such as ``"POLYGON((x1 y1,x2 y2,...))"``.

    Returns:
        ``(arc, geometry_type)`` where ``arc`` is a list of float lists (one per
        comma-separated point) and ``geometry_type`` is the result of
        ``get_geometry_type(geotext)``.

    Raises:
        ValueError: If a point is empty or contains a non-numeric token. The
            message names the offending piece, so callers that unpack the
            result no longer fail later on a ``None`` return.
    """
    geometry_type = get_geometry_type(geotext)
    body = geotext.replace(geometry_type.upper(), "").replace("(", "").replace(")", "")

    arc = []
    for point_text in body.split(","):
        # split() without an argument tolerates leading, trailing and repeated
        # whitespace, so both "x y,x y" and "x y, x y" are accepted.
        tokens = point_text.split()
        if not tokens:
            raise ValueError(f"Empty coordinate in geometry text: {geotext[:80]!r}")
        try:
            arc.append([float(token) for token in tokens])
        except ValueError as err:
            raise ValueError(
                f"Cannot parse coordinate {point_text!r} in geometry text: {geotext[:80]!r}"
            ) from err
    return arc, geometry_type


In [None]:
def create_topo(location: str, geometry_type: str, arcs):
    """Wrap a single arc in a Topology-shaped dict with one named object.

    Args:
        location: Key under ``"objects"`` for the geometry collection.
        geometry_type: Value stored as the geometry's ``"type"``.
        arcs: The arc (list of points); stored as the only entry of the
            top-level ``"arcs"`` list and referenced by the geometry as arc 0.

    Returns:
        A dict with keys ``"type"``, ``"objects"`` and ``"arcs"``.
    """
    # NOTE(review): "properties" holds fixed placeholder values.
    geometry = {
        "type": geometry_type,
        "properties": {"prop0": "value0", "prop1": {"this": "that"}},
        "arcs": [[0]],
    }
    collection = {"type": "GeometryCollection", "geometries": [geometry]}
    return {"type": "Topology", "objects": {location: collection}, "arcs": [arcs]}


In [None]:
# Draw the boundary of each of the first 80 distinct locations. Only results whose
# geometry text contains "POLYGON" but not "MULTIPOLYGON" are drawn; the names of
# all other locations are printed and skipped.
for location in tqdm(locations_df['location'].unique()[0:80], desc="Creating Chloropleth Map"):
    geotext = get_location_geotext(location + ", Qc")

    if not ("POLYGON" in geotext and "MULTIPOLYGON" not in geotext):
        print(location)
        continue

    arcs, geometry_type = geotext_to_arcs(geotext)
    topo = create_topo(location, geometry_type, arcs)
    layer = folium.TopoJson(topo, f"objects.{location}", name="topojson")
    layer.add_to(qc_chloro_map)


In [None]:
# Render the map with the boundary layers added by the loop.
qc_chloro_map


In [None]:
################

In [None]:
def create_topo2(location: str, geometry_type: str, arcs):
    """Build a minimal two-key dict from a geometry type and its arcs.

    Args:
        location: Accepted but not used by this function.
        geometry_type: Stored under ``"type"``.
        arcs: Stored unchanged under ``"arcs"``.

    Returns:
        ``{"type": geometry_type, "arcs": arcs}``.
    """
    return dict(type=geometry_type, arcs=arcs)


In [None]:
# Second, separate map that receives the create_topo2 layer.
qc_chloro_map2 = folium.Map(location=[47.0, -70.5], zoom_start=7)


In [None]:
# Peek at the first ten distinct location names.
locations_df['location'].unique()[0:10]


In [None]:
# Single-location experiment: geocode one place, parse its geometry text into
# points, and wrap the result with create_topo2.
location = "Ahuntsic-Cartierville"
geotext = get_location_geotext(location + ", Qc")
arcs, geometry_type = geotext_to_arcs(geotext)
topo = create_topo2(location, geometry_type, arcs)


In [None]:
# Inspect the parsed points.
arcs


In [None]:
# Inspect the raw geometry text returned by the geocoder.
geotext


In [None]:
# Inspect the dict built by create_topo2.
topo


In [None]:
# Add the create_topo2 result to the second map.
# NOTE(review): the object path passed here is an empty string and `topo` has no
# "objects" key, unlike the create_topo call that uses "objects.<name>" — confirm
# that this layer actually renders.
folium.TopoJson(
    topo,
    (""),
    name="topojson",
).add_to(qc_chloro_map2)


In [None]:
# Render the second map.
qc_chloro_map2


In [None]:
################

### Correlation Map


In [None]:
def plot_corr_map(df):
    """Draw a heatmap of the pairwise correlations between the numeric columns of ``df``.

    Only the strict lower triangle is shown; the upper triangle and the diagonal
    are masked out.

    Args:
        df: DataFrame to analyse. Columns that are neither numeric nor boolean
            are ignored.

    Returns:
        None. The heatmap is drawn on a new 11x9 inch figure.
    """
    # Keep numeric/boolean columns only: recent pandas raises when corr() meets
    # string columns instead of silently dropping them.
    corr = df.select_dtypes(include=["number", "bool"]).corr()

    # Mask for the upper triangle, diagonal included
    mask = np.triu(np.ones_like(corr, dtype=bool))

    # Create the figure and hand its axes to seaborn explicitly
    _, ax = plt.subplots(figsize=(11, 9))

    # Colours saturate at vmax=.3, so stronger positive correlations look alike.
    sns.heatmap(corr, mask=mask, vmax=.3, cmap=get_alpha_blend_cmap("rocket_r", 0.9), center=0,
                square=True, linewidths=0, cbar_kws={"shrink": .5}, ax=ax)


In [None]:
# Correlation heatmap of the listings with the 'age' column left out — presumably
# because it is derived from other columns (in the preview rows,
# age == listing_year - year_of_construction); TODO confirm.
plot_corr_map(listings_df.drop(columns=['age']))


## Price


In [None]:
# Kernel density of price, one curve per listing year. common_norm=False asks
# seaborn to normalise each year's curve on its own. NOTE(review): fill=True is
# combined with alpha=.0, which presumably leaves only the outlines visible — confirm.
sns.displot(listings_df, x='price', hue='listing_year', kind="kde", fill=True, common_norm=False,
            height=8, aspect=1.5, alpha=.0, linewidth=2, palette=get_alpha_blend_cmap("viridis", 0.9))


## Listing Year


In [None]:
# Price brackets; each listing gets the label of the first bracket it falls into.
conditions = [
    (listings_df['price'] >= 0) & (listings_df['price'] < 200000),
    (listings_df['price'] >= 200000) & (listings_df['price'] < 300000),
    (listings_df['price'] >= 300000) & (listings_df['price'] < 400000),
    listings_df['price'] >= 400000,
]
values = ['0-200k', '200k-300k', '300k-400k', '>400k']

# The default must be a string like the labels: recent NumPy refuses to combine
# string choices with the integer default 0 (TypeError: no common dtype). '0' is the
# label that integer default was coerced to on versions that accepted it. Rows that
# match no bracket (e.g. missing price) get it and are left out by hue_order below.
listings_df['price_range'] = np.select(conditions, values, default='0')

# Density of listing year, one curve per price bracket.
sns.displot(listings_df, x='listing_year', hue='price_range', hue_order=values, kind="kde", common_norm=True,
            height=8, aspect=1.4, linewidth=2, palette=get_color_palette_from_cmap("plasma", 5, 4, True))


In [None]:
# Mean listing price for every year in the data, first and last year included.
min_y = int(listings_df['listing_year'].min())
max_y = int(listings_df['listing_year'].max())
years = range(min_y, max_y + 1)  # +1 because range() excludes its stop value

mean_prices = (
    listings_df.groupby('listing_year')['price'].mean()
    .reindex(years)  # a year without listings becomes NaN, as with a per-year filter
    .tolist()
)

df = pd.DataFrame({'years': list(years), 'mean_price': mean_prices})

# Scatter of the yearly means with a fitted regression line.
s = sns.pairplot(data=df, x_vars='years', y_vars='mean_price', kind="reg",
                 height=8, aspect=1.5, plot_kws=get_plot_kws('cyan', 'yellow'))
s.set(xticks=years, yticks=range(100000, 400000, 50000))


## Living Area


In [None]:
# Density of living area, one curve per price bracket. This relies on `values`
# still holding the price labels and on the 'price_range' column, both set in the
# "Listing Year" section; `values` is reassigned in a later cell.
sns.displot(listings_df, x='living_area', hue='price_range', hue_order=values, kind="kde", common_norm=True,
            height=8, aspect=1.4, linewidth=2, palette=get_color_palette_from_cmap("plasma", 5, 4, True))


In [None]:
# Preview the frame (including the added price_range column) instead of dumping every row.
listings_df.head()


In [None]:
# Living-area brackets: a label and a numeric x position (the lower bound) for each.
conditions = [
    (listings_df['living_area'] >= 500) & (listings_df['living_area'] < 1000),
    (listings_df['living_area'] >= 1000) & (listings_df['living_area'] < 2000),
    (listings_df['living_area'] >= 2000) & (listings_df['living_area'] < 2500),
    listings_df['living_area'] >= 2500,
]
values = ['500-1000 sq.ft', '1000-2000 sq.ft',
          '2000-2500 sq.ft', '>2500 sq.ft']
n_values = [500, 1000, 2000, 2500]

# String default for the string labels: recent NumPy raises a TypeError when string
# choices are combined with the integer default 0. '0' is the label that integer
# default was coerced to on versions that accepted it.
listings_df['living_area_range'] = np.select(conditions, values, default='0')
listings_df['n_values'] = np.select(conditions, n_values, default=0)

# Mean price of each bracket, broadcast to its rows in one pass. Rows outside every
# bracket keep 0, and the column is float from the start (no int -> float upcast).
in_a_bracket = listings_df['living_area_range'].isin(values)
listings_df['living_area_mean_price'] = (
    listings_df.groupby('living_area_range')['price']
    .transform('mean')
    .where(in_a_bracket, 0.0)
)

s = sns.pairplot(data=listings_df, x_vars='n_values', y_vars='living_area_mean_price',
                 kind="reg", height=8, aspect=1.5, plot_kws=get_plot_kws('cyan', 'yellow'))
# Tick positions must be numeric like the x data; the bracket names are only the labels.
s.set(xticks=n_values, yticks=range(100000, 400000, 50000))
s.set(xticklabels=values)
