# Location Polygons
Compute the boundary polygons of areas from a curated list of locations.

In [16]:
import numpy as np
import pandas as pd
import pickle

import geopandas as gpd
from geopy.distance import geodesic
from geopy.geocoders import Nominatim
from os import path
import matplotlib.pyplot as plt
import seaborn as sns
from shapely.geometry import Point
from shapely.ops import unary_union
from shapely import wkt
from tqdm import tqdm

In [18]:
# Nominatim geocoder instance, identified by the 'housing-qc' user agent.
# It is passed to the polygon-building helpers defined in this notebook.
geolocator = Nominatim(user_agent='housing-qc')

### Read Data

In [19]:
# Load the hand-made reference table of bounding territories,
# then preview five randomly chosen rows.
bounding_territories_df = pd.read_csv('../data/references/handmade/bounding-territories.csv')
bounding_territories_df.sample(5)

Unnamed: 0,Bounding Territory,Display Name,Bounding Type,Bounding Population,GeoPy Index
14,Lévis,Lévis,Municipality,145000,0
43,"[Les Jardins-de-Napierville,Le Haut-Saint-Laur...",Les Jardins-de-Napierville & Le Haut-Saint-Lau...,Regional County Municipality (RCM),50000,0
18,Les Appalaches,Les Appalaches,Regional County Municipality (RCM),40000,0
91,Les Moulins,Les Moulins,Regional County Municipality (RCM),160000,0
80,Villeray-Saint-Michel-Parc-Extension,Villeray-Saint-Michel-Parc-Extension,Montréal Borough,145000,0


In [28]:
# Summary statistics of the 'Bounding Population' column.
mean_pop = bounding_territories_df['Bounding Population'].mean()
std_pop = bounding_territories_df['Bounding Population'].std()
# `!s` forces str() conversion, matching plain string concatenation.
print(f'Avg: {mean_pop!s}, Std: {std_pop!s}')

Avg: 72812.5, Std: 48065.18307802774


### Methods

In [17]:
def get_bounding_polygons(geolocator: Nominatim, bounding: str, geopy_index: int):
    """Geocode every location named in ``bounding`` and collect its polygons.

    ``bounding`` is either a single location name or a bracketed,
    comma-separated list such as ``"[A,B,-C]"``. Every name is looked up with
    the suffix ``', QC'``. A name prefixed with ``-`` marks an area to
    subtract: all of its geocoding matches are collected. For every other
    name only the match at position ``geopy_index`` is kept.

    Args:
        geolocator: Geocoder used for the lookups.
        bounding: Location name, or bracketed comma-separated list of names.
        geopy_index: Index of the geocoding match to keep for each added
            (non-subtracted) location.

    Returns:
        A ``(polygons, substracts)`` tuple of lists. Both hold geometries
        parsed from the WKT text in each match's ``raw['geotext']``.

    Raises:
        ValueError: If a location yields no geocoding match.
        IndexError: If ``geopy_index`` is out of range for a location's matches.
    """
    polygons = []
    substracts = []
    for token in bounding.replace("[", "").replace("]", "").split(","):
        # Strip surrounding whitespace so that "[A, -B]" is parsed like
        # "[A,-B]"; otherwise " -B" would not be recognised as a subtraction.
        location = token.strip()
        if not location:
            # Ignore empty entries (e.g. a trailing comma) rather than
            # geocoding the bare ', QC' suffix.
            continue

        is_subtraction = location.startswith("-")
        if is_subtraction:
            location = location[1:].strip()

        query = location + ', QC'
        geocodes = geolocator.geocode(query, geometry='wkt', exactly_one=False)
        if not geocodes:
            # geocode() yields None when nothing matches; fail with the
            # offending query instead of an opaque TypeError.
            raise ValueError(f"No geocoding result for {query!r}")

        if is_subtraction:
            for geocode in geocodes:
                substracts.append(wkt.loads(geocode.raw['geotext']))
        else:
            try:
                geocode = geocodes[geopy_index]
            except IndexError:
                raise IndexError(
                    f"GeoPy index {geopy_index} is out of range for {query!r} "
                    f"({len(geocodes)} result(s))"
                ) from None
            polygons.append(wkt.loads(geocode.raw['geotext']))

    return polygons, substracts

In [18]:
def get_bounding_polygon(geolocator: Nominatim, bounding: str, geopy_index: int, tolerance: float = 0.001):
    """Build the single simplified geometry for one bounding territory.

    The polygons returned by ``get_bounding_polygons`` are merged with
    ``unary_union``, the merged subtraction areas are removed with
    ``difference``, and the result is simplified.

    Args:
        geolocator: Geocoder used for the lookups.
        bounding: Location name, or bracketed comma-separated list of names
            (names prefixed with ``-`` are subtracted).
        geopy_index: Index of the geocoding match to keep for each added location.
        tolerance: Simplification tolerance passed to ``GeoSeries.simplify``,
            expressed in the units of the geometry coordinates. Defaults to
            ``0.001``, the value previously hard-coded here.

    Returns:
        The simplified geometry of the territory.
    """
    polygons, substracts = get_bounding_polygons(geolocator, bounding, geopy_index)
    # Merge the kept areas, then carve out the merged subtraction areas.
    territory = unary_union(polygons).difference(unary_union(substracts))
    # Wrap in a one-element GeoSeries to simplify, then unwrap the geometry.
    return gpd.GeoSeries(territory).simplify(tolerance=tolerance).iloc[0]

In [19]:
def get_polygons(geolocator: Nominatim, bounding_territories_df: pd.DataFrame):
    """Build one polygon per row of the bounding-territories table.

    For each row, the "Bounding Territory" and "GeoPy Index" values are
    handed to ``get_bounding_polygon``. The resulting geometries are returned
    as a list in row order, with a progress bar shown while iterating.
    """
    rows = tqdm(
        bounding_territories_df.iterrows(),
        desc="Building GeoSeries",
        total=len(bounding_territories_df),
    )
    return [
        get_bounding_polygon(geolocator, row["Bounding Territory"], row["GeoPy Index"])
        for _, row in rows
    ]

### Execute computations

In [20]:
output = '../data/processed/locations/location-polygons.gpkg'

# The output file doubles as a cache: the geocoding pass only runs when the
# file does not exist yet; otherwise the saved polygons are loaded from disk.
if not path.exists(output):
    polygons = get_polygons(geolocator, bounding_territories_df)
    d = {
        'location': bounding_territories_df["Display Name"].to_list(),
        'geometry': polygons,
    }
    polygons_gdf = gpd.GeoDataFrame(d, crs="EPSG:4326")
    polygons_gdf.to_file(output)
else:
    unknown_locations = []
    polygons_gdf = gpd.read_file(output)

In [21]:
# Preview ten randomly chosen rows of the location polygons.
polygons_gdf.sample(10)

Unnamed: 0,location,geometry
83,Trois-Rivières,"POLYGON ((-72.77942 46.31866, -72.77905 46.316..."
37,Pierre-de-Saurel,"POLYGON ((-73.20407 45.87555, -73.12387 45.817..."
30,Brossard,"POLYGON ((-73.52372 45.47791, -73.51924 45.468..."
87,L'Assomption,"POLYGON ((-73.58056 45.82039, -73.55007 45.816..."
18,Les Appalaches,"POLYGON ((-71.68612 45.96671, -71.56214 45.906..."
35,La Haute-Yamaska,"POLYGON ((-72.87613 45.28444, -72.86160 45.286..."
93,Hull,"POLYGON ((-75.79685 45.48246, -75.79399 45.478..."
76,Saint-Laurent,"POLYGON ((-73.77388 45.50051, -73.77185 45.499..."
66,Le Sud-Ouest,"POLYGON ((-73.63515 45.45074, -73.62886 45.448..."
102,Mirabel,"MULTIPOLYGON (((-74.20253 45.49178, -74.25107 ..."
