In [23]:
import os

import numpy as np
import pandas as pd
import geopandas as gpd
from tqdm import tqdm

from shapely.geometry import MultiPolygon, Polygon
from shapely.validation import make_valid

Identify the FEDs and ONT-EDs from each redistricting period that have at least 50% of their area within the GTA.

In [4]:
# Years used to build the per-year FED boundary file paths.
FED_YEARS = [
    1952, 1966, 1976, 1987,
    1996, 1999, 2003, 2013,
]
# Years used to build the per-year ONT-ED boundary file paths.
ONTED_YEARS = [
    1962, 1966, 1975, 1987,
    1996, 2005, 2015,
]

In [5]:
# GTA boundary geometry that district geometries are intersected with.
# Only the first feature of the file is used — presumably the file holds a
# single (multi)polygon; TODO confirm.
mpoly_GTA = (
    gpd.read_file('../data/geo/regions/GTA_2013_OrthoTile_Index/gta.gpkg')
    .geometry
    .iloc[0]
)

In [36]:
# For every FED year: read the district shapefile, measure how much of each
# district lies inside the GTA, and write the districts that are at least
# half inside the GTA to a per-year GeoPackage.
for year in tqdm(FED_YEARS):
    gdf_fed_all = gpd.read_file(f'../data/geo/{year}_fed/fed_{year}/fed_{year}.shp')

    # Fraction of each district's area covered by the GTA geometry.
    # Vectorised GeoSeries operations replace the row-wise apply; a zero-area
    # or missing geometry yields NaN (dropped by the filter below) instead of
    # raising.
    # NOTE(review): the ratio is only meaningful if the districts and
    # mpoly_GTA share a CRS — nothing here reprojects; confirm.
    gdf_fed_all['gta_overlap'] = (
        gdf_fed_all.geometry.intersection(mpoly_GTA).area
        / gdf_fed_all.geometry.area
    )

    gdf_fed_gta = gdf_fed_all[gdf_fed_all.gta_overlap >= 0.5]
    gdf_fed_gta.to_file(f'../data/geo/{year}_fed/fed_gta_{year}.gpkg', driver="GPKG")

100%|█████████████████████████████████████████████| 8/8 [00:12<00:00,  1.54s/it]


In [35]:
def extract_valid_geometry(geom):
    """Repair ``geom`` and reduce a GeometryCollection result to its polygons.

    The geometry is first passed through ``make_valid``. If the repaired
    result is a GeometryCollection, only its Polygon / MultiPolygon members
    are kept; any other member types are discarded.

    Parameters
    ----------
    geom : shapely geometry
        Geometry to repair.

    Returns
    -------
    shapely geometry or None
        * the repaired geometry itself when it is not a GeometryCollection;
        * the single polygonal member when the collection holds exactly one;
        * a MultiPolygon of all polygonal parts when it holds several;
        * ``None`` when the collection holds no polygonal member.
    """
    geom = make_valid(geom)
    if geom.geom_type != "GeometryCollection":
        return geom

    polygonal = [g for g in geom.geoms if isinstance(g, (Polygon, MultiPolygon))]
    if not polygonal:
        return None
    if len(polygonal) == 1:
        return polygonal[0]

    # MultiPolygon() rejects MultiPolygon members (no implicit flattening),
    # so expand them into their constituent polygons before combining.
    parts = []
    for member in polygonal:
        if isinstance(member, MultiPolygon):
            parts.extend(member.geoms)
        else:
            parts.append(member)
    return MultiPolygon(parts)

In [44]:
# For every ONT-ED year: read the district GeoJSON, normalise its column
# names, repair the geometries, and write the districts that are at least
# half inside the GTA to a per-year GeoPackage.
for year in tqdm(ONTED_YEARS):
    gdf_onted_all = gpd.read_file(f'../data/geo/{year}_ont-ed/ont-ed_{year}.geojson')

    # The source files use different column names depending on the year;
    # map them onto common 'onted_id' / 'geoname' (and 'fed_id' for 1996)
    # names. A year matching no branch keeps its original columns.
    if year <= 1987:
        gdf_onted_all = gdf_onted_all[['id', 'RIDINGNAME', 'geometry']].rename(
            columns={'id': 'onted_id', 'RIDINGNAME': 'geoname'}
        )
    elif year == 1996:
        gdf_onted_all = gdf_onted_all[['FED_NUM', 'FED_NAME', 'RIDINGNO', 'RIDINGNAME', 'geometry']].rename(
            columns={'FED_NUM': 'fed_id', 'RIDINGNO': 'onted_id', 'RIDINGNAME': 'geoname'}
        )
    elif year == 2005:
        gdf_onted_all = gdf_onted_all[['RIDINGNO', 'RIDINGNAME', 'geometry']].rename(
            columns={'RIDINGNO': 'onted_id', 'RIDINGNAME': 'geoname'}
        )
    elif year == 2015:
        gdf_onted_all = gdf_onted_all[['ED_ID', 'RIDINGNAME', 'geometry']].rename(
            columns={'ED_ID': 'onted_id', 'RIDINGNAME': 'geoname'}
        )

    # extract_valid_geometry() calls make_valid() itself, so a single pass
    # both repairs each geometry and strips non-polygonal leftovers.
    gdf_onted_all['geometry'] = gdf_onted_all['geometry'].apply(extract_valid_geometry)

    # Fraction of each district's area covered by the GTA geometry.
    # Vectorised GeoSeries operations are used so that a geometry reduced to
    # None by extract_valid_geometry(), or one with zero area, yields NaN
    # (dropped by the filter below) instead of raising.
    gdf_onted_all['gta_overlap'] = (
        gdf_onted_all.geometry.intersection(mpoly_GTA).area
        / gdf_onted_all.geometry.area
    )
    gdf_onted_gta = gdf_onted_all[gdf_onted_all.gta_overlap >= 0.5]

    gdf_onted_gta.to_file(f'../data/geo/{year}_ont-ed/ont-ed_gta_{year}.gpkg', driver='GPKG')

100%|█████████████████████████████████████████████| 7/7 [00:01<00:00,  5.53it/s]
