In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np

from tqdm import tqdm
from shapely.ops import unary_union
tqdm.pandas()

In [2]:
# Bin edges used with pd.cut to split the 'Equity Index' column into five
# bands (Q1-Q5). The outer edges 0 and 1 bound the index; the four interior
# edges are hard-coded.
# NOTE(review): the interior edges look like precomputed quintile cut-offs of
# the index - confirm where they were derived before reusing on new data.
EQUITY_QUINTILES = [
    0,
    0.2985527,
    0.37396513,
    0.43815575,
    0.52905651,
    1,
]

In [3]:
# Agency locations, reprojected from the source file's CRS to EPSG:3857 so that
# later buffer distances are expressed in that CRS's units.
gdf_locations = (
    gpd.read_file('../joined-data/simplified_matches_4326_full.geo.json')
    .to_crs(3857)
)

# Census tract polygons with their attributes, in the same CRS as the
# locations so the two layers can be compared spatially.
gdf_ct = (
    gpd.read_file('../ct-data/ct-data-all.geo.json')
    .to_crs(3857)
)

Finding 3: Average number of agency spaces (i.e., individual locations) within a census tract area (i.e., completely within a census tract or no more than 800 meters away from it). We also want to break this down by equity index quintile and by region.

In [10]:
# Label every census tract with its equity-index band (Q1-Q5). Tracts whose
# index falls outside the EQUITY_QUINTILES edges get NaN and therefore only
# contribute to the 'Q_all' row below.
quintile_labels = ['Q1', 'Q2', 'Q3', 'Q4', 'Q5']
gdf_ct['quintile'] = pd.cut(
    gdf_ct['Equity Index'],
    bins=EQUITY_QUINTILES,
    labels=quintile_labels,
    include_lowest=True,
)


def _mean_location_counts(label, tracts):
    """Build one summary row: mean 'total_count_800m' overall and per region.

    `label` is stored under 'quintile'; `tracts` is the slice of gdf_ct to
    average over. A region with no tracts in the slice yields NaN.
    """
    counts = tracts['total_count_800m']
    summary = {'quintile': label, 'total_avg': counts.mean()}
    for region_name in ('Peel', 'Toronto', 'York'):
        summary[region_name] = counts[tracts['region'] == region_name].mean()
    return summary


# First row covers every tract; the remaining rows cover one quintile each
result = [_mean_location_counts('Q_all', gdf_ct)]
result.extend(
    _mean_location_counts(label, gdf_ct[gdf_ct['quintile'] == label])
    for label in quintile_labels
)

# Tabulate, round numeric columns to 4 decimal places, and persist as CSV
average_locations_df = pd.DataFrame(result).round(4)
average_locations_df.to_csv('../report-results/3_avg_locations.csv', index=False)

average_locations_df

Unnamed: 0,quintile,total_avg,Peel,Toronto,York
0,Q_all,11.3906,3.3262,17.3333,5.3182
1,Q1,12.1981,1.5581,18.8516,2.5854
2,Q2,10.9387,2.4179,19.8778,6.6909
3,Q3,10.1362,3.3333,19.3012,5.7115
4,Q4,9.2736,4.6567,13.0446,5.8485
5,Q5,14.5236,5.0741,16.7824,5.9333


Finding 4: Percentage of agency spaces near existing and future transit stations (i.e., within 800 m). We want this broken down by owned (CORE) and leased (CLRE) spaces.

In [4]:
# Load existing and future transit stations, reprojected to EPSG:3857 so they
# share a CRS with gdf_locations.
gdf_existing_stations = gpd.read_file('../transit-data/transitStops-toronto.geo.json').to_crs(3857)
gdf_future_stations = gpd.read_file('../transit-data/transitStops-toronto-future.geo.json').to_crs(3857)

# Buffer every station once, up front. The buffers do not depend on the
# location being tested, so building them inside the per-location lambda
# repeated identical work for every row of gdf_locations.
# NOTE(review): 800 is in EPSG:3857 (Web Mercator) units, which overstate
# ground distance away from the equator - confirm this is the intended
# "800 m" radius.
existing_station_buffers = gdf_existing_stations.geometry.buffer(800)
future_station_buffers = gdf_future_stations.geometry.buffer(800)

# Flag each location whose geometry intersects at least one station buffer
gdf_locations['near_existing'] = gdf_locations.geometry.progress_apply(
    lambda loc: existing_station_buffers.intersects(loc).any()
)
gdf_locations['near_future'] = gdf_locations.geometry.progress_apply(
    lambda loc: future_station_buffers.intersects(loc).any()
)
# New column: near_any (True if near either existing or future transit)
gdf_locations['near_any'] = gdf_locations['near_existing'] | gdf_locations['near_future']


def _pct_of_total(count, total):
    """Return `count` as a percentage of `total`, rounded to 4 decimals.

    Returns 0 when `total` is 0 so an empty subset does not divide by zero.
    """
    return round((count / total) * 100, 4) if total > 0 else 0


# Initialize result table
result = []
location_types = ['Own', 'Rent', 'All']
columns = ['location_type', 'existing_transit', 'future_transit', 'not_near_any_transit']

# Compute percentages for each tenure type, plus an 'All' row for every location.
# A location can be near both an existing and a future station, so the three
# percentages in a row overlap and are not expected to sum to 100.
for loc_type in location_types:
    if loc_type == 'All':
        subset = gdf_locations
    else:
        subset = gdf_locations[gdf_locations['Tenure'] == loc_type]

    total_count = len(subset)

    result.append({
        'location_type': loc_type,
        'existing_transit': _pct_of_total(subset['near_existing'].sum(), total_count),
        'future_transit': _pct_of_total(subset['near_future'].sum(), total_count),
        'not_near_any_transit': _pct_of_total((~subset['near_any']).sum(), total_count),
    })

# Convert result to DataFrame and persist as CSV
transit_proximity_df = pd.DataFrame(result, columns=columns)
transit_proximity_df.to_csv('../report-results/4_transit_proximity.csv', index=False)

transit_proximity_df

100%|██████████| 2267/2267 [00:05<00:00, 413.63it/s]
100%|██████████| 2267/2267 [00:03<00:00, 609.17it/s]


Unnamed: 0,location_type,existing_transit,future_transit,not_near_any_transit
0,Own,25.0,17.8819,63.0208
1,Rent,25.0847,23.322,61.2203
2,All,25.9815,22.0997,61.3586


Finding 5: Percentage of agency spaces that are rented (CLRE), broken down by municipality and equity index quintile.

In [12]:
# Row labels and output column order for the rental-share table
quintiles = ['Q_all', 'Q1', 'Q2', 'Q3', 'Q4', 'Q5']
columns = ['quintile', 'total_pct', 'Peel', 'Toronto', 'York']


def _rented_share(tracts):
    """Percentage of nearby locations whose Tenure is 'Rent'.

    "Nearby" means intersecting the union of the given tracts, each buffered
    by 800 CRS units. Returns 0 when no location is nearby (including when
    `tracts` is empty).
    """
    catchment = unary_union(tracts.geometry.buffer(800))
    nearby = gdf_locations[gdf_locations.geometry.intersects(catchment)]
    if len(nearby) == 0:
        return 0
    rented = nearby[nearby['Tenure'] == 'Rent']
    return round((len(rented) / len(nearby)) * 100, 4)


# One row per quintile label; 'Q_all' uses every census tract
result = []
for label in tqdm(quintiles):
    tracts = gdf_ct if label == 'Q_all' else gdf_ct[gdf_ct['quintile'] == label]

    # Overall share first, then the same measure restricted to each region's tracts
    shares = {'quintile': label, 'total_pct': _rented_share(tracts)}
    for region_name in ['Peel', 'Toronto', 'York']:
        shares[region_name] = _rented_share(tracts[tracts['region'] == region_name])

    result.append(shares)

# Tabulate in the declared column order and persist as CSV
rental_percentages_df = pd.DataFrame(result, columns=columns)
rental_percentages_df.to_csv('../report-results/5_rental_percentages.csv', index=False)

rental_percentages_df

100%|██████████| 6/6 [00:13<00:00,  2.17s/it]


Unnamed: 0,quintile,total_pct,Peel,Toronto,York
0,Q_all,65.0485,83.7607,61.1208,71.875
1,Q1,59.5331,83.6735,57.6404,64.4444
2,Q2,65.3944,84.9057,61.039,72.2467
3,Q3,65.2113,85.9155,60.4227,73.5772
4,Q4,64.0496,85.6164,60.2972,66.6667
5,Q5,59.8024,78.6517,57.7645,71.9298
