In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np

from tqdm import tqdm
from shapely.ops import unary_union
tqdm.pandas()

In [2]:
EQUITY_QUINTILES = [0, 0.2985527, 0.37396513, 0.43815575, 0.52905651, 1]

In [3]:
gdf_locations = gpd.read_file('../joined-data/simplified_matches_4326_full.geo.json').to_crs(3857)
gdf_ct = gpd.read_file('../ct-data/ct-data-all.geo.json').to_crs(3857)

# Assign quintile labels based on equity index
gdf_ct['quintile'] = pd.cut(
    gdf_ct['Equity Index'], bins=EQUITY_QUINTILES, labels=['Q1', 'Q2', 'Q3', 'Q4', 'Q5'], include_lowest=True
)

Finding 3: Average number of agency spaces (ie. individual locations) within a census tract area (ie. completely within a census tract or no more than 800 meters away from it) weighted by population. We want to break this down by each of the quantiles of the equity index as well.

In [4]:
result = []

# Compute population-weighted averages for all quintiles combined and by region
all_quintiles = {
    'quintile': 'Q_all',
    'total_avg': np.average(gdf_ct['total_count_800m'], weights=gdf_ct['Popu_2021'])
}
for region in ['Peel', 'Toronto', 'York']:
    region_subset = gdf_ct[gdf_ct['region'] == region]
    all_quintiles[region] = np.average(region_subset['total_count_800m'], weights=region_subset['Popu_2021'])
result.append(all_quintiles)

# Compute population-weighted averages for each quintile and by region
for quintile in ['Q1', 'Q2', 'Q3', 'Q4', 'Q5']:
    quintile_data = gdf_ct[gdf_ct['quintile'] == quintile]
    row = {
        'quintile': quintile,
        'total_avg': np.average(quintile_data['total_count_800m'], weights=quintile_data['Popu_2021'])
    }
    for region in ['Peel', 'Toronto', 'York']:
        region_subset = quintile_data[quintile_data['region'] == region]
        if len(region_subset) > 0:
            row[region] = np.average(region_subset['total_count_800m'], weights=region_subset['Popu_2021'])
        else:
            row[region] = np.nan
    result.append(row)

# Save results to a DataFrame and CSV
average_locations_df = pd.DataFrame(result)
average_locations_df = average_locations_df.round(4)  # Round all numbers to 4 decimal places
average_locations_df.to_csv('../report-results/3_avg_locations.csv', index=False)

average_locations_df

Unnamed: 0,quintile,total_avg,Peel,Toronto,York
0,Q_all,11.945,3.3871,19.2289,5.1813
1,Q1,13.717,1.3693,21.9675,2.393
2,Q2,10.4546,2.4879,21.1966,6.1955
3,Q3,10.7674,3.5817,21.9524,5.2045
4,Q4,9.487,4.5,13.9897,6.2356
5,Q5,15.768,5.0676,18.3919,5.9199


Finding 4: Percentage of agency spaces near existing and future transit stations (ie. within 800m). We want this broken down by owned (CORE) and leased (CLRE)

In [5]:
# Load transit station data
gdf_existing_stations = gpd.read_file('../transit-data/transitStops-toronto.geo.json').to_crs(3857)
gdf_future_stations = gpd.read_file('../transit-data/transitStops-toronto-future.geo.json').to_crs(3857)

# Create temporary columns for proximity to transit stations
gdf_locations['near_existing'] = gdf_locations.geometry.progress_apply(
    lambda loc: gdf_existing_stations.geometry.buffer(800).intersects(loc).any()
)
gdf_locations['near_future'] = gdf_locations.geometry.progress_apply(
    lambda loc: gdf_future_stations.geometry.buffer(800).intersects(loc).any()
)
# New column: near_any (True if near either existing or future transit)
gdf_locations['near_any'] = gdf_locations['near_existing'] | gdf_locations['near_future']

# Initialize result table
result = []
location_types = ['Own', 'Rent', 'All']
columns = ['location_type', 'existing_transit', 'future_transit', 'not_near_any_transit']

# Compute percentages for each location type
for loc_type in location_types:
    if loc_type == 'All':
        subset = gdf_locations
    else:
        subset = gdf_locations[gdf_locations['Tenure'] == loc_type]

    total_count = len(subset)
    existing_count = subset['near_existing'].sum()
    future_count = subset['near_future'].sum()
    not_near_any_count = (~subset['near_any']).sum()

    result.append({
        'location_type': loc_type,
        'existing_transit': round((existing_count / total_count) * 100, 4) if total_count > 0 else 0,
        'future_transit': round((future_count / total_count) * 100, 4) if total_count > 0 else 0,
        'not_near_any_transit': round((not_near_any_count / total_count) * 100, 4) if total_count > 0 else 0
    })

# Convert result to DataFrame
transit_proximity_df = pd.DataFrame(result, columns=columns)
transit_proximity_df.to_csv('../report-results/4_transit_proximity.csv', index=False)

transit_proximity_df

100%|██████████| 2267/2267 [00:05<00:00, 452.52it/s]
100%|██████████| 2267/2267 [00:03<00:00, 647.63it/s]


Unnamed: 0,location_type,existing_transit,future_transit,not_near_any_transit
0,Own,25.0,17.8819,63.0208
1,Rent,25.0847,23.322,61.2203
2,All,25.9815,22.0997,61.3586


Finding 5: Percentage of agency spaces which are rented (CLRE), broken down by municipality and equity index quintile

In [6]:
# Initialize result table
result = []
quintiles = ['Q1', 'Q2', 'Q3', 'Q4', 'Q5']
columns = ['quintile', 'total_pct', 'Peel', 'Toronto', 'York']

# Compute percentages for each quintile and region
for quintile in tqdm(quintiles):
    subset = gdf_ct[gdf_ct['quintile'] == quintile]
    row = {'quintile': quintile}
    
    # Compute total percentage for all locations
    total_locations = gdf_locations[gdf_locations.geometry.intersects(
        unary_union(subset.geometry.buffer(800))
    )]
    rental_locations = total_locations[total_locations['Tenure'] == 'Rent']
    row['total_pct'] = round((len(rental_locations) / len(total_locations)) * 100, 4) if len(total_locations) > 0 else 0

    # Compute percentages for each region
    for region in ['Peel', 'Toronto', 'York']:
        region_subset = subset[subset['region'] == region]
        if len(region_subset) > 0:
            region_locations = gdf_locations[gdf_locations.geometry.intersects(
                unary_union(region_subset.geometry.buffer(800))
            )]
            region_rental_locations = region_locations[region_locations['Tenure'] == 'Rent']
            row[region] = round((len(region_rental_locations) / len(region_locations)) * 100, 4) if len(region_locations) > 0 else 0
        else:
            row[region] = 0

    result.append(row)

# Convert result to DataFrame
rental_percentages_df = pd.DataFrame(result, columns=columns)
rental_percentages_df.to_csv('../report-results/5_rental_percentages.csv', index=False)

rental_percentages_df

100%|██████████| 5/5 [00:10<00:00,  2.08s/it]


Unnamed: 0,quintile,total_pct,Peel,Toronto,York
0,Q1,59.5331,83.6735,57.6404,64.4444
1,Q2,65.3944,84.9057,61.039,72.2467
2,Q3,65.2113,85.9155,60.4227,73.5772
3,Q4,64.0496,85.6164,60.2972,66.6667
4,Q5,59.8024,78.6517,57.7645,71.9298


Appendix 7: Average number of agency spaces in or near a census tract, and percent of agency spaces that are rentals in or near a census tract, broken down by: equity indicators and municipalities

In [None]:
# Create a new GeoDataFrame with demographic counts instead of percentages
demographic_counts_gdf = gdf_ct.copy()

# List of percentage columns to convert
percentage_columns = ['Immigrant%', 'VM%', '1-ParentFam%', 'MBM%', 'LIM%',
                     'Neet%', '%CHN', '%Affordable', '%ofWP', 'ShortTerm%', '%Evic',
                     '%Unemp', '%NoEdu', '%IncomeGT']

# Convert percentages to absolute counts and replace columns with new names
for col in percentage_columns:
    # Convert percentage to proportion, then multiply by population
    # Fill NaN values with 0 before calculation
    percentage_value = gdf_ct[col].fillna(0)
    new_col_name = col.replace('%', '')
    demographic_counts_gdf[new_col_name] = ((percentage_value / 100) * gdf_ct['Popu_2021']).fillna(0).round().astype(int)
    # Drop the original percentage column
    demographic_counts_gdf = demographic_counts_gdf.drop(columns=[col])

demographic_counts_gdf

Unnamed: 0,ctuid,Equity Index,Equity Index 2024,Popu_2021,PopuDenPerKM,region,own_count,rent_count,unknown_count,total_count,...,LIM,Neet,CHN,Affordable,ofWP,ShortTerm,Evic,Unemp,NoEdu,IncomeGT
0,5350001.00,0.309595,0.400,599,87.8,Toronto,0,0,0,0,...,79,138,163,163,45,27,0,44,69,35
1,5350002.00,0.513469,0.783,604,178.0,Toronto,0,1,0,1,...,95,121,302,302,95,108,0,85,54,100
2,5350003.00,0.141948,0.163,457,483.3,Toronto,0,0,0,0,...,52,0,0,0,30,55,16,32,30,25
3,5350004.00,0.671040,0.732,6306,18525.3,Toronto,3,7,1,11,...,2002,1489,2784,2025,675,646,200,1214,1090,1162
4,5350005.00,0.491055,0.574,6957,18483.0,Toronto,20,7,0,27,...,1981,1227,2132,2026,657,669,257,1020,1056,820
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1060,5350586.05,0.271047,0.223,9563,2586.9,Peel,0,0,0,0,...,1211,995,1024,2049,0,886,191,1150,1480,898
1061,5350587.01,0.410372,0.432,6072,46.0,Peel,1,0,0,1,...,572,1084,2429,2602,227,553,277,547,686,553
1062,5350587.02,0.297805,0.404,5640,37.0,Peel,1,1,0,2,...,550,794,1762,1880,258,682,22,571,658,484
1063,5350802.01,0.148813,0.143,4011,1844.8,Toronto,0,0,0,0,...,455,375,0,0,159,381,124,487,353,433
