In [413]:
import pandas as pd
import geopy
from geopy.geocoders import Nominatim
import numpy as np
import geopandas as gpd
from shapely.geometry import Point

In [415]:
def get_coordinate(df):
    """
    Geocode each row's address with Nominatim and attach the coordinates.

    The query string is built as "<Street>,<City>,<Province>". The input
    frame is modified in place (four columns are added) and is also returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns "Street", "City" and "Province"; they are
        concatenated with "," so they are expected to hold strings.

    Returns
    -------
    pandas.DataFrame
        The same object as ``df`` with the added columns "Full_Address",
        "Location" (the geocoder result, or None), "Latitude" and
        "Longitude" (NaN where no location was found).
    """
    # Local import: RateLimiter is not among the notebook's top-level imports.
    from geopy.extra.rate_limiter import RateLimiter

    # Build the geocoder once for the whole frame instead of once per row.
    geolocator = Nominatim(user_agent="abcd", timeout=None)
    # Keep at least one second between requests (the public Nominatim service
    # asks for this). swallow_exceptions=False keeps geocoder errors visible
    # to the caller instead of turning them into missing coordinates.
    geocode = RateLimiter(
        geolocator.geocode,
        min_delay_seconds=1,
        swallow_exceptions=False,
    )

    def get_location(address):
        # A missing Street/City/Province makes the concatenated address NaN;
        # skip it rather than sending a NaN query to the geocoder.
        if pd.isna(address):
            return None
        return geocode(address)

    df["Full_Address"] = df["Street"] + "," + df["City"] + "," + df["Province"]
    df["Location"] = df["Full_Address"].apply(get_location)
    df["Latitude"] = df["Location"].apply(lambda loc: loc.latitude if loc else np.nan)
    df["Longitude"] = df["Location"].apply(lambda loc: loc.longitude if loc else np.nan)

    return df

In [417]:
import geopandas as gpd
from shapely.geometry import Point
import json

def df_to_gdf(input_df):
    """
    Convert a DataFrame with longitude and latitude columns
    to a GeoDataFrame.

    Parameters
    ----------
    input_df : pandas.DataFrame
        Must contain the columns "Longitude" and "Latitude". The frame is
        copied, so the caller's object is not modified.

    Returns
    -------
    geopandas.GeoDataFrame
        A copy of ``input_df`` with a point "geometry" column built from
        (Longitude, Latitude) and the CRS set to EPSG:4326.
    """
    df = input_df.copy()
    # Vectorised point construction (x = longitude, y = latitude) instead of
    # building one shapely Point per row in a Python loop.
    geometry = gpd.points_from_xy(df["Longitude"], df["Latitude"])
    return gpd.GeoDataFrame(df, crs=4326, geometry=geometry)

## Load Data

### SPRE Locations

In [479]:
# Read the cleaned 2021 SPRE sheet, build point geometries from its
# Latitude/Longitude columns, then discard rows with no 211 street address.
spre2021 = pd.read_excel(
    "SPRE_Count/2021_SPRE_DATA_Jul23.xlsx",
    sheet_name="Primary-2021-Clean",
)
spre2021_gdf = df_to_gdf(spre2021).dropna(subset=['211_Address1'])
spre2021_gdf.head()

Unnamed: 0,PublicName,211 Parent Agency Name,211_Address1,211_Address2,211_City,211_County,211_Province,211_PostalCode,Latitude,Longitude,...,Country,CRA Address,Category-SubCategory Match Code,secD_LandOwnship,Land Value,Occupancy Costs,Total Location Count,All Land Ownership,Tenure,geometry
0,Access for Parents and Children in Ontario - A...,Access for Parents and Children in Ontario,100 Sheppard Ave E,Suite 504,Toronto,Toronto,ON,M2N 6N5,43.763248,-79.405415,...,CA,"100 SHEPPARD AVENUE EAST,TORONTO,ON","1, 99",,,84625.0,1,0,Rent,POINT (-79.40542 43.76325)
1,Across U-hub - Across U-hub,Across U-hub,232 Hood Rd,,Markham,York,ON,L3R 3K8,43.82456,-79.328103,...,CA,"232 HOOD ROAD,MARKHAM,ON","160, 19",,590062.0,21033.0,1,1,Own,POINT (-79.32810 43.82456)
2,Adam House - Adam House,Adam House,430 Gladstone Ave,,Toronto,Toronto,ON,M6H 3H9,43.657755,-79.433036,...,CA,"430 GLADSTONE AVE.,TORONTO,ON","1, 15",,818693.0,78934.0,1,1,Own,POINT (-79.43304 43.65776)
3,Adoption Council of Ontario - Adoption Council...,Adoption Council of Ontario,36 Eglinton Ave W,Suite 503,Toronto,Toronto,ON,M4R 1A1,43.706647,-79.39993,...,CA,"503 - 36 EGLINTON AVE W,TORONTO,ON","1, 99",,,27694.0,1,0,Rent,POINT (-79.39993 43.70665)
4,AdvantAge Ontario - AdvantAge Ontario,AdvantAge Ontario,7050 Weston Rd,Suite 700,Vaughan,York,ON,L4L 8G7,43.772305,-79.54445,...,CA,"3B - 64 JARDIN DR,CONCORD,ON","1, 99",,,59893.0,1,0,Rent,POINT (-79.54445 43.77231)


In [245]:
# spre2021_wgs = spre2021_gdf.to_crs(epsg=4326)
# spre2021_wgs.to_file("GIS Analysis/SPRE_Location/SPRE_2021_wgs84.geojson", driver="GeoJSON")

### CT Polygon

In [481]:
# Load the CT polygons, keeping only the identifier (exposed as "GeoUID")
# and the geometry.
ct = (
    gpd.read_file("GIS Analysis/census_variables/ct/ct.geojson")[["id", "geometry"]]
    .rename(columns={"id":"GeoUID"})
)
ct.head()

Unnamed: 0,GeoUID,geometry
0,5350001.0,"MULTIPOLYGON (((-79.33526 43.62681, -79.33561 ..."
1,5350002.0,"MULTIPOLYGON (((-79.38245 43.62556, -79.38200 ..."
2,5350003.0,"MULTIPOLYGON (((-79.43466 43.63369, -79.43328 ..."
3,5350004.0,"MULTIPOLYGON (((-79.43466 43.63369, -79.43566 ..."
4,5350005.0,"MULTIPOLYGON (((-79.43603 43.63717, -79.43726 ..."


## Create Buffers and Count SPRE Locations per Buffer

Reproject both layers to UTM zone 17N (EPSG:32617) so that buffer distances are expressed in metres, and check the resulting CRS.

In [483]:
# Project the CT polygons to WGS 84 / UTM zone 17N and show the resulting CRS.
ct = ct.to_crs("EPSG:32617")
ct.crs

<Projected CRS: EPSG:32617>
Name: WGS 84 / UTM zone 17N
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- name: Between 84°W and 78°W, northern hemisphere between equator and 84°N, onshore and offshore. Bahamas. Ecuador - north of equator. Canada - Nunavut; Ontario; Quebec. Cayman Islands. Colombia. Costa Rica. Cuba. Jamaica. Nicaragua. Panama. United States (USA).
- bounds: (-84.0, 0.0, -78.0, 84.0)
Coordinate Operation:
- name: UTM zone 17N
- method: Transverse Mercator
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [485]:
# Project the SPRE points to WGS 84 / UTM zone 17N and show the resulting CRS.
spre2021_gdf = spre2021_gdf.to_crs("EPSG:32617")
spre2021_gdf.crs

<Projected CRS: EPSG:32617>
Name: WGS 84 / UTM zone 17N
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- name: Between 84°W and 78°W, northern hemisphere between equator and 84°N, onshore and offshore. Bahamas. Ecuador - north of equator. Canada - Nunavut; Ontario; Quebec. Cayman Islands. Colombia. Costa Rica. Cuba. Jamaica. Nicaragua. Panama. United States (USA).
- bounds: (-84.0, 0.0, -78.0, 84.0)
Coordinate Operation:
- name: UTM zone 17N
- method: Transverse Mercator
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [487]:
#set radius of buffer in here
RADIUS_BUFFER = 500

#Function to create the buffer
def polygon_buffer(df, radius=None):
    """
    Return a copy of ``df`` with an extra "buffer" column.

    Parameters
    ----------
    df : geopandas.GeoDataFrame
        Layer whose active geometry is buffered. It is not modified.
    radius : float, optional
        Buffer distance, in the units of the layer's CRS. When omitted, the
        module-level RADIUS_BUFFER is read at call time.

    Returns
    -------
    geopandas.GeoDataFrame
        A copy of ``df`` with the buffered shapes stored in "buffer"; the
        original geometry column is kept unchanged.
    """
    if radius is None:
        radius = RADIUS_BUFFER

    # Work on a copy so the caller's frame does not silently gain a column.
    buffered = df.copy()
    buffered["buffer"] = buffered.geometry.buffer(radius)

    return buffered

In [489]:
# Buffer every CT polygon, then make the buffered shape the "geometry" column
# in place of the original polygon.
buffered_ct = (
    polygon_buffer(ct)
    .drop(columns=["geometry"])
    .rename(columns={"buffer":"geometry"})
)
buffered_ct.head(1)

Unnamed: 0,GeoUID,geometry
0,5350001.0,"POLYGON ((632050.852 4833978.039, 632060.428 4..."


### Create 500m buffer and count SPRE locations by each buffered polygon

In [493]:
def buffer_spre_count(ct_df, spre_df, buffer_radius, tenures=("Own", "Rent", "Unknown")):
    """
    Count SPRE locations intersecting a buffer around each polygon, by tenure.

    Parameters
    ----------
    ct_df : geopandas.GeoDataFrame
        Polygon layer with a "GeoUID" column.
    spre_df : geopandas.GeoDataFrame
        Point layer with a "Tenure" column. It should be in the same CRS as
        ``ct_df``; this function does not check or reproject.
    buffer_radius : float
        Buffer distance in the units of ``ct_df``'s CRS.
    tenures : iterable of str, optional
        Tenure values to count. Rows whose "Tenure" is not one of these
        values (including missing values) are not counted at all.

    Returns
    -------
    pandas.DataFrame
        One row per input polygon with "GeoUID", the buffered "geometry",
        one "<tenure>_count" column per tenure, and "Total_count" (the sum
        of those per-tenure columns).
    """
    buffers = ct_df.geometry.buffer(buffer_radius)
    buffer_df = pd.concat((ct_df["GeoUID"], buffers.rename("geometry")), axis=1)

    count_columns = []
    for tenure in tenures:
        # Filter once per tenure rather than once per buffer polygon.
        tenure_points = spre_df[spre_df["Tenure"] == tenure]
        column = f"{tenure}_count"
        buffer_df[column] = buffer_df.geometry.apply(
            lambda polygon: tenure_points.intersects(polygon).sum()
        )
        count_columns.append(column)

    buffer_df["Total_count"] = buffer_df[count_columns].sum(axis=1)
    return buffer_df

In [495]:
# Use the shared RADIUS_BUFFER setting rather than repeating the literal 500,
# so the buffer radius only has to be changed in one place.
spre_buffer_count = buffer_spre_count(ct, spre2021_gdf, RADIUS_BUFFER)
spre_buffer_count

Unnamed: 0,GeoUID,geometry,Own_count,Rent_count,Unknown_count,Total_count
0,5350001.00,"POLYGON ((632050.852 4833978.039, 632060.428 4...",5,10,0,15
1,5350002.00,"POLYGON ((628717.203 4831113.996, 628701.849 4...",0,2,0,2
2,5350003.00,"POLYGON ((623667.205 4831737.064, 623635.129 4...",3,5,0,8
3,5350004.00,"POLYGON ((625239.229 4833413.891, 625609.873 4...",3,8,0,11
4,5350005.00,"POLYGON ((626294.599 4832284.310, 626246.804 4...",3,8,0,11
...,...,...,...,...,...,...
1060,5350586.05,"POLYGON ((593033.487 4843856.470, 592383.366 4...",0,0,0,0
1061,5350587.01,"POLYGON ((583071.467 4866665.146, 583297.807 4...",1,0,0,1
1062,5350587.02,"POLYGON ((572750.397 4862471.265, 572803.194 4...",1,1,0,2
1063,5350802.01,"POLYGON ((648207.509 4852613.681, 648189.929 4...",0,1,0,1


In [497]:
# Only the counts are joined onto the CT table, so the buffer polygons are
# dropped. errors="ignore" lets this cell be re-run after the column is gone.
spre_buffer_count = spre_buffer_count.drop(columns=["geometry"], errors="ignore")

### Joining Counts to CT Polygons

In [499]:
# Load the equity-index layer (census variables plus CT geometry).
ct_data = gpd.read_file(
    "GIS Analysis/census_variables/ct/equity_layer_index.geojson",
    driver="GeoJSON",
)
ct_data.head()

Unnamed: 0,id,PopuDenPerKM,Immigrant%,VM%,1-ParentFam%,MBM%,LIM%,Neet%,%CHN,%Affordable,%ofWP,ShortTerm%,Equity Index,geometry
0,5350001.0,87.8,1.695,31.356,14.286,9.322,13.115,23.077,27.273,27.273,7.5,4.545,0.326619,"MULTIPOLYGON (((-79.33526 43.62681, -79.33561 ..."
1,5350002.0,178.0,0.0,1.667,17.143,13.333,15.686,20.0,50.0,50.0,15.789,17.857,0.617931,"MULTIPOLYGON (((-79.38245 43.62556, -79.38200 ..."
2,5350003.0,483.3,14.423,52.381,8.333,6.731,11.364,0.0,0.0,0.0,6.667,12.069,0.214454,"MULTIPOLYGON (((-79.43466 43.63369, -79.43328 ..."
3,5350004.0,18525.3,8.975,50.55,25.0,23.942,31.749,23.611,44.154,32.112,10.704,10.243,0.716577,"MULTIPOLYGON (((-79.43466 43.63369, -79.43566 ..."
4,5350005.0,18483.0,6.389,47.697,24.161,17.175,28.473,17.629,30.651,29.119,9.444,9.613,0.558658,"MULTIPOLYGON (((-79.43603 43.63717, -79.43726 ..."


In [501]:
# Attach the per-tenure counts by matching ct_data["id"] against GeoUID.
# The inner join keeps only identifiers present in both tables.
counts_by_geouid = spre_buffer_count.set_index("GeoUID")
ct_data = ct_data.join(counts_by_geouid, on="id", how="inner")
ct_data.head()

Unnamed: 0,id,PopuDenPerKM,Immigrant%,VM%,1-ParentFam%,MBM%,LIM%,Neet%,%CHN,%Affordable,%ofWP,ShortTerm%,Equity Index,geometry,Own_count,Rent_count,Unknown_count,Total_count
0,5350001.0,87.8,1.695,31.356,14.286,9.322,13.115,23.077,27.273,27.273,7.5,4.545,0.326619,"MULTIPOLYGON (((-79.33526 43.62681, -79.33561 ...",5,10,0,15
1,5350002.0,178.0,0.0,1.667,17.143,13.333,15.686,20.0,50.0,50.0,15.789,17.857,0.617931,"MULTIPOLYGON (((-79.38245 43.62556, -79.38200 ...",0,2,0,2
2,5350003.0,483.3,14.423,52.381,8.333,6.731,11.364,0.0,0.0,0.0,6.667,12.069,0.214454,"MULTIPOLYGON (((-79.43466 43.63369, -79.43328 ...",3,5,0,8
3,5350004.0,18525.3,8.975,50.55,25.0,23.942,31.749,23.611,44.154,32.112,10.704,10.243,0.716577,"MULTIPOLYGON (((-79.43466 43.63369, -79.43566 ...",3,8,0,11
4,5350005.0,18483.0,6.389,47.697,24.161,17.175,28.473,17.629,30.651,29.119,9.444,9.613,0.558658,"MULTIPOLYGON (((-79.43603 43.63717, -79.43726 ...",3,8,0,11


### Feature Engineering

In [503]:
# Share of rented / owned locations among all counted locations, in percent.
# Rows where Total_count is 0 come out as NaN here.
for tenure in ("Rent", "Own"):
    ct_data[f"{tenure}%"] = ct_data[f"{tenure}_count"].div(ct_data["Total_count"]).mul(100)
ct_data.head()

Unnamed: 0,id,PopuDenPerKM,Immigrant%,VM%,1-ParentFam%,MBM%,LIM%,Neet%,%CHN,%Affordable,%ofWP,ShortTerm%,Equity Index,geometry,Own_count,Rent_count,Unknown_count,Total_count,Rent%,Own%
0,5350001.0,87.8,1.695,31.356,14.286,9.322,13.115,23.077,27.273,27.273,7.5,4.545,0.326619,"MULTIPOLYGON (((-79.33526 43.62681, -79.33561 ...",5,10,0,15,66.666667,33.333333
1,5350002.0,178.0,0.0,1.667,17.143,13.333,15.686,20.0,50.0,50.0,15.789,17.857,0.617931,"MULTIPOLYGON (((-79.38245 43.62556, -79.38200 ...",0,2,0,2,100.0,0.0
2,5350003.0,483.3,14.423,52.381,8.333,6.731,11.364,0.0,0.0,0.0,6.667,12.069,0.214454,"MULTIPOLYGON (((-79.43466 43.63369, -79.43328 ...",3,5,0,8,62.5,37.5
3,5350004.0,18525.3,8.975,50.55,25.0,23.942,31.749,23.611,44.154,32.112,10.704,10.243,0.716577,"MULTIPOLYGON (((-79.43466 43.63369, -79.43566 ...",3,8,0,11,72.727273,27.272727
4,5350005.0,18483.0,6.389,47.697,24.161,17.175,28.473,17.629,30.651,29.119,9.444,9.613,0.558658,"MULTIPOLYGON (((-79.43603 43.63717, -79.43726 ...",3,8,0,11,72.727273,27.272727


In [505]:
#Add population column

# Keep only the identifier and the 2021 population, give the population a
# short name, and store GeoUID as text.
popu = (
    pd.read_csv("GIS Analysis/census_variables/population/population.csv")
    .loc[:, ["GeoUID", "v_CA21_1: Population, 2021"]]
    .rename(columns={"v_CA21_1: Population, 2021":"Popu_2021"})
    .astype({"GeoUID": str})
)

In [507]:
def add_length_column(df, source_column, new_column):
    """
    Record string lengths and right-pad 9-character values with '0'.

    ``df`` is modified in place and also returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding ``source_column``.
    source_column : str
        Name of a column of strings. Missing values are allowed and are
        left untouched.
    new_column : str
        Name of the column that receives the character count of each value
        as it was before padding (NaN for missing values).

    Returns
    -------
    pandas.DataFrame
        The same object as ``df``.
    """
    # .str.len() yields NaN for missing values, where len() would raise.
    df[new_column] = df[source_column].str.len()

    # For rows where the length is 9, append '0' to the value in the source column
    needs_padding = df[new_column] == 9
    df.loc[needs_padding, source_column] = df.loc[needs_padding, source_column] + '0'

    return df

# Pad the GeoUID strings, then discard the helper length column.
popu = add_length_column(popu, "GeoUID", "len").drop(columns=["len"])

In [509]:
#Joining back to the ct_data table
# Match ct_data["id"] against the padded GeoUID; the inner join keeps only
# identifiers present in both tables.
popu_by_geouid = popu.set_index("GeoUID")
ct_data = ct_data.join(popu_by_geouid, on="id", how="inner")
ct_data.head()

Unnamed: 0,id,PopuDenPerKM,Immigrant%,VM%,1-ParentFam%,MBM%,LIM%,Neet%,%CHN,%Affordable,...,ShortTerm%,Equity Index,geometry,Own_count,Rent_count,Unknown_count,Total_count,Rent%,Own%,Popu_2021
0,5350001.0,87.8,1.695,31.356,14.286,9.322,13.115,23.077,27.273,27.273,...,4.545,0.326619,"MULTIPOLYGON (((-79.33526 43.62681, -79.33561 ...",5,10,0,15,66.666667,33.333333,599
1,5350002.0,178.0,0.0,1.667,17.143,13.333,15.686,20.0,50.0,50.0,...,17.857,0.617931,"MULTIPOLYGON (((-79.38245 43.62556, -79.38200 ...",0,2,0,2,100.0,0.0,604
2,5350003.0,483.3,14.423,52.381,8.333,6.731,11.364,0.0,0.0,0.0,...,12.069,0.214454,"MULTIPOLYGON (((-79.43466 43.63369, -79.43328 ...",3,5,0,8,62.5,37.5,457
3,5350004.0,18525.3,8.975,50.55,25.0,23.942,31.749,23.611,44.154,32.112,...,10.243,0.716577,"MULTIPOLYGON (((-79.43466 43.63369, -79.43566 ...",3,8,0,11,72.727273,27.272727,6306
4,5350005.0,18483.0,6.389,47.697,24.161,17.175,28.473,17.629,30.651,29.119,...,9.613,0.558658,"MULTIPOLYGON (((-79.43603 43.63717, -79.43726 ...",3,8,0,11,72.727273,27.272727,6957


In [511]:
# Replace missing population and tenure-share values with 0.
for column in ("Popu_2021", "Rent%", "Own%"):
    ct_data[column] = ct_data[column].fillna(0)
ct_data.head()

Unnamed: 0,id,PopuDenPerKM,Immigrant%,VM%,1-ParentFam%,MBM%,LIM%,Neet%,%CHN,%Affordable,...,ShortTerm%,Equity Index,geometry,Own_count,Rent_count,Unknown_count,Total_count,Rent%,Own%,Popu_2021
0,5350001.0,87.8,1.695,31.356,14.286,9.322,13.115,23.077,27.273,27.273,...,4.545,0.326619,"MULTIPOLYGON (((-79.33526 43.62681, -79.33561 ...",5,10,0,15,66.666667,33.333333,599
1,5350002.0,178.0,0.0,1.667,17.143,13.333,15.686,20.0,50.0,50.0,...,17.857,0.617931,"MULTIPOLYGON (((-79.38245 43.62556, -79.38200 ...",0,2,0,2,100.0,0.0,604
2,5350003.0,483.3,14.423,52.381,8.333,6.731,11.364,0.0,0.0,0.0,...,12.069,0.214454,"MULTIPOLYGON (((-79.43466 43.63369, -79.43328 ...",3,5,0,8,62.5,37.5,457
3,5350004.0,18525.3,8.975,50.55,25.0,23.942,31.749,23.611,44.154,32.112,...,10.243,0.716577,"MULTIPOLYGON (((-79.43466 43.63369, -79.43566 ...",3,8,0,11,72.727273,27.272727,6306
4,5350005.0,18483.0,6.389,47.697,24.161,17.175,28.473,17.629,30.651,29.119,...,9.613,0.558658,"MULTIPOLYGON (((-79.43603 43.63717, -79.43726 ...",3,8,0,11,72.727273,27.272727,6957


In [513]:
# Final column order for the exported table; geometry goes last.
output_columns = [
    'id', 'Popu_2021', 'PopuDenPerKM',
    'Immigrant%', 'VM%', '1-ParentFam%', 'MBM%', 'LIM%', 'Neet%',
    '%CHN', '%Affordable', '%ofWP', 'ShortTerm%',
    'Equity Index',
    'Own_count', 'Own%', 'Rent_count', 'Rent%',
    'Unknown_count', 'Total_count',
    'geometry',
]
ct_data = ct_data[output_columns].copy()

In [515]:
# Write the final table, including the row index, to CSV.
ct_data.to_csv(path_or_buf="GIS Analysis/census_variables/ct/ct_data_v2.csv")