In [24]:
import pandas as pd
from shapely.geometry import Point
import geopandas as gpd
import folium
from geocoding import geocode
import numpy as np

In [8]:
# Ward boundary geometries, read from the GeoJSON file into a GeoDataFrame.
gdf = gpd.read_file("City Wards Data.geojson")

Checking to see if the file loaded into the dataframe properly

In [None]:
# Visual sanity check: draw the geometries that were just loaded.
gdf.plot()

Loading data samples into dataframes and creating columns for the address

In [7]:
# Read the transportation sample dataset.
transportation_sample = "transporation_sample.csv"
transportation_df = pd.read_csv(transportation_sample)

# Empty or whitespace-only postal codes are swapped for the '###' placeholder.
transportation_df['Postal'] = transportation_df['Postal'].replace(
    r'^\s*$', '###', regex=True
)

# Build the 'Address' column by chaining the parts left to right,
# separated by single spaces.
address = transportation_df['Street_Num']
for column in ('Street_Name', 'Street_Type', 'Street_Direction', 'Postal'):
    address = address + " " + transportation_df[column]
transportation_df['Address'] = address

print(transportation_df.head())

In [36]:
# Read the real-estate sample dataset.
real_estate_df = pd.read_csv('realestate_sample.csv')

# Empty or whitespace-only postal codes are swapped for the '###' placeholder.
# The source column must be real_estate_df's own 'Postal'; reading it from
# transportation_df (as before) replaced these postal codes with another
# dataset's values, aligned by row index.
real_estate_df['Postal'] = real_estate_df['Postal'].replace(r'^\s*$', '###', regex=True)

# Build the 'Address' column: street number, name, type, direction and postal
# code, separated by single spaces.
real_estate_df['Address'] = (
    real_estate_df['Street_Num']
    + " " + real_estate_df['Street_Name']
    + " " + real_estate_df['Street_Type']
    + " " + real_estate_df['Street_Direction']
    + " " + real_estate_df['Postal']
)

print(real_estate_df.head())


In [3]:
# Read the amenities sample dataset.
amenities_df = pd.read_csv('amenities_sample.csv')

# Empty or whitespace-only postal codes are swapped for the '###' placeholder.
amenities_df['Postal'] = amenities_df['Postal'].replace(
    r'^\s*$', '###', regex=True
)

# Build the 'Address' column by chaining the parts left to right,
# separated by single spaces.
address = amenities_df['Street_Num']
for column in ('Street_Name', 'Street_Type', 'Street_Direction', 'Postal'):
    address = address + " " + amenities_df[column]
amenities_df['Address'] = address

print(amenities_df.head())

In [None]:
# Read the demolition sample dataset.
# NOTE(review): unlike the other samples, no 'Address' column is built here,
# yet the geocoding cell reads demolition_df['Address'] -- presumably the CSV
# already provides it; confirm.
demolition_sample = "DemolitionDimension_Sample.csv"
demolition_df = pd.read_csv(demolition_sample)

print(demolition_df.head())

In [25]:
# Read the social development sample dataset.
social_dev_df = pd.read_csv('social_development_sample.csv')

# Empty or whitespace-only postal codes are swapped for the '###' placeholder.
social_dev_df['Postal'] = social_dev_df['Postal'].replace(
    r'^\s*$', '###', regex=True
)

# Build the 'Address' column by chaining the parts left to right,
# separated by single spaces.
address = social_dev_df['Street_Num']
for column in ('Street_Name', 'Street_Type', 'Street_Direction', 'Postal'):
    address = address + " " + social_dev_df[column]
social_dev_df['Address'] = address


Turning Full Addresses into Coordinates using our geocoder

In [None]:
# Geocode every transportation address and store the result in 'Coordinates'.
from tqdm.notebook import tqdm  # progress bar

tqdm.pandas()  # makes progress_apply available on pandas objects
transportation_df['Coordinates'] = transportation_df['Address'].progress_apply(
    lambda address: geocode(address)
)
print(transportation_df.head())

In [30]:
# Geocode every amenities address and store the result in 'Coordinates'.
tqdm.pandas()  # makes progress_apply available on pandas objects
amenities_df['Coordinates'] = amenities_df['Address'].progress_apply(
    lambda address: geocode(address)
)
print(amenities_df.head())

In [None]:
# Geocode every real-estate address and store the result in 'Coordinates'.
tqdm.pandas()  # makes progress_apply available on pandas objects
real_estate_df['Coordinates'] = real_estate_df['Address'].progress_apply(
    lambda address: geocode(address)
)
print(real_estate_df['Coordinates'])

In [None]:
# Geocode every demolition address and store the result in 'Coordinates'.
# NOTE(review): this notebook never builds demolition_df['Address'];
# presumably the CSV already contains that column -- confirm.
tqdm.pandas()  # makes progress_apply available on pandas objects
demolition_df['Coordinates'] = demolition_df['Address'].progress_apply(
    lambda address: geocode(address)
)
# Show the demolition results; this cell used to print
# real_estate_df['Coordinates'], a leftover from the cell it was copied from.
print(demolition_df['Coordinates'])

In [None]:
# Geocode every social development address and store the result in 'Coordinates'.
from tqdm.notebook import tqdm  # progress bar

tqdm.pandas()  # makes progress_apply available on pandas objects
social_dev_df['Coordinates'] = social_dev_df['Address'].progress_apply(
    lambda address: geocode(address)
)
print(social_dev_df.head())

MAPPING SECTION -- taking our points and linking them to a ward

In [9]:
# Prepare the ward layer for the folium map: reproject the GeoJSON data to
# EPSG:4326, then derive the ward centroids and the spatial index from the
# reprojected frame.
gdf_projected = gdf.to_crs(epsg=4326)
centroid = gdf_projected.geometry.centroid
s_index = gdf_projected.sindex


  centroid = gdf_projected.geometry.centroid


In [10]:
# Average the centroid coordinates (x -> longitude, y -> latitude);
# these become the centre of the map.
mean_longitude = centroid.x.mean()
mean_latitude = centroid.y.mean()

In [11]:
# Base map centred on the mean centroid position.
m = folium.Map(
    location=[mean_latitude, mean_longitude],
    zoom_start=10,
)

In [13]:
#ward mapping function
def ward_mapper(point, verbose=True):
    """Find the ward that contains ``point`` and drop a marker on the map.

    The spatial index ``s_index`` (built from ``gdf_projected``) supplies the
    candidate wards whose bounding boxes intersect the point; each candidate
    is then tested with an exact ``contains`` check. Both the containment test
    and the ward-name lookup read from ``gdf_projected`` by position, so they
    always refer to the same rows and CRS the spatial index was built from.

    Parameters
    ----------
    point : object with ``bounds``, ``x``, ``y`` attributes, or None
        The location to look up (presumably a shapely Point from the
        geocoder -- confirm). ``None`` is treated as "no location".
    verbose : bool, default True
        When True, print the match (or the lack of one) for every point.

    Returns
    -------
    tuple
        ``(ward_index, ward_name)``; both are ``None`` when ``point`` is None
        or no ward contains it.

    Side effects
    ------------
    Adds a ``folium.Marker`` to the module-level map ``m`` for every
    non-None point, whether or not a ward was found.
    """
    ward_index = None
    ward_name = None

    if point is None:
        return ward_index, ward_name

    # The spatial index returns positional row numbers, so use .iloc throughout.
    for idx in s_index.intersection(point.bounds):
        if gdf_projected.geometry.iloc[idx].contains(point):
            ward_index = idx
            ward_name = gdf_projected.iloc[idx]['AREA_NAME']
            if verbose:
                print("Point is contained within geometry at index:", idx)
                print(ward_name)
            break
    else:
        # Loop finished without a break: no candidate actually contains the point.
        if verbose:
            print(f"Point {point} is not contained in any ward.")

    latitude, longitude = point.y, point.x
    folium.Marker(location=(latitude, longitude), popup='Point').add_to(m)
    return ward_index, ward_name


In [18]:
# Overlay the reprojected ward data on the map as a GeoJSON layer.
ward_geojson = gdf_projected.to_json()
folium.GeoJson(ward_geojson).add_to(m)

In [None]:
# Run ward_mapper on every coordinate and split the resulting
# (index, name) pairs into two new columns.
transportation_df['Ward_Index'], transportation_df['Ward'] = zip(
    *transportation_df['Coordinates'].apply(ward_mapper)
)
print(transportation_df[['Coordinates', 'Ward_Index', 'Ward']])

In [None]:
# Run ward_mapper on every coordinate and split the resulting
# (index, name) pairs into two new columns.
amenities_df['Ward_Index'], amenities_df['Ward'] = zip(
    *amenities_df['Coordinates'].apply(ward_mapper)
)
print(amenities_df[['Coordinates', 'Ward_Index', 'Ward']])

In [None]:
# Run ward_mapper on every coordinate and split the resulting
# (index, name) pairs into two new columns.
real_estate_df['Ward_Index'], real_estate_df['Ward'] = zip(
    *real_estate_df['Coordinates'].apply(ward_mapper)
)
print(real_estate_df[['Coordinates', 'Ward_Index', 'Ward']])

In [None]:
# Run ward_mapper on every coordinate and split the resulting
# (index, name) pairs into two new columns.
demolition_df['Ward_Index'], demolition_df['Ward'] = zip(
    *demolition_df['Coordinates'].apply(ward_mapper)
)
print(demolition_df[['Coordinates', 'Ward_Index', 'Ward']])

In [42]:
# Run ward_mapper on every coordinate and split the resulting
# (index, name) pairs into two new columns.
social_dev_df['Ward_Index'], social_dev_df['Ward'] = zip(
    *social_dev_df['Coordinates'].apply(ward_mapper)
)

Point is contained within geometry at index: 2
Willowdale
Point is contained within geometry at index: 1
York Centre
Point is contained within geometry at index: 17
Eglinton-Lawrence
Point is contained within geometry at index: 18
Don Valley North
Point is contained within geometry at index: 3
University-Rosedale
Point is contained within geometry at index: 13
Parkdale-High Park
Point is contained within geometry at index: 2
Willowdale
Point is contained within geometry at index: 14
Etobicoke North
Point is contained within geometry at index: 20
Davenport
Point is contained within geometry at index: 0
Humber River-Black Creek
Point is contained within geometry at index: 23
Toronto-Danforth
Point is contained within geometry at index: 13
Parkdale-High Park
Point is contained within geometry at index: 21
Toronto Centre
Point is contained within geometry at index: 21
Toronto Centre
Point is contained within geometry at index: 14
Etobicoke North
Point is contained within geometry at index:

In [43]:
# Write the ward-annotated social development sample to CSV, without the index column.
social_dev_df.to_csv(
    "Social_Development_Sample_Wards.csv",
    index=False,
)

In [None]:
# Display the map to view the points that ward_mapper added.
m