## City of Toronto Bicycle Network Data

## Setup Notebook

In [1]:
# Import 3rd party libraries
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pylab as plt
import geopandas as gpd
import matplotlib.pyplot as plt
import folium
from IPython.display import display
import geopandas as gpd
from shapely.geometry import Point

# Configure Notebook
%matplotlib inline
plt.style.use('fivethirtyeight')
sns.set_context("notebook")
import warnings
warnings.filterwarnings('ignore')



## Import GeoJson Data

In [2]:
# Import dataset as a GeoDataFrame
bicycle_data = gpd.read_file('cycling-network - 4326.geojson')

bicycle_data.tail()


Unnamed: 0,_id,OBJECTID,SEGMENT_ID,INSTALLED,UPGRADED,PRE_AMALGAMATION,STREET_NAME,FROM_STREET,TO_STREET,ROADCLASS,...,INFRA_HIGHORDER,SEPA_HIGHORDER,SEPB_HIGHORDER,ORIG_HIGHORDER,BYLAWED,EDITOR,LAST_EDIT_DATE,UPGRADE_DESCRIPTION,CONVERTED,geometry
1459,1460,1460,1460,2023,0,,Dundas St W,Beamish Dr,Beamish Dr,,...,Cycle Track,,,,,,2024-06-11 16:40:31,,,"MULTILINESTRING ((-79.53625 43.64000, -79.5362..."
1460,1461,1461,1461,2023,0,,Bloor St W,Beamish Dr,Beamish Dr,,...,Cycle Track,,,,,,2024-06-11 16:40:31,,,"MULTILINESTRING ((-79.53746 43.64136, -79.5372..."
1461,1462,1462,1462,2023,0,,Kipling Ave,St Albans Rd,St Albans Rd,,...,Cycle Track,,,,,,2024-06-11 16:40:31,,,"MULTILINESTRING ((-79.53433 43.63922, -79.5342..."
1462,1463,1463,1463,2023,0,,Dundas St W,Jopling Ave S,Jopling Ave S,,...,Cycle Track,,,,,,2024-06-11 16:40:31,,,"MULTILINESTRING ((-79.53755 43.63906, -79.5376..."
1463,1464,1464,1464,2024,0,,University Ave,Wellington St W,[150m S] Wellington St W,,...,Cycle Track,,,,,,2024-06-11 16:40:31,,,"MULTILINESTRING ((-79.38410 43.64638, -79.3838..."


## Data Analysis

In [3]:
# Check the number of columns and rows
bicycle_data.shape

(1464, 29)

In [4]:
# Check the columns in DataFrame
bicycle_data.columns

Index(['_id', 'OBJECTID', 'SEGMENT_ID', 'INSTALLED', 'UPGRADED',
       'PRE_AMALGAMATION', 'STREET_NAME', 'FROM_STREET', 'TO_STREET',
       'ROADCLASS', 'CNPCLASS', 'SURFACE', 'OWNER', 'DIR_LOWORDER',
       'INFRA_LOWORDER', 'SEPA_LOWORDER', 'SEPB_LOWORDER',
       'ORIG_LOWORDER_INFRA', 'DIR_HIGHORDER', 'INFRA_HIGHORDER',
       'SEPA_HIGHORDER', 'SEPB_HIGHORDER', 'ORIG_HIGHORDER', 'BYLAWED',
       'EDITOR', 'LAST_EDIT_DATE', 'UPGRADE_DESCRIPTION', 'CONVERTED',
       'geometry'],
      dtype='object')

- _id: A unique identifier for each record. Typically used to uniquely reference each row in the dataset.
- OBJECTID: Another unique identifier, often used in geographic or GIS datasets to represent the object in a spatial database. This ID helps to track features in the system.
- SEGMENT_ID:An identifier for a specific segment of road, street, or infrastructure. It likely refers to a portion of a larger infrastructure asset, useful for analysis of road conditions or maintenance.
- INSTALLED:The date or year when the infrastructure (e.g., road, street feature, or installation) was installed. This can be important for asset management and aging infrastructure analysis.
- UPGRADED: Indicates whether the infrastructure has been upgraded, often with a specific date or year of the upgrade. This field is useful for understanding the timeline of infrastructure improvements.
- PRE_AMALGAMATION: A flag or value indicating whether the feature existed before a certain amalgamation event (e.g., before the merging of municipalities, districts, or other administrative boundaries).
- STREET_NAME: The name of the street or road that the feature corresponds to. This is crucial for identifying and mapping locations.
- FROM_STREET: The starting street or intersection for a particular road segment. It helps in defining the segment's geographic location and orientation.
- TO_STREET: The ending street or intersection for the road segment. This, together with the FROM_STREET, helps define the entire segment’s extent.
- ROADCLASS: The classification of the road based on factors like its size, importance, and traffic capacity (e.g., arterial, residential, highway). This can help in traffic planning and infrastructure management.
- CNPCLASS: Likely refers to a specific classification type relevant to the road or asset (could be related to a "CNP" code system, which might relate to "Class of Network Plan" or something similar, depending on local regulations or standards).
- SURFACE:The type of surface material for the road (e.g., asphalt, concrete, gravel). This helps in maintenance planning and determining the durability of the road.
- OWNER: The entity or agency that owns the infrastructure asset. This could refer to municipal, provincial, or private ownership, and is important for responsibility and asset management.
- DIR_LOWORDER: A directional classification, likely referring to the "low order" roads or lanes within a given system (possibly lower-traffic or smaller roads in a hierarchy).
- INFRA_LOWORDER: Indicates infrastructure components that are part of the lower-order (or minor) part of the road system. These could be related to smaller roads, curbs, or other features.
- SEPA_LOWORDER:May refer to a low-order separation feature such as sidewalks or barriers, depending on local definitions. Could be part of a classifying system for features in road infrastructure.
- SEPB_LOWORDER: Similar to SEPA_LOWORDER, this likely refers to another kind of separation feature (e.g., barriers or buffers) associated with lower-order roads.
- ORIG_LOWORDER_INFRA: Refers to the original infrastructure classification for low-order roads or features. This might track historical data or previous classifications before any changes or upgrades were made.
- DIR_HIGHORDER: A directional classification for higher-order roads or lanes (e.g., major highways, arterial roads). These roads typically handle more traffic and are crucial for transportation planning.
- INFRA_HIGHORDER: Refers to infrastructure components associated with high-order roads, such as major roadways, highways, and interchanges.
- SEPA_HIGHORDER: A separation feature for higher-order roads, which could include things like dividers, buffers, or barriers designed for major roadways.
- SEPB_HIGHORDER: Similar to SEPA_HIGHORDER, but might refer to a different type or class of separation feature (e.g., barriers or safety features) for high-order roads.
- ORIG_HIGHORDER: The original classification or state of the high-order infrastructure, before any changes, upgrades, or reclassification.
- BYLAWED:
This field could indicate whether the infrastructure feature is subject to specific bylaws or regulations (e.g., zoning or building codes). It may signify restrictions or requirements tied to the feature.
- EDITOR:The name or ID of the person or entity who last edited or modified the record. This is important for tracking changes and maintaining the integrity of the data.
- LAST_EDIT_DATE:The date when the feature was last modified or updated. It helps track the freshness of the data and when changes were made.
- UPGRADE_DESCRIPTION:A description of any upgrades or improvements that have been made to the infrastructure. This could include road widening, resurfacing, or additions of new features.
- CONVERTED:A flag or value indicating whether the infrastructure feature has been converted to another type, such as from one road class to another or from one material to another.
- geometry:The geometric representation of the infrastructure feature, typically stored as Shapely geometry objects. This column contains spatial data (points, lines, or polygons) that represent the physical location and shape of the asset.

In [5]:
# Drop columns
bicycle_data = bicycle_data.drop(columns=[
    'OBJECTID',               
    'SEGMENT_ID',                  
    'UPGRADED',      
    'PRE_AMALGAMATION',      
    'FROM_STREET',              
    'TO_STREET',               
    'CNPCLASS',               
    'SURFACE',               
    'OWNER',              
    'INFRA_LOWORDER',
    'DIR_LOWORDER',             
    'SEPA_LOWORDER',             
    'SEPB_LOWORDER',        
    'ORIG_LOWORDER_INFRA',          
    'DIR_HIGHORDER',             
    'INFRA_HIGHORDER', 
    'SEPA_HIGHORDER',
    'SEPB_HIGHORDER',
    'ORIG_HIGHORDER',
    'BYLAWED',
    'EDITOR',
    'LAST_EDIT_DATE',
    'UPGRADE_DESCRIPTION',
    'CONVERTED',           
], errors='ignore')

# Check if columns are removed and remaining columns have been renamed
bicycle_data.columns

Index(['_id', 'INSTALLED', 'STREET_NAME', 'ROADCLASS', 'geometry'], dtype='object')

In [6]:
bicycle_data.head()

Unnamed: 0,_id,INSTALLED,STREET_NAME,ROADCLASS,geometry
0,1,2001,Kilbarry Rd,,"MULTILINESTRING ((-79.40351 43.69526, -79.4030..."
1,2,2001,Martin Goodman / Waterfront Trl,,"MULTILINESTRING ((-79.40364 43.63499, -79.4035..."
2,3,2001,Gatineau Hydro Corridor Trl,,"MULTILINESTRING ((-79.27522 43.74158, -79.2751..."
3,4,2001,G Ross Lord Park Trl,,"MULTILINESTRING ((-79.46772 43.77208, -79.4675..."
4,5,2001,Etobicoke Creek Trl,,"MULTILINESTRING ((-79.54812 43.59027, -79.5480..."


In [7]:
# Check data types per column
print(bicycle_data.dtypes)

_id               int64
INSTALLED         int64
STREET_NAME      object
ROADCLASS        object
geometry       geometry
dtype: object


In [8]:
# Check numerical statistics for each column
bicycle_data.describe()

Unnamed: 0,_id,INSTALLED
count,1464.0,1464.0
mean,732.5,1987.971311
std,422.76471,202.477544
min,1.0,0.0
25%,366.75,2001.0
50%,732.5,2006.0
75%,1098.25,2015.0
max,1464.0,2024.0


In [11]:
# Check for missing values
print(bicycle_data.isnull().sum())

# Drop rows with any NaN values
bicycle_data_clean = bicycle_data.dropna()

# Verify the result by checking the number of missing values after dropping NaNs
print(bicycle_data_clean.isnull().sum())


_id            0
INSTALLED      0
STREET_NAME    0
ROADCLASS      0
geometry       0
dtype: int64
_id            0
INSTALLED      0
STREET_NAME    0
ROADCLASS      0
geometry       0
dtype: int64


In [13]:
bicycle_data_clean.shape

(1464, 5)

In [14]:
# View DataFrame
bicycle_data_clean.tail()

Unnamed: 0,_id,INSTALLED,STREET_NAME,ROADCLASS,geometry
1459,1460,2023,Dundas St W,,"MULTILINESTRING ((-79.53625 43.64000, -79.5362..."
1460,1461,2023,Bloor St W,,"MULTILINESTRING ((-79.53746 43.64136, -79.5372..."
1461,1462,2023,Kipling Ave,,"MULTILINESTRING ((-79.53433 43.63922, -79.5342..."
1462,1463,2023,Dundas St W,,"MULTILINESTRING ((-79.53755 43.63906, -79.5376..."
1463,1464,2024,University Ave,,"MULTILINESTRING ((-79.38410 43.64638, -79.3838..."


In [19]:
# Step 3: Count the total number of rows for each year
installation_count_per_year = bicycle_data.groupby('INSTALLED').size()

# Step 4: Count how many rows have been installed (non-null INSTALLED values) for each year
installed_count_per_year = bicycle_data[bicycle_data['INSTALLED'].notnull()].groupby('INSTALLED').size()

# Step 5: Calculate the percentage of bicycle lanes installed each year
installation_percentage = (installed_count_per_year / installation_count_per_year) * 100

# Step 6: Combine the counts and percentage into a single DataFrame
installation_stats = pd.DataFrame({
    'Number_Installed': installed_count_per_year,
    'Percentage_Installed': installation_percentage
}).reset_index()

# Step 7: Display the top rows using .head() to show the table
installation_stats.head()


Unnamed: 0,INSTALLED,Number_Installed,Percentage_Installed
0,1970-01-01 00:00:00.000000000,15,100.0
1,1970-01-01 00:00:00.000002001,539,100.0
2,1970-01-01 00:00:00.000002002,12,100.0
3,1970-01-01 00:00:00.000002003,15,100.0
4,1970-01-01 00:00:00.000002004,20,100.0


In [None]:
output_path = "../Clean Data/wards_data.geojson"
wards_data.to_file(output_path, driver="GeoJSON")