In [5]:
# 3rd Party Imports
import pandas as pd
import seaborn as sns
from datetime import datetime
import matplotlib.pyplot as plt 
import numpy as np
import matplotlib as plt
import geopandas as gpd
from fuzzywuzzy import fuzz, process

# Configure Notebook
%matplotlib inline
plt.style.use('fivethirtyeight')
sns.set_context("notebook")
import warnings
warnings.filterwarnings('ignore')
from pathlib import Path


In [None]:
# Import eqao, osslt, and highschool data
current_dir = Path.cwd()
parent_dir = current_dir.parent

eqao = gpd.read_file(parent_dir / "data" / "csv" / "EQAO_final.csv")
osslt = gpd.read_file(parent_dir / "data" / "csv" / "OSSLT_final.csv")
high = gpd.read_file(parent_dir / "data" / "csv" / "Highschool_final.csv")

In [86]:
# Rename Columns
eqao.columns = ['EQAO_' + col if col != 'School Name' else col for col in eqao.columns]
osslt.columns = ['OSSLT_' + col if col != 'School Name' else col for col in osslt.columns]

In [87]:
# suffixes = ['Secondary School', "Collegiate Institute", "Technical School", "Academy", "Collegiate and Technical Institute", "High School", "School of the Arts", "Technical-Commercial School"]

# Normalize the 'School_Name' in df_ranking by removing the suffixes
# for suffix in suffixes:
#     eqao['School Name'] = eqao['School Name'].str.replace(suffix, '', regex=False)
#     osslt['School Name'] = osslt['School Name'].str.replace(suffix, '', regex=False)

# Strip any leading/trailing spaces after removing suffixes
# eqao['School Name'] = eqao['School Name'].str.strip()
# osslt['School Name'] = osslt['School Name'].str.strip()

# Merge the dataframes (assuming the normalized 'School_Name' is the key)
merged_df = pd.merge(eqao, osslt, on='School Name')

# Display the merged dataframe
merged_df

Unnamed: 0,School Name,EQAO_Total # of\nStudents,EQAO_# of\nParticipating\nStudents,EQAO_# of\nStudents at\nLevels 3/4,EQAO_% at Levels\n3/4,OSSLT_field_2,OSSLT_field_3,OSSLT_Number of\nPreviously\nEligible\nStudents,OSSLT_field_5,OSSLT_field_6,...,OSSLT_field_15,OSSLT_Number of\nFully\nParticipating\nStudents,OSSLT_field_17,OSSLT_field_18,OSSLT_Successful\n(Fully\nParticipating),OSSLT_field_20,OSSLT_field_21,OSSLT_Not\nSuccessful\n(Fully\nParticipating),OSSLT_field_23,OSSLT_field_24
0,A Y Jackson Secondary School,112,103,96,93%,,,312,,,...,,286,,,88%,,,12%,,
1,Agincourt Collegiate Institute,184,172,125,73%,,,388,,,...,,381,,,94%,,,6%,,
2,Albert Campbell Collegiate Institute,169,158,100,63%,,,261,,,...,,231,,,88%,,,12%,,
3,Birchmount Park Collegiate Institute,104,87,28,32%,,,169,,,...,,128,,,79%,,,21%,,
4,Bloor Collegiate Institute,78,75,59,79%,,,197,,,...,,189,,,94%,,,6%,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59,Wexford Collegiate School for the Arts,118,107,45,42%,257,,,4%,,...,,,86%,,,14%,,,,
60,Winston Churchill Collegiate Institute,64,54,11,20%,173,,,16%,,...,,,70%,,,30%,,,,
61,Woburn Collegiate Institute,92,83,49,59%,247,,,5%,,...,,,82%,,,18%,,,,
62,York Memorial Collegiate Institute,67,63,21,33%,210,,,<1%,,...,,,83%,,,17%,,,,


In [88]:
# Example dataframes (replace these with your actual data)
# merged_df = pd.DataFrame({'School Name': ['School A', 'School B', 'School C']})
# high = pd.DataFrame({'School Name': ['Sch A', 'Sch B', 'Sch D']})

# Extract the 'School Name' columns
merged_schools = merged_df['School Name'].str.lower()  # Convert to lowercase for case-insensitive comparison
high_schools = high['School Name'].str.lower()  # Convert to lowercase for case-insensitive comparison

# Find common schools (partial match in high within full names in merged_df)
common_schools = []
for partial_name in high_schools:
    matches = merged_schools[merged_schools.str.contains(partial_name, case=False, na=False)]
    common_schools.extend(matches.tolist())

# Find schools in merged_df but not in high
merged_only_schools = merged_schools[~merged_schools.isin(common_schools)].tolist()

# Find schools in high but not in merged_df (these are the partial names with no full match)
high_only_schools = high_schools[~high_schools.isin([name.lower() for name in common_schools])].tolist()

# Print the results
print("Common Schools (partial match in high within full names in merged_df):", common_schools)
print("Schools only in merged_df:", merged_only_schools)
print("Schools only in high:", high_only_schools)

Common Schools (partial match in high within full names in merged_df): ['a y jackson secondary school', 'agincourt collegiate institute', 'albert campbell collegiate institute', 'birchmount park collegiate institute', 'bloor collegiate institute', 'c w jefferys collegiate institute', 'cedarbrae collegiate institute', 'central technical school', 'central toronto academy', 'don mills collegiate institute', 'downsview secondary school', 'dr norman bethune collegiate institute', 'earl haig secondary school', 'east york collegiate institute', 'etobicoke collegiate institute', 'etobicoke school of the arts', 'forest hill collegiate institute', 'george s henry academy', 'georges vanier secondary school', 'harbord collegiate institute', 'humberside collegiate institute', 'jarvis collegiate institute', 'john polanyi collegiate institute', 'kipling collegiate institute', "l'amoreaux collegiate institute", 'lakeshore collegiate institute', 'lawrence park collegiate institute', 'leaside high schoo

In [89]:
# Problem Schools
# Greenwood Secondary School: Not included as only for Immigrant Students
# Heydon Park Secondary School: Not included as only for Young Women, Transgender and Non-Binary Students
# Native Learning Centre East: Not included as only for Indigeneous Students
# Rosedale Heights School of the Arts:
# SATEC @ WA Porter Collegiate Institute:
# Wexford Collegiate School for the Arts:

In [90]:
# Extract the 'School Name' columns
merged_schools = merged_df['School Name'].str.lower()  # Convert to lowercase for case-insensitive comparison
high_schools = high['School Name'].str.lower()  # Convert to lowercase for case-insensitive comparison

# Manually add the shortened names for special schools to the high_schools list
high_schools = high_schools.tolist() 

# Find common schools (partial match in high within full names in merged_df)
common_schools = []
for partial_name in high_schools:
    matches = merged_schools[merged_schools.str.contains(partial_name, case=False, na=False)]
    common_schools.extend(matches.tolist())

# Find schools in merged_df but not in high
merged_only_schools = merged_schools[~merged_schools.isin(common_schools)].tolist()

# Find schools in high but not in merged_df (these are the partial names with no full match)
# We need to check that each name in high_schools is not in the common_schools
high_only_schools = [name for name in high_schools if name.lower() not in [s.lower() for s in common_schools]]

# Add a new "Partial Name" column in merged_df
merged_df['Partial Name'] = merged_df['School Name'].apply(lambda x: next((name for name in high_schools if name.lower() in x.lower()), None))

# Add a new "Partial Name" column in high
high['Partial Name'] = high['School Name'].apply(lambda x: next((name for name in merged_df['School Name'] if name.lower() in x.lower()), None))

# Print the results
print("Common Schools (partial match in high within full names in merged_df):", common_schools)
print("Schools only in merged_df:", merged_only_schools)
print("Schools only in high:", high_only_schools)

high["School Name"]


Common Schools (partial match in high within full names in merged_df): ['a y jackson secondary school', 'agincourt collegiate institute', 'albert campbell collegiate institute', 'birchmount park collegiate institute', 'bloor collegiate institute', 'c w jefferys collegiate institute', 'cedarbrae collegiate institute', 'central technical school', 'central toronto academy', 'don mills collegiate institute', 'downsview secondary school', 'dr norman bethune collegiate institute', 'earl haig secondary school', 'east york collegiate institute', 'etobicoke collegiate institute', 'etobicoke school of the arts', 'forest hill collegiate institute', 'george s henry academy', 'georges vanier secondary school', 'harbord collegiate institute', 'humberside collegiate institute', 'jarvis collegiate institute', 'john polanyi collegiate institute', 'kipling collegiate institute', "l'amoreaux collegiate institute", 'lakeshore collegiate institute', 'lawrence park collegiate institute', 'leaside high schoo

0            A Y Jackson
1              Agincourt
2        Albert Campbell
3      Archbishop Romero
4        Birchmount Park
             ...        
100    Winston Churchill
101               Woburn
102        York Memorial
103           York Mills
104        Étienne-Brûlé
Name: School Name, Length: 105, dtype: object

In [91]:
special_schools = ['rosedale heights-arts', 'w a porter', 'wexford collegiate-arts']
merged_df.loc[merged_df['School Name'] == 'Rosedale Heights School of the Arts', 'Partial Name'] = 'rosedale heights-arts'
merged_df.loc[merged_df['School Name'] == 'SATEC @ WA Porter Collegiate Institute', 'Partial Name'] = 'w a porter'
merged_df.loc[merged_df['School Name'] == 'Wexford Collegiate School for the Arts', 'Partial Name'] = 'wexford collegiate-arts'

In [92]:
# Ensure both columns are strings and convert to lowercase, handling NaNs
merged_df["Partial Name"] = merged_df["Partial Name"].fillna("").astype(str).str.lower()
high["School Name"] = high["School Name"].fillna("").astype(str).str.lower()

# Perform a left merge, keeping all rows from merged_df and adding matching rows from high
merged_result = pd.merge(merged_df, high, how='left', left_on='Partial Name', right_on='School Name')

# Optional: Drop the 'School Name' column from the merged result if you don't need it
# merged_result = merged_result.drop(columns=['School Name'])

# Display the merged dataframe
merged_result

Unnamed: 0,School Name_x,EQAO_Total # of\nStudents,EQAO_# of\nParticipating\nStudents,EQAO_# of\nStudents at\nLevels 3/4,EQAO_% at Levels\n3/4,OSSLT_field_2,OSSLT_field_3,OSSLT_Number of\nPreviously\nEligible\nStudents,OSSLT_field_5,OSSLT_field_6,...,OSSLT_field_20,OSSLT_field_21,OSSLT_Not\nSuccessful\n(Fully\nParticipating),OSSLT_field_23,OSSLT_field_24,Partial Name_x,School Name_y,Score,Rank,Partial Name_y
0,A Y Jackson Secondary School,112,103,96,93%,,,312,,,...,,,12%,,,a y jackson,a y jackson,7.9,77/689,
1,Agincourt Collegiate Institute,184,172,125,73%,,,388,,,...,,,6%,,,agincourt,agincourt,8.3,41/689,
2,Albert Campbell Collegiate Institute,169,158,100,63%,,,261,,,...,,,12%,,,albert campbell,albert campbell,7,210/689,
3,Birchmount Park Collegiate Institute,104,87,28,32%,,,169,,,...,,,21%,,,birchmount park,birchmount park,5.2,496/689,
4,Bloor Collegiate Institute,78,75,59,79%,,,197,,,...,,,6%,,,bloor,bloor,8.5,26/689,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59,Wexford Collegiate School for the Arts,118,107,45,42%,257,,,4%,,...,14%,,,,,wexford collegiate-arts,wexford collegiate-arts,6.6,273/689,
60,Winston Churchill Collegiate Institute,64,54,11,20%,173,,,16%,,...,30%,,,,,winston churchill,winston churchill,3.4,628/689,
61,Woburn Collegiate Institute,92,83,49,59%,247,,,5%,,...,18%,,,,,woburn,woburn,6.9,227/689,
62,York Memorial Collegiate Institute,67,63,21,33%,210,,,<1%,,...,17%,,,,,york memorial,york memorial,6.7,260/689,


In [93]:
merged_result = merged_result.drop(columns=['Partial Name_y'])
merged_result = merged_result.dropna()
merged_result


Unnamed: 0,School Name_x,EQAO_Total # of\nStudents,EQAO_# of\nParticipating\nStudents,EQAO_# of\nStudents at\nLevels 3/4,EQAO_% at Levels\n3/4,OSSLT_field_2,OSSLT_field_3,OSSLT_Number of\nPreviously\nEligible\nStudents,OSSLT_field_5,OSSLT_field_6,...,OSSLT_Successful\n(Fully\nParticipating),OSSLT_field_20,OSSLT_field_21,OSSLT_Not\nSuccessful\n(Fully\nParticipating),OSSLT_field_23,OSSLT_field_24,Partial Name_x,School Name_y,Score,Rank
0,A Y Jackson Secondary School,112,103,96,93%,,,312,,,...,88%,,,12%,,,a y jackson,a y jackson,7.9,77/689
1,Agincourt Collegiate Institute,184,172,125,73%,,,388,,,...,94%,,,6%,,,agincourt,agincourt,8.3,41/689
2,Albert Campbell Collegiate Institute,169,158,100,63%,,,261,,,...,88%,,,12%,,,albert campbell,albert campbell,7,210/689
3,Birchmount Park Collegiate Institute,104,87,28,32%,,,169,,,...,79%,,,21%,,,birchmount park,birchmount park,5.2,496/689
4,Bloor Collegiate Institute,78,75,59,79%,,,197,,,...,94%,,,6%,,,bloor,bloor,8.5,26/689
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59,Wexford Collegiate School for the Arts,118,107,45,42%,257,,,4%,,...,,14%,,,,,wexford collegiate-arts,wexford collegiate-arts,6.6,273/689
60,Winston Churchill Collegiate Institute,64,54,11,20%,173,,,16%,,...,,30%,,,,,winston churchill,winston churchill,3.4,628/689
61,Woburn Collegiate Institute,92,83,49,59%,247,,,5%,,...,,18%,,,,,woburn,woburn,6.9,227/689
62,York Memorial Collegiate Institute,67,63,21,33%,210,,,<1%,,...,,17%,,,,,york memorial,york memorial,6.7,260/689


In [94]:
merged_result.to_csv("combined_schools.csv", index = False)

PermissionError: [Errno 13] Permission denied: 'combined_schools.csv'

In [None]:
# simple school map
import pandas as pd
import folium
import geopandas as gpd
from shapely.geometry import Point
import numpy as np

# Read the data and clean it
data = pd.read_csv("combined_schools_with_cords.csv")
data.columns = data.columns.str.replace('\r\n', ' ', regex=True)

# Ensure no missing coordinates
data = data.dropna(subset=['latitude', 'longitude'])

# Convert to numeric
data['latitude'] = pd.to_numeric(data['latitude'], errors='coerce')
data['longitude'] = pd.to_numeric(data['longitude'], errors='coerce')

# Check if there are any missing coordinates after conversion
data = data.dropna(subset=['latitude', 'longitude'])

# Check the first few rows of the data to confirm lat/long are correct
print(data[['School Name_x', 'latitude', 'longitude']].head())

# Create GeoDataFrame
geometry = [Point(xy) for xy in zip(data['longitude'], data['latitude'])]
gdf = gpd.GeoDataFrame(data, geometry=geometry)

# Set CRS to EPSG:4326 (WGS84)
gdf.set_crs("EPSG:4326", allow_override=True, inplace=True)

# Create the base map centered around Toronto
m = folium.Map(location=[43.7, -79.4], zoom_start=11)

# Add Circle Markers for each school
for index, row in data.iterrows():
    folium.CircleMarker(
        location=[row['latitude'], row['longitude']],
        radius=5,
        color="blue",
        fill=True,
        fill_color="blue",
        fill_opacity=0.6,
        popup=f"School: {row['School Name_x']}<br>EQAO: {row['EQAO_% at Levels 3/4']}<br>OSSLT: {row['OSSLT_Successful (Fully Participating)']}<br>Score: {row['Score']}"
    ).add_to(m)

folium.LayerControl().add_to(m)

# Display the map
m


                          School Name_x   latitude  longitude
0          A Y Jackson Secondary School  43.804716 -79.367003
1        Agincourt Collegiate Institute  43.789069 -79.278470
2  Albert Campbell Collegiate Institute  43.810001 -79.273357
3  Birchmount Park Collegiate Institute  43.697853 -79.259599
5     C W Jefferys Collegiate Institute  43.758312 -79.500646


In [103]:
import folium
import pandas as pd
import numpy as np
import branca  # Import branca for color scales
from folium import CircleMarker
from shapely.geometry import Point
data = pd.read_csv("combined_schools_with_cords.csv")
data.columns = data.columns.str.replace('\r\n', ' ', regex=True)
data = data.dropna(subset=['latitude', 'longitude'])

# Convert to numeric and drop rows with invalid data
data['latitude'] = pd.to_numeric(data['latitude'], errors='coerce')
data['longitude'] = pd.to_numeric(data['longitude'], errors='coerce')
data = data.dropna(subset=['latitude', 'longitude'])

# Convert the relevant columns to numeric (EQAO, OSSLT, Score)
data["EQAO_% at Levels 3/4"] = pd.to_numeric(data["EQAO_% at Levels 3/4"], errors='coerce')
data["OSSLT_Successful (Fully Participating)"] = pd.to_numeric(data["OSSLT_Successful (Fully Participating)"], errors='coerce')
data["Score"] = pd.to_numeric(data["Score"], errors='coerce')

# Function to calculate color gradient based on value using branca
def calculate_color(value, min_val, max_val):
    """
    Calculate a color based on the value, using a color gradient from 'YlOrRd' (Yellow -> Orange -> Red).
    """
    # Ensure that min_val and max_val are in ascending order
    min_val, max_val = sorted([min_val, max_val])  # Sort the values to avoid issues with inverted ranges
    
    # Normalize value between 0 and 1
    normalized_value = (value - min_val) / (max_val - min_val)
    
    # Use branca to apply a color scale (YlOrRd)
    color_scale = branca.colormap.LinearColormap(
        ['yellow', 'orange', 'red'], vmin=min_val, vmax=max_val
    )
    color = color_scale(normalized_value)  # Get the color based on the normalized value
    return color

# Function to add CircleMarkers for each school
def add_circle_markers(map_obj, data, column, min_val, max_val):
    """
    Add CircleMarkers for each school to the map based on the specified column values.
    """
    for _, row in data.iterrows():
        value = row[column]
        
        # Only continue if the value is valid (not NaN)
        if pd.isna(value):
            continue
        
        # Calculate the color based on the value
        color = calculate_color(value, min_val, max_val)
        
        # Use radius to represent the magnitude of the value (larger score, larger radius)
        radius = 5 + (value - min_val) / (max_val - min_val) * 30  # Adjust the radius scale as needed
        
        # Create the CircleMarker
        CircleMarker(
            location=[row['latitude'], row['longitude']],
            radius=radius,
            color=color,
            fill=True,
            fill_color=color,
            fill_opacity=0.7,
            popup=f"School: {row['School Name_x']}<br>{column}: {value:.2f}"
        ).add_to(map_obj)

# Create the map centered on a specific location (e.g., Toronto)
m = folium.Map(location=[43.7, -79.4], zoom_start=11)

# Choose the metric for which you want to visualize the color-filled circles
metric_column = "EQAO_% at Levels 3/4"  # You can replace this with "OSSLT_Successful (Fully Participating)" or "Score"

# Find the min and max values for the chosen column (for the color scaling)
min_val = data[metric_column].min()
max_val = data[metric_column].max()

# Ensure that the min and max are valid (i.e., not NaN or invalid)
if pd.isna(min_val) or pd.isna(max_val) or min_val == max_val:
    raise ValueError(f"Invalid min or max values: min_val = {min_val}, max_val = {max_val}")

# Add CircleMarkers for each school, with the color based on the selected metric
add_circle_markers(m, data, metric_column, min_val, max_val)

# Optional: Add a custom legend for clarity
def add_gradient_legend(map_obj, min_val, max_val):
    legend_html = f'''
        <div style="position: absolute; top: 10px; left: 10px; background-color: white; 
        padding: 10px; border-radius: 5px; border: 2px solid black; z-index: 9999;">
            <b>{metric_column} Legend</b><br>
            <i style="background: {branca.colormap.LinearColormap(['yellow', 'orange', 'red'], vmin=min_val, vmax=max_val)(0)}; width: 20px; height: 20px; display: inline-block;"></i> Low<br>
            <i style="background: {branca.colormap.LinearColormap(['yellow', 'orange', 'red'], vmin=min_val, vmax=max_val)(0.5)}; width: 20px; height: 20px; display: inline-block;"></i> Medium<br>
            <i style="background: {branca.colormap.LinearColormap(['yellow', 'orange', 'red'], vmin=min_val, vmax=max_val)(1)}; width: 20px; height: 20px; display: inline-block;"></i> High
        </div>
    '''
    map_obj.get_root().html.add_child(folium.Element(legend_html))

# Add the gradient legend to the map
add_gradient_legend(m, min_val, max_val)

# Display the map
m

ValueError: Invalid min or max values: min_val = nan, max_val = nan

In [99]:
data = pd.read_csv("combined_schools_with_cords.csv")
data.columns = data.columns.str.replace('\r\n', ' ', regex=True)
data = data.dropna(subset=['latitude', 'longitude'])

# Convert to numeric and drop rows with invalid data
data['latitude'] = pd.to_numeric(data['latitude'], errors='coerce')
data['longitude'] = pd.to_numeric(data['longitude'], errors='coerce')
data = data.dropna(subset=['latitude', 'longitude'])

# Convert the relevant columns to numeric (EQAO, OSSLT, Score)
data["EQAO_% at Levels 3/4"] = pd.to_numeric(data["EQAO_% at Levels 3/4"], errors='coerce')
data["OSSLT_Successful (Fully Participating)"] = pd.to_numeric(data["OSSLT_Successful (Fully Participating)"], errors='coerce')
data["Score"] = pd.to_numeric(data["Score"], errors='coerce')

# Create GeoDataFrame with geometry
geometry = [Point(xy) for xy in zip(data['longitude'], data['latitude'])]
gdf = gpd.GeoDataFrame(data, geometry=geometry)

# Set CRS to EPSG:4326 (WGS84)
gdf.set_crs("EPSG:4326", allow_override=True, inplace=True)

# Function to calculate bins for choropleth
def calculate_bins(data, column):
    """
    Calculate evenly spaced bins for a given column in a DataFrame.
    """
    col_data = data[column].dropna()
    min_val, max_val = col_data.min(), col_data.max()
    bins = np.linspace(min_val, max_val, 6).tolist()  # Adjust number of bins
    return bins

# Function to add a choropleth layer with custom color gradient
def add_choropleth(map_obj, geo_data, name, column):
    bins = calculate_bins(geo_data, column)
    
    choropleth_layer = folium.Choropleth(
        geo_data=geo_data.to_json(),
        name=name,
        data=geo_data,
        columns=["School Name_x", column],
        key_on="feature.properties.School Name_x",
        fill_color="YlOrRd",  # Choose color gradient for fill
        fill_opacity=0.7,
        line_opacity=0.2,
        bins=bins,
        legend_name=name
    ).add_to(map_obj)

# Create the map centered on a specific location (e.g., Toronto)
m = folium.Map(location=[43.7, -79.4], zoom_start=11)

# Add choropleth layers for each ranking (EQAO, OSSLT, Score)
add_choropleth(m, gdf, "EQAO_% at Levels 3/4", "EQAO_% at Levels 3/4")
add_choropleth(m, gdf, "OSSLT_Successful (Fully Participating)", "OSSLT_Successful (Fully Participating)")
add_choropleth(m, gdf, "Score", "Score")

# Add Layer Control to toggle between layers
folium.LayerControl().add_to(m)

# Function to add a custom gradient legend to the map
def add_gradient_legend(map_obj):
    legend_html = '''
        <div style="position: absolute; top: 10px; left: 10px; background-color: white; 
        padding: 10px; border-radius: 5px; border: 2px solid black; z-index: 9999;">
            <b>Color Gradient Legend</b><br>
            <i style="background: #f7f4f9; width: 20px; height: 20px; display: inline-block;"></i> Low<br>
            <i style="background: #f1ebd7; width: 20px; height: 20px; display: inline-block;"></i> Moderate<br>
            <i style="background: #e7c6bc; width: 20px; height: 20px; display: inline-block;"></i> High<br>
            <i style="background: #e7a3b8; width: 20px; height: 20px; display: inline-block;"></i> Very High<br>
            <i style="background: #d8002f; width: 20px; height: 20px; display: inline-block;"></i> Highest
        </div>
    '''
    map_obj.get_root().html.add_child(folium.Element(legend_html))

# Add the custom gradient legend to the map
add_gradient_legend(m)

# Display the map
m

In [98]:
data = pd.read_csv("combined_schools_with_cords.csv")
data.columns = data.columns.str.replace('\r\n', ' ', regex=True)
data = data.dropna(subset=['latitude', 'longitude'])

# Convert to numeric and drop rows with invalid data
data['latitude'] = pd.to_numeric(data['latitude'], errors='coerce')
data['longitude'] = pd.to_numeric(data['longitude'], errors='coerce')
data = data.dropna(subset=['latitude', 'longitude'])

# Convert the relevant columns to numeric (EQAO, OSSLT, Score)
data["EQAO_% at Levels 3/4"] = pd.to_numeric(data["EQAO_% at Levels 3/4"], errors='coerce')
data["OSSLT_Successful (Fully Participating)"] = pd.to_numeric(data["OSSLT_Successful (Fully Participating)"], errors='coerce')
data["Score"] = pd.to_numeric(data["Score"], errors='coerce')

# Create GeoDataFrame with geometry
geometry = [Point(xy) for xy in zip(data['longitude'], data['latitude'])]
gdf = gpd.GeoDataFrame(data, geometry=geometry)

# Set CRS to EPSG:4326 (WGS84)
gdf.set_crs("EPSG:4326", allow_override=True, inplace=True)

# Create the map centered on a specific location (e.g., Toronto)
m = folium.Map(location=[43.7, -79.4], zoom_start=11)

# Function to add a heatmap layer with custom shades and gradient
def add_heatmap(map_obj, gdf, column, name):
    # Drop rows with NaN values in the specified column
    gdf_no_na = gdf.dropna(subset=[column])
    
    # Extract coordinates and values for the heatmap
    heat_data = [[row['latitude'], row['longitude'], row[column]] for index, row in gdf_no_na.iterrows()]
    
    # Define the gradient for the heatmap (custom color scale)
    gradient = {0.0: 'blue', 0.2: 'green', 0.4: 'yellow', 0.6: 'orange', 0.8: 'red', 1.0: 'darkred'}
    
    # Create a heatmap layer
    HeatMap(heat_data, name=name, radius=15, max_zoom=13, gradient=gradient).add_to(map_obj)

# Add heatmap layers for each ranking (EQAO, OSSLT, Score)
add_heatmap(m, gdf, "EQAO_% at Levels 3/4", "EQAO Heatmap")
add_heatmap(m, gdf, "OSSLT_Successful (Fully Participating)", "OSSLT Heatmap")
add_heatmap(m, gdf, "Score", "School Score Heatmap")

# Add Layer Control for toggling between heatmaps
folium.LayerControl().add_to(m)

# Function to add a custom legend for the heatmap
def add_legend(map_obj):
    legend_html = '''
        <div style="position: absolute; top: 10px; left: 10px; background-color: white; 
        padding: 10px; border-radius: 5px; border: 2px solid black; z-index: 9999;">
            <b>Heatmap Legend</b><br>
            <i style="background: blue; width: 20px; height: 20px; display: inline-block;"></i> 0-20%<br>
            <i style="background: green; width: 20px; height: 20px; display: inline-block;"></i> 20-40%<br>
            <i style="background: yellow; width: 20px; height: 20px; display: inline-block;"></i> 40-60%<br>
            <i style="background: orange; width: 20px; height: 20px; display: inline-block;"></i> 60-80%<br>
            <i style="background: red; width: 20px; height: 20px; display: inline-block;"></i> 80-100%<br>
            <i style="background: darkred; width: 20px; height: 20px; display: inline-block;"></i> 100%+
        </div>
    '''
    map_obj.get_root().html.add_child(folium.Element(legend_html))

# Adding the custom legend to the map
add_legend(m)

# Display the map
m

In [95]:
import folium
import pandas as pd
import geopandas as gpd
import numpy as np
from folium.plugins import HeatMap
from shapely.geometry import Point

# Load data
data = pd.read_csv("combined_schools_with_cords.csv")
data.columns = data.columns.str.replace('\r\n', ' ', regex=True)
data = data.dropna(subset=['latitude', 'longitude'])

# Convert to numeric and drop rows with invalid data
data['latitude'] = pd.to_numeric(data['latitude'], errors='coerce')
data['longitude'] = pd.to_numeric(data['longitude'], errors='coerce')
data = data.dropna(subset=['latitude', 'longitude'])

# Convert the relevant columns to numeric (EQAO, OSSLT, Score)
data["EQAO_% at Levels 3/4"] = pd.to_numeric(data["EQAO_% at Levels 3/4"], errors='coerce')
data["OSSLT_Successful (Fully Participating)"] = pd.to_numeric(data["OSSLT_Successful (Fully Participating)"], errors='coerce')
data["Score"] = pd.to_numeric(data["Score"], errors='coerce')

# Create GeoDataFrame with geometry
geometry = [Point(xy) for xy in zip(data['longitude'], data['latitude'])]
gdf = gpd.GeoDataFrame(data, geometry=geometry)

# Set CRS to EPSG:4326 (WGS84)
gdf.set_crs("EPSG:4326", allow_override=True, inplace=True)

# Create the map centered on a specific location (e.g., Toronto)
m = folium.Map(location=[43.7, -79.4], zoom_start=11)

# Function to add a heatmap layer
def add_heatmap(map_obj, gdf, column, name):
    # Drop rows with NaN values in the specified column
    gdf_no_na = gdf.dropna(subset=[column])
    
    # Extract coordinates and values for the heatmap
    heat_data = [[row['latitude'], row['longitude'], row[column]] for index, row in gdf_no_na.iterrows()]
    
    # Create a heatmap layer
    HeatMap(heat_data, name=name, radius=15, max_zoom=13).add_to(map_obj)

# Add heatmap layers for each ranking (EQAO, OSSLT, Score)
add_heatmap(m, gdf, "EQAO_% at Levels 3/4", "EQAO Heatmap")
add_heatmap(m, gdf, "OSSLT_Successful (Fully Participating)", "OSSLT Heatmap")
add_heatmap(m, gdf, "Score", "School Score Heatmap")

# Add Layer Control for toggling between heatmaps
folium.LayerControl().add_to(m)

# Display the map
m

In [None]:
import pandas as pd
import folium
import numpy as np
import geopandas as gpd
from shapely.geometry import Point

data = pd.read_csv("combined_schools_with_cords.csv")
data.columns = data.columns.str.replace('\r\n', ' ', regex=True)
data = data.dropna(subset=['latitude', 'longitude'])

# Convert to numeric and drop rows with invalid data
data['latitude'] = pd.to_numeric(data['latitude'], errors='coerce')
data['longitude'] = pd.to_numeric(data['longitude'], errors='coerce')
data = data.dropna(subset=['latitude', 'longitude'])

# Convert the relevant columns to numeric (EQAO, OSSLT, Score)
data["EQAO_% at Levels 3/4"] = pd.to_numeric(data["EQAO_% at Levels 3/4"], errors='coerce')
data["OSSLT_Successful (Fully Participating)"] = pd.to_numeric(data["OSSLT_Successful (Fully Participating)"], errors='coerce')
data["Score"] = pd.to_numeric(data["Score"], errors='coerce')

# Create GeoDataFrame with geometry
geometry = [Point(xy) for xy in zip(data['longitude'], data['latitude'])]
gdf = gpd.GeoDataFrame(data, geometry=geometry)

# Set CRS to EPSG:4326 (WGS84)
gdf.set_crs("EPSG:4326", allow_override=True, inplace=True)

# Function to calculate bins for choropleth
def calculate_bins(gdf, column):
    col_data = gdf[column].dropna()
    min_val, max_val = col_data.min(), col_data.max()
    bins = np.linspace(min_val, max_val, 6).tolist()
    return bins

# Function to add choropleth layer to the map
def add_choropleth(map_obj, gdf, name, column):
    bins = calculate_bins(gdf, column)
    geo_json_data = gdf.to_json()
    
    choropleth = folium.Choropleth(
        geo_data=geo_json_data,
        name=name,  # Adding a name for this layer
        data=gdf,
        columns=["School Name_x", column],
        key_on="feature.properties.School Name_x",
        fill_color="YlOrRd",
        fill_opacity=0.7,
        line_opacity=0.2,
        bins=bins,
        legend_name=name
    ).add_to(map_obj)
    
    return choropleth

# Create the map (with markers and choropleth)
m = folium.Map(location=[43.7, -79.4], zoom_start=11)

# Add CircleMarkers for individual schools (check if markers appear)
for index, row in data.iterrows():
    folium.CircleMarker(
        location=[row['latitude'], row['longitude']],
        radius=5,
        color="blue",
        fill=True,
        fill_color="blue",
        fill_opacity=0.6,
        popup=f"School: {row['School Name_x']}<br>EQAO: {row['EQAO_% at Levels 3/4']}<br>OSSLT: {row['OSSLT_Successful (Fully Participating)']}<br>Score: {row['Score']}"
    ).add_to(m)

# Add choropleth layers
add_choropleth(m, gdf, "EQAO Percentage at Levels 3/4", "EQAO_% at Levels 3/4")
add_choropleth(m, gdf, "OSSLT Successful", "OSSLT_Successful (Fully Participating)")
add_choropleth(m, gdf, "Score", "Score")

# Add Layer Control for toggling between layers
folium.LayerControl().add_to(m)

# Display the map
m