# 1. Imports and load data

In [69]:
import sqlite3
from contextlib import closing

import folium
import geopandas as gpd
import pandas as pd
from shapely.geometry import Polygon, MultiPolygon

## walk zones:

In [70]:
# sqlite3's connection context manager only commits / rolls back the transaction;
# it does NOT close the connection. closing() guarantees the database file handle
# is released once the query has been read into the DataFrame.
# NOTE(review): the query has no ORDER BY, so the vertex order later used to build
# polygons is whatever row order SQLite returns — confirm this is reliable.
with closing(sqlite3.connect('../database.db')) as conn:
    df_wz = pd.read_sql_query('''
                                   SELECT s.school_id, s.name, wz.polygon_number, wz.lat_coordinate, wz.long_coordinate
FROM walk_zones wz
INNER JOIN schools s on s.school_id = wz.school_id
                                   ''', conn)

In [71]:
df_wz.head(2)

Unnamed: 0,school_id,name,polygon_number,lat_coordinate,long_coordinate
0,142,A. E. Cross School,0,51.017392,-114.117939
1,142,A. E. Cross School,0,51.020302,-114.117945


### Testing .dissolve function:

In [72]:
# One POINT per walk-zone vertex (WGS84), used to experiment with dissolve()
gdf_wz_d = gpd.GeoDataFrame(
    df_wz,
    geometry=gpd.points_from_xy(
        x=df_wz['long_coordinate'],
        y=df_wz['lat_coordinate'],
        crs="EPSG:4326",
    ),
)

In [73]:
gdf_wz_d.head()

Unnamed: 0,school_id,name,polygon_number,lat_coordinate,long_coordinate,geometry
0,142,A. E. Cross School,0,51.017392,-114.117939,POINT (-114.11794 51.01739)
1,142,A. E. Cross School,0,51.020302,-114.117945,POINT (-114.11794 51.02030)
2,142,A. E. Cross School,0,51.021632,-114.117947,POINT (-114.11795 51.02163)
3,142,A. E. Cross School,0,51.022792,-114.117949,POINT (-114.11795 51.02279)
4,142,A. E. Cross School,0,51.023261,-114.117932,POINT (-114.11793 51.02326)


In [74]:
# Collapse the vertex points into one MULTIPOINT per (school, polygon).
# dissolve() aggregates the non-geometry columns with its default aggfunc ("first"),
# so lat_coordinate / long_coordinate in the result are just one vertex's values.
gdf_wz_dis= gdf_wz_d.dissolve(by=['school_id','name','polygon_number'])

In [75]:
gdf_wz_dis.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,geometry,lat_coordinate,long_coordinate
school_id,name,polygon_number,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
6,Richmond School,0,"MULTIPOINT ((-114.13126 51.02814), (-114.13126...",51.037837,-114.111247
7,Alexander Ferguson School,0,"MULTIPOINT ((-114.12274 51.04907), (-114.12270...",51.04907,-114.122735
7,Alexander Ferguson School,1,"MULTIPOINT ((-114.13550 51.03471), (-114.13550...",51.04907,-114.122735


In [76]:
# Second pass: merge the per-polygon MULTIPOINTs into a single geometry per school
gdf_wz_dis2 = gdf_wz_dis.dissolve(by=['school_id','name'])

In [77]:
gdf_wz_dis2.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,geometry,lat_coordinate,long_coordinate
school_id,name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
6,Richmond School,"MULTIPOINT ((-114.13126 51.02814), (-114.13126...",51.037837,-114.111247
7,Alexander Ferguson School,"MULTIPOINT ((-114.13550 51.03471), (-114.13550...",51.04907,-114.122735
8,Jennie Elliott School,"MULTIPOINT ((-114.14130 51.00431), (-114.14130...",51.005794,-114.134527


### adjusting the transform function using dissolve()

In [78]:
def transform_to_geometry_test(df):
    """
    Build one geometry per school from vertex rows, merging polygons with dissolve().

    Rows are grouped by school and polygon number; each group's
    (long_coordinate, lat_coordinate) pairs, taken in row order, form the
    exterior ring of one Polygon. The per-polygon rows are then merged per
    school with ``GeoDataFrame.dissolve``.

    Note that dissolve() *unions* the geometries of each school rather than
    simply collecting them: parts that overlap or touch are merged, so a school
    with several input polygons may come back as a single Polygon, and the
    vertices of a MultiPolygon may differ from the input rings.

    Parameters:
    - df (pd.DataFrame): A DataFrame with the following columns:
        - 'school_id': An identifier for the school.
        - 'name': The name of the school.
        - 'polygon_number': An identifier for a polygon (in case a school has multiple polygons).
        - 'long_coordinate': Longitude part of the coordinate.
        - 'lat_coordinate': Latitude part of the coordinate.

    Returns:
    - gpd.GeoDataFrame: One row per school (CRS EPSG:4326) with the columns:
        - 'school_id': An identifier for the school.
        - 'name': The name of the school.
        - 'geometry': A shapely Polygon or MultiPolygon for the school.

    Raises:
    - Whatever shapely raises when a group has too few points to form a ring
      (presumably ValueError — not handled here).
    """
    polygon_keys = ['school_id', 'name', 'polygon_number']
    coord_cols = ['long_coordinate', 'lat_coordinate']

    # Select only the coordinate columns before apply(): pandas >= 2.2 emits a
    # DeprecationWarning when apply() also operates on the grouping columns.
    # The unnamed Series returned by apply() becomes column 0 after reset_index().
    df_polygons = df.groupby(polygon_keys)[coord_cols].apply(
        lambda group: Polygon(zip(group['long_coordinate'], group['lat_coordinate']))
    ).reset_index().rename(columns={0: 'geometry'})

    # Convert the DataFrame of Polygon objects into a GeoDataFrame
    gdf_polygons = gpd.GeoDataFrame(df_polygons, geometry='geometry', crs="EPSG:4326")

    # Merge (union) each school's polygons into a single geometry
    gdf = gdf_polygons.dissolve(by=['school_id', 'name'], as_index=False)

    # After dissolve, polygon_number only carries one polygon's id per school,
    # which is meaningless for the merged geometry; return a frame without it.
    return gdf.drop(columns=['polygon_number'])

In [79]:
# Build the dissolve()-based walk-zone geometries (one row per school)
df_wz_test= transform_to_geometry_test(df_wz)

In [80]:
df_wz_test.head(10)

Unnamed: 0,school_id,name,geometry
0,6,Richmond School,"POLYGON ((-114.11125 51.03784, -114.11326 51.0..."
1,7,Alexander Ferguson School,"MULTIPOLYGON (((-114.12279 51.04906, -114.1228..."
2,8,Jennie Elliott School,"POLYGON ((-114.13453 51.00579, -114.13608 51.0..."
3,9,Killarney School,"POLYGON ((-114.14115 51.01245, -114.13875 51.0..."
4,11,Altadore School,"POLYGON ((-114.09209 51.01611, -114.09205 51.0..."
5,12,Belvedere Parkway School,"POLYGON ((-114.22291 51.09864, -114.22302 51.0..."
6,13,Bowcroft School,"POLYGON ((-114.19730 51.09800, -114.19734 51.0..."
7,14,Connaught School,"POLYGON ((-114.07151 51.04419, -114.07152 51.0..."
8,15,Earl Grey School,"POLYGON ((-114.07830 51.03781, -114.07936 51.0..."
9,16,Elbow Park School,"POLYGON ((-114.07730 51.02642, -114.07797 51.0..."


In [81]:
df_wz_test.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 237 entries, 0 to 236
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype   
---  ------     --------------  -----   
 0   school_id  237 non-null    int64   
 1   name       237 non-null    object  
 2   geometry   237 non-null    geometry
dtypes: geometry(1), int64(1), object(1)
memory usage: 5.7+ KB


#### Export to GeoJSON for Tableau

In [113]:
df_wz_test.reset_index(inplace=True)
df_wz_test["row_id"] = df_wz_test.index + 1
df_wz_test.reset_index(drop=True, inplace=True)
df_wz_test.set_index("row_id", inplace = True)

In [114]:
df_wz_test.to_file('walk_zones.geojson', driver='GeoJSON')

#### Continue: map the dissolve()-based walk zones

In [82]:
# Schools whose dissolve()-based walk zones are inspected on the map
id_interest = [7, 31, 129, 146, 167, 173, 174, 182, 184, 201, 226, 15486]
df_wz_test_filtered = df_wz_test.loc[df_wz_test['school_id'].isin(id_interest)]

# Centre the map on the first selected geometry; the centroid is (x, y) = (lon, lat)
# and folium expects (lat, lon), hence the reversal.
initial_location = tuple(reversed(df_wz_test_filtered['geometry'].iloc[0].centroid.coords[0]))
m = folium.Map(location=initial_location, zoom_start=15)

# Overlay the selected walk zones, then add the layer toggle
folium.GeoJson(df_wz_test_filtered, name='geojson').add_to(m)
folium.LayerControl().add_to(m)

# Display the map
m

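Conclusion: using `dissolve()` gives visually the same walk zones on the map, and the syntax is arguably cleaner. Note that the resulting geometry objects are not identical to the manual approach for the 12 schools in `id_interest` (see the comparison with `transform_to_geometry` below).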

### Continue with original testing

In [83]:
df_wz.school_id.nunique()

237

In [84]:
# Vertex-row counts per school for polygons numbered above 3
df_wz.loc[df_wz['polygon_number'] > 3, 'school_id'].value_counts()

school_id
15486    108
174       65
226       17
Name: count, dtype: int64

In [85]:
def transform_to_geometry(df):
    """
    Transform a DataFrame of individual coordinate points into one geometry per school.

    Rows are grouped by school and polygon number; each group's
    (long_coordinate, lat_coordinate) pairs, taken in row order, form the
    exterior ring of one Polygon. Schools with more than one polygon get a
    MultiPolygon that simply collects those polygons (they are not unioned, so
    overlapping parts are kept as-is); schools with one polygon keep the Polygon.

    Parameters:
    - df (pd.DataFrame): A DataFrame with the following columns:
        - 'school_id': An identifier for the school.
        - 'name': The name of the school.
        - 'polygon_number': An identifier for a polygon (in case a school has multiple polygons).
        - 'long_coordinate': Longitude part of the coordinate.
        - 'lat_coordinate': Latitude part of the coordinate.

    Returns:
    - pd.DataFrame: One row per school with the following columns:
        - 'school_id': An identifier for the school.
        - 'name': The name of the school.
        - 'geometry': A shapely.geometry.Polygon or shapely.geometry.MultiPolygon object representing the school's geometry.
      An empty input yields an empty DataFrame with these three columns.

    Raises:
    - Whatever shapely raises when a group has too few points to form a ring
      (presumably ValueError — not handled here).
    """
    # Nothing to build: return an empty frame with the documented columns.
    # (Without this guard the second groupby fails with KeyError because the
    # intermediate frame has no 'school_id' / 'name' columns.)
    if df.empty:
        return pd.DataFrame(columns=['school_id', 'name', 'geometry'])

    # One Polygon per (school_id, polygon_number). groupby keeps each group's rows
    # in their original order, which is used as the vertex order of the ring.
    rows_list = [
        {
            'school_id': school_id,
            'name': group['name'].iloc[0],  # Assumes all names are the same for the same school_id
            'geometry': Polygon(zip(group['long_coordinate'], group['lat_coordinate'])),
        }
        for (school_id, _), group in df.groupby(['school_id', 'polygon_number'])
    ]
    schools_geometry = pd.DataFrame(rows_list)

    # Collapse to one row per school: MultiPolygon when there are several
    # polygons, otherwise the single Polygon unchanged.
    schools_geometry = schools_geometry.groupby(['school_id', 'name'])['geometry'].apply(
        lambda x: MultiPolygon(x.tolist()) if len(x) > 1 else x.iloc[0]
    ).reset_index()

    return schools_geometry


In [86]:
# Manual (non-dissolve) walk-zone geometries, one row per school
df_wz_t = transform_to_geometry(df_wz)

In [87]:
# List the schools whose dissolve()-based geometry differs from the manual one.
# A plain `Series != Series` requires identical index labels and raises otherwise,
# so compare by position instead and check explicitly that both frames hold the
# same schools in the same row order.
assert df_wz_test['school_id'].to_list() == df_wz_t['school_id'].to_list(), \
    "frames must list the same schools in the same order"
geometry_differs = (
    df_wz_test['geometry'].reset_index(drop=True)
    != df_wz_t['geometry'].reset_index(drop=True)
).to_numpy()
df_wz_test.loc[geometry_differs, 'school_id'].to_list()

[7, 31, 129, 146, 167, 173, 174, 182, 184, 201, 226, 15486]

In [88]:
df_wz_t.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 237 entries, 0 to 236
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   school_id  237 non-null    int64 
 1   name       237 non-null    object
 2   geometry   237 non-null    object
dtypes: int64(1), object(2)
memory usage: 5.7+ KB


In [89]:
# Promote the plain DataFrame of shapely objects to a GeoDataFrame in WGS84 (EPSG:4326)
gdf_wz_t = gpd.GeoDataFrame(
    df_wz_t, geometry='geometry', crs="EPSG:4326"
)

In [90]:
gdf_wz_t.head(3)

Unnamed: 0,school_id,name,geometry
0,6,Richmond School,"POLYGON ((-114.11125 51.03784, -114.11326 51.0..."
1,7,Alexander Ferguson School,"MULTIPOLYGON (((-114.12274 51.04907, -114.1227..."
2,8,Jennie Elliott School,"POLYGON ((-114.13453 51.00579, -114.13608 51.0..."


Check the results:

In [91]:
# Same 12 schools as before, now drawn from the manually built geometries
id_interest = [7, 31, 129, 146, 167, 173, 174, 182, 184, 201, 226, 15486]
gdf_wz_filtered = gdf_wz_t.loc[gdf_wz_t['school_id'].isin(id_interest)]

# Centre the map on the first selected geometry; the centroid is (lon, lat)
# and folium expects (lat, lon), hence the reversal.
initial_location = tuple(reversed(gdf_wz_filtered['geometry'].iloc[0].centroid.coords[0]))
m = folium.Map(location=initial_location, zoom_start=15)

# Overlay the selected walk zones, then add the layer toggle
folium.GeoJson(gdf_wz_filtered, name='geojson').add_to(m)
folium.LayerControl().add_to(m)

# Display the map
m

## attendance areas:

In [92]:
# sqlite3's connection context manager only commits / rolls back the transaction;
# it does NOT close the connection. closing() guarantees the database file handle
# is released once the query has been read into the DataFrame.
with closing(sqlite3.connect('../database.db')) as conn:
    df_aa = pd.read_sql_query('''
                                   SELECT s.school_id, s.name, aa.polygon_number, aa.lat_coordinate, aa.long_coordinate
FROM attendance_areas aa
INNER JOIN schools s on s.school_id = aa.school_id
                                   ''', conn)

In [93]:
df_aa.head(3)

Unnamed: 0,school_id,name,polygon_number,lat_coordinate,long_coordinate
0,142,A. E. Cross School,0,51.071998,-114.175908
1,142,A. E. Cross School,0,51.071197,-114.175905
2,142,A. E. Cross School,0,51.068347,-114.17588


In [94]:
# Vertex-row counts per school for attendance-area polygons numbered above 2
df_aa.loc[df_aa['polygon_number'] > 2, 'school_id'].value_counts()

school_id
172    174
159     79
Name: count, dtype: int64

In [95]:
# First vertex row of each attendance-area polygon of school 172
df_aa.loc[df_aa['school_id'] == 172].groupby(by=['polygon_number']).first()

Unnamed: 0_level_0,school_id,name,lat_coordinate,long_coordinate
polygon_number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,172,James Fowler High School,51.106265,-114.082533
1,172,James Fowler High School,51.110637,-113.920327
2,172,James Fowler High School,51.145173,-114.065477
3,172,James Fowler High School,51.176069,-113.918397
4,172,James Fowler High School,51.197966,-114.048094


In [96]:
# Attendance-area geometries built with the manual transform, one row per school
df_aa_t = transform_to_geometry(df_aa)

In [97]:
# Promote the plain DataFrame of shapely objects to a GeoDataFrame in WGS84 (EPSG:4326)
gdf_aa_t = gpd.GeoDataFrame(
    df_aa_t, geometry='geometry', crs="EPSG:4326"
)

In [98]:
gdf_aa_t.head(3)

Unnamed: 0,school_id,name,geometry
0,6,Richmond School,"POLYGON ((-114.11123 51.03783, -114.11324 51.0..."
1,7,Alexander Ferguson School,"POLYGON ((-114.12244 51.04949, -114.12247 51.0..."
2,8,Jennie Elliott School,"POLYGON ((-114.11798 51.00871, -114.12142 51.0..."


In [99]:
gdf_aa_t[gdf_aa_t['school_id']==172]

Unnamed: 0,school_id,name,geometry
157,172,James Fowler High School,"MULTIPOLYGON (((-114.08253 51.10626, -114.0826..."


In [100]:
# Attendance area of school 172 only
gdf_aa_filtered = gdf_aa_t.loc[gdf_aa_t['school_id'] == 172]

# Centre the map on the geometry's centroid; the centroid is (lon, lat)
# and folium expects (lat, lon), hence the reversal.
initial_location = tuple(reversed(gdf_aa_filtered['geometry'].iloc[0].centroid.coords[0]))
m_aa = folium.Map(location=initial_location, zoom_start=15)

# Overlay the attendance area, then add the layer toggle
folium.GeoJson(gdf_aa_filtered, name='geojson').add_to(m_aa)
folium.LayerControl().add_to(m_aa)

# Display the map
m_aa

## rental listings:

In [101]:
# sqlite3's connection context manager only commits / rolls back the transaction;
# it does NOT close the connection. closing() guarantees the database file handle
# is released once the query has been read into the DataFrame.
with closing(sqlite3.connect('../database.db')) as conn:
    df_listings = pd.read_sql_query('''
                                   SELECT *
                                   FROM rental_listings
                                   
                                   ''', conn)



In [102]:
# One POINT per rental listing, in WGS84 (EPSG:4326)
gdf_listings = gpd.GeoDataFrame(
    df_listings,
    geometry=gpd.points_from_xy(
        x=df_listings['longitude'],
        y=df_listings['latitude'],
    ),
    crs="EPSG:4326",
)

In [103]:
gdf_listings.head(3)

Unnamed: 0,id,city,community,latitude,longitude,link,type,price,beds,has_den,sq_feet,baths,cats,dogs,activation_date,last_update,is_active,geometry
0,1500,Calgary,Woodbine,50.94231,-114.1252,/ab/calgary/rentals/main-floor/3-bedrooms/wood...,Main Floor,2100,3.0,0,1200.0,1.0,0,0,2024-01-03 13:38:58,2024-01-06 23:13:21,1,POINT (-114.12520 50.94231)
1,2248,Calgary,Falconridge,51.108143,-113.936489,/ab/calgary/rentals/apartment/1-bedroom/falcon...,Apartment,1450,1.0,0,500.0,1.0,1,1,2024-01-05 07:49:33,2024-01-06 23:13:21,1,POINT (-113.93649 51.10814)
2,3078,Calgary,Eau Claire,51.050855,-114.077663,/ab/calgary/rentals/condo/1-bedroom/eau-claire...,Condo Unit,1900,1.0,0,600.0,1.0,0,0,2023-12-30 15:00:58,2024-01-06 23:13:21,1,POINT (-114.07766 51.05085)


# 2. Spatial Joins

## 2.1 Walk zones and rental listings:

In [104]:
# Attach to each listing the school(s) whose walk zone it falls in.
# sjoin's default predicate is "intersects", so a listing inside several walk
# zones produces one row per matching school; how="inner" drops listings that
# match no walk zone. The right frame's index is kept as `index_r`.
gdf_wz_listings = gpd.sjoin(gdf_listings,gdf_wz_t, how="inner",lsuffix='l',rsuffix='r')

In [105]:
gdf_wz_listings.head(3)

Unnamed: 0,id,city,community,latitude,longitude,link,type,price,beds,has_den,...,baths,cats,dogs,activation_date,last_update,is_active,geometry,index_r,school_id,name
1,2248,Calgary,Falconridge,51.108143,-113.936489,/ab/calgary/rentals/apartment/1-bedroom/falcon...,Apartment,1450,1.0,0,...,1.0,1,1,2024-01-05 07:49:33,2024-01-06 23:13:21,1,POINT (-113.93649 51.10814),149,164,Terry Fox School
100,33988,Calgary,Castleridge,51.10378,-113.95685,/ab/calgary/rentals/main-floor/3-bedrooms/cast...,Main Floor,1950,3.0,0,...,1.5,0,0,2024-01-03 07:48:14,2024-01-06 23:13:21,1,POINT (-113.95685 51.10378),149,164,Terry Fox School
146,58367,Calgary,Castleridge,51.10378,-113.95685,/ab/calgary/rentals/house/3-bedrooms/castlerid...,House,2375,3.0,0,...,2.0,0,0,2023-12-30 15:00:58,2024-01-06 23:13:21,1,POINT (-113.95685 51.10378),149,164,Terry Fox School


In [106]:
gdf_wz_listings.columns

Index(['id', 'city', 'community', 'latitude', 'longitude', 'link', 'type',
       'price', 'beds', 'has_den', 'sq_feet', 'baths', 'cats', 'dogs',
       'activation_date', 'last_update', 'is_active', 'geometry', 'index_r',
       'school_id', 'name'],
      dtype='object')

In [107]:
# Walk zone of one school together with the rental listings inside it.
# (The cell previously built a first map, GeoJson layer and LayerControl that
# were thrown away when `m` was re-created; that dead code is removed.)
school_id_plot = 226
gdf_wz_filtered = gdf_wz_t[gdf_wz_t['school_id'] == school_id_plot]

# Filter the joined listings for the specific school_id
listings_for_school = gdf_wz_listings[gdf_wz_listings['school_id'] == school_id_plot]

# Initialize the map at the walk zone's centroid; coords are (lon, lat) and
# folium expects (lat, lon), hence the [::-1].
initial_location = list(gdf_wz_filtered['geometry'].iloc[0].centroid.coords)[0][::-1]
m = folium.Map(location=initial_location, zoom_start=15)

# Add the GeoDataFrame to the map (school's walk zone)
folium.GeoJson(
    gdf_wz_filtered,
    name='School Walk Zone'
).add_to(m)

# Add each rental listing as a point on the map
for index, row in listings_for_school.iterrows():
    folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=folium.Popup(f"{row['city']}, {row['type']}, Price: {row['price']}", max_width=300),
        icon=folium.Icon(color="blue", icon="home")
    ).add_to(m)

# Add layer control to toggle on/off
folium.LayerControl().add_to(m)

# Display the map
m

## 2.2 Attendance area and rental listings

In [108]:
# Attach to each listing the school(s) whose attendance area it falls in.
# sjoin's default predicate is "intersects", so a listing inside several
# attendance areas produces one row per matching school; how="inner" drops
# listings that match no attendance area.
gdf_aa_listings = gpd.sjoin(gdf_listings,gdf_aa_t, how="inner",lsuffix='l',rsuffix='r')

In [109]:
# Attendance area of one school together with the rental listings inside it.
# (Commented-out code and a no-op triple-quoted string statement were removed.)
school_id_plot = 172
gdf_aa_filtered = gdf_aa_t[gdf_aa_t['school_id'] == school_id_plot]

# Filter the joined listings for the specific school_id
listings_for_school = gdf_aa_listings[gdf_aa_listings['school_id'] == school_id_plot]

# Initialize the map at the attendance area's centroid; coords are (lon, lat)
# and folium expects (lat, lon), hence the [::-1].
initial_location = list(gdf_aa_filtered['geometry'].iloc[0].centroid.coords)[0][::-1]
m_aa = folium.Map(location=initial_location, zoom_start=15)

# Add the GeoDataFrame to the map (school's attendance area)
folium.GeoJson(
    gdf_aa_filtered,
    name='School Attendance Area'
).add_to(m_aa)

# Add each rental listing as a point on the map
for index, row in listings_for_school.iterrows():
    folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=folium.Popup(f"{row['city']}, {row['type']}, Price: {row['price']}", max_width=300),
        icon=folium.Icon(color="blue", icon="home")
    ).add_to(m_aa)

# Add layer control to toggle on/off
folium.LayerControl().add_to(m_aa)



<folium.map.LayerControl at 0x22187edd270>

In [110]:
# Display the map
m_aa