In [1]:
import pandas as pd
import geopy
import numpy as np
import geopandas as gpd
from shapely.geometry import Point

In [2]:
def df_to_gdf(input_df):
    """
    Convert a DataFrame with longitude and latitude columns
    to a GeoDataFrame.
    """
    df = input_df.copy()
    geometry = [Point(xy) for xy in zip(df.Longitude, df.Latitude)]
    gdf = gpd.GeoDataFrame(df, crs=4326, geometry=geometry)
    return gdf

## Load Data

### SPRE Locations

In [3]:
spre2021 = pd.read_excel("../SPRE_data/2021_SPRE_DATA_Aug8.xlsx", sheet_name="Primary-2021-Clean")
spre2021_gdf = df_to_gdf(spre2021)
spre2021_gdf = spre2021_gdf.dropna(subset=['211_Address1'])
spre2021_gdf.head()
spre2021_gdf = spre2021_gdf.to_crs(epsg=4326)

### CT Polygon

In [4]:
ct = gpd.read_file("ct-geom.geo.json")
ct = ct[["id", "geometry"]]
ct = ct.rename(columns={"id":"GeoUID"})
ct.head()

Unnamed: 0,GeoUID,geometry
0,5350001.0,"MULTIPOLYGON (((-79.33526 43.62681, -79.33497 ..."
1,5350002.0,"MULTIPOLYGON (((-79.38245 43.62556, -79.38265 ..."
2,5350003.0,"POLYGON ((-79.43466 43.63369, -79.43566 43.633..."
3,5350004.0,"POLYGON ((-79.43466 43.63369, -79.43603 43.637..."
4,5350005.0,"POLYGON ((-79.43603 43.63717, -79.43089 43.638..."


## Create Buffer and Groupby Count of SPRE Location

Check projection system to be in UTM

In [5]:
ct = ct.to_crs(epsg = 32617)
ct.crs

<Derived Projected CRS: EPSG:32617>
Name: WGS 84 / UTM zone 17N
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- name: Between 84°W and 78°W, northern hemisphere between equator and 84°N, onshore and offshore. Bahamas. Ecuador - north of equator. Canada - Nunavut; Ontario; Quebec. Cayman Islands. Colombia. Costa Rica. Cuba. Jamaica. Nicaragua. Panama. United States (USA).
- bounds: (-84.0, 0.0, -78.0, 84.0)
Coordinate Operation:
- name: UTM zone 17N
- method: Transverse Mercator
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [6]:
spre2021_gdf = spre2021_gdf.to_crs(epsg = 32617)
spre2021_gdf.crs

<Derived Projected CRS: EPSG:32617>
Name: WGS 84 / UTM zone 17N
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- name: Between 84°W and 78°W, northern hemisphere between equator and 84°N, onshore and offshore. Bahamas. Ecuador - north of equator. Canada - Nunavut; Ontario; Quebec. Cayman Islands. Colombia. Costa Rica. Cuba. Jamaica. Nicaragua. Panama. United States (USA).
- bounds: (-84.0, 0.0, -78.0, 84.0)
Coordinate Operation:
- name: UTM zone 17N
- method: Transverse Mercator
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [7]:
#set radius of buffer in here
RADIUS_BUFFER = 500

#Function to create the buffer 
def polygon_buffer(df):
    df["buffer"]=df.geometry.buffer(RADIUS_BUFFER)
    return df

In [8]:
buffered_ct = polygon_buffer(ct)
buffered_ct = buffered_ct.drop(columns=["geometry"]).rename(columns={"buffer":"geometry"})
buffered_ct.head(1)

Unnamed: 0,GeoUID,geometry
0,5350001.0,"POLYGON ((632050.852 4833978.039, 632060.428 4..."


### Create 500m buffer and count SPRE locations by each buffered polygon

In [9]:
def buffer_spre_count(ct_df, spre_df, buffer_radius):
    buffers = ct_df.geometry.buffer(buffer_radius)
    buffer_df = pd.concat((ct_df["GeoUID"], buffers.rename("geometry")), axis=1)
    for tenure in ["Own", "Rent", "Unknown"]:
        buffer_df[f"{tenure}_count"] = buffer_df.geometry.apply(lambda x: spre_df[spre_df["Tenure"] == tenure].intersects(x).sum())
    buffer_df["Total_count"] = buffer_df["Own_count"] + buffer_df["Rent_count"] + buffer_df["Unknown_count"]
    return buffer_df

In [10]:
spre_buffer_count = buffer_spre_count(ct, spre2021_gdf, 500)
spre_buffer_count

Unnamed: 0,GeoUID,geometry,Own_count,Rent_count,Unknown_count,Total_count
0,5350001.00,"POLYGON ((632050.852 4833978.039, 632060.428 4...",5,10,1,16
1,5350002.00,"POLYGON ((628717.203 4831113.996, 628701.849 4...",0,2,0,2
2,5350003.00,"POLYGON ((623667.205 4831737.064, 623635.129 4...",3,5,3,11
3,5350004.00,"POLYGON ((625239.229 4833413.891, 625609.873 4...",3,8,4,15
4,5350005.00,"POLYGON ((625672.199 4832623.108, 625566.250 4...",3,8,7,18
...,...,...,...,...,...,...
1060,5350586.05,"POLYGON ((593033.487 4843856.470, 592383.366 4...",0,0,0,0
1061,5350587.01,"POLYGON ((583071.467 4866665.146, 583297.807 4...",1,0,0,1
1062,5350587.02,"POLYGON ((572750.397 4862471.265, 572803.194 4...",1,1,0,2
1063,5350802.01,"POLYGON ((648207.509 4852613.681, 648189.929 4...",0,1,0,1


In [11]:
spre_buffer_count = spre_buffer_count.drop(columns=["geometry"])


Unnamed: 0,ctuid,Own_count,Rent_count,Unknown_count,Total_count
0,5350001.00,5,10,1,16
1,5350002.00,0,2,0,2
2,5350003.00,3,5,3,11
3,5350004.00,3,8,4,15
4,5350005.00,3,8,7,18
...,...,...,...,...,...
1060,5350586.05,0,0,0,0
1061,5350587.01,1,0,0,1
1062,5350587.02,1,1,0,2
1063,5350802.01,0,1,0,1


In [35]:
spre_buffer_count = spre_buffer_count.rename(columns={'GeoUID': 'ctuid'})
spre_buffer_count["ctuid"] = spre_buffer_count["ctuid"].astype(str)
spre_buffer_count.to_csv("spre-near-ct.csv", index=False)

### Joining it all together

In [42]:
ct = gpd.read_file("ct-geom.geo.json", dtype={'id': str})
ct = ct[["id", "geometry"]]
ct = ct.rename(columns={'id': 'ctuid'})
ct["ctuid"] = ct["ctuid"].astype(str)

equity = pd.read_csv("equity_index.csv", dtype={'ctuid': str})
equity["ctuid"] = equity["ctuid"].astype(str)

spre = pd.read_csv("spre-near-ct.csv", dtype={'ctuid': str})
spre["ctuid"] = spre["ctuid"].astype(str)

census = pd.read_csv("census-2021-data.csv", dtype={'ctuid': str})
census["ctuid"] = census["ctuid"].astype(str)


In [43]:
spre, ct, equity, census

(           ctuid  Own_count  Rent_count  Unknown_count  Total_count
 0     5350001.00          5          10              1           16
 1     5350002.00          0           2              0            2
 2     5350003.00          3           5              3           11
 3     5350004.00          3           8              4           15
 4     5350005.00          3           8              7           18
 ...          ...        ...         ...            ...          ...
 1060  5350586.05          0           0              0            0
 1061  5350587.01          1           0              0            1
 1062  5350587.02          1           1              0            2
 1063  5350802.01          0           1              0            1
 1064  5350802.02          0           0              0            0
 
 [1065 rows x 5 columns],
            ctuid                                           geometry
 0     5350001.00  MULTIPOLYGON (((-79.33526 43.62681, -79.33497 ...
 1    

In [51]:
df = pd.merge(ct, spre, on='ctuid', how='left')
df = pd.merge(df, equity, on='ctuid', how='left')
df = pd.merge(df, census, on='ctuid', how='left')

round1 = [
    'Immigrant%', 'VM%', '1-ParentFam%', 'MBM%', 'LIM%', 'Neet%', '%CHN', '%Affordable', '%ofWP', 'ShortTerm%'
]
df[round1] = df[round1].round(2)

df["Equity Index"] = df["Equity Index"].round(3)

df.to_file('d.geo.json', driver='GeoJSON', precision=5)