In [1]:
import pandas as pd
import numpy as np
import re

#!pip install geopandas
import geopandas as gpd
from shapely.geometry import Point

# Public Transit Data

In [18]:
# Public bus transit data: CTA bus stops, read from CSV into a DataFrame
df_bus = pd.read_csv('Public_transit/cta-bus-stops-2.csv')

# Public rail transit data: CTA rail stops, read from CSV into a DataFrame
df_rails = pd.read_csv('Public_transit/cta-rail-stops.csv')

# Path to the GeoJSON file holding the Chicago community-area boundaries
geo_communities = 'Public_transit/community_areas_chicago.geojson'

# Load the community areas into a GeoDataFrame (one row per area, with a `geometry` column)
geo_community_areas = gpd.read_file(geo_communities)

In [19]:
# Preview the first rows of the community-area GeoDataFrame
geo_community_areas.head()

Unnamed: 0,community,area,shape_area,perimeter,area_num_1,area_numbe,comarea_id,comarea,shape_len,geometry
0,DOUGLAS,0,46004621.1581,0,35,35,0,0,31027.0545098,"MULTIPOLYGON (((-87.60914 41.84469, -87.60915 ..."
1,OAKLAND,0,16913961.0408,0,36,36,0,0,19565.5061533,"MULTIPOLYGON (((-87.59215 41.81693, -87.59231 ..."
2,FULLER PARK,0,19916704.8692,0,37,37,0,0,25339.0897503,"MULTIPOLYGON (((-87.62880 41.80189, -87.62879 ..."
3,GRAND BOULEVARD,0,48492503.1554,0,38,38,0,0,28196.8371573,"MULTIPOLYGON (((-87.60671 41.81681, -87.60670 ..."
4,KENWOOD,0,29071741.9283,0,39,39,0,0,23325.1679062,"MULTIPOLYGON (((-87.59215 41.81693, -87.59215 ..."


In [20]:
# (rows, columns) of the community-area table; the row count is the number of areas loaded
geo_community_areas.shape

(77, 10)

Chicago has 77 community areas, so the 77 rows above confirm that every community area is present in the file.

In [21]:
# Preview the first rows of the bus-stop table
df_bus.head()

Unnamed: 0,SYSTEMSTOP,OBJECTID,the_geom,STREET,CROSS_ST,DIR,POS,ROUTESSTPG,OWLROUTES,CITY,STATUS,PUBLIC_NAM,POINT_X,POINT_Y
0,11953,193,POINT (-87.54862703700002 41.72818418100002),92ND STREET,BALTIMORE,EB,NS,95,,CHICAGO,1,92nd Street & Baltimore,-87.548627,41.728184
1,2723,194,POINT (-87.737227163 41.749111071000016),79TH STREET,KILPATRICK (east leg),EB,NS,79,,CHICAGO,1,79th Street & Kilpatrick,-87.737227,41.749111
2,1307,195,POINT (-87.74397362600001 41.924143016000016),FULLERTON,KILPATRICK,EB,NS,74,,CHICAGO,1,Fullerton & Kilpatrick,-87.743974,41.924143
3,6696,196,POINT (-87.65929365400001 41.86931424800002),TAYLOR,THROOP,EB,NS,157,,CHICAGO,1,Taylor & Throop,-87.659294,41.869314
4,22,197,POINT (-87.72780787099998 41.877006596),JACKSON,KARLOV,EB,FS,126,,CHICAGO,1,Jackson & Karlov,-87.727808,41.877007


In [22]:
# Count how many bus stops carry each STATUS code
df_bus['STATUS'].value_counts()

1    10760
2      220
6       66
5       21
3        7
Name: STATUS, dtype: int64

In [23]:
# Keep only the bus stops that are in service (STATUS code 1)
df_bus = df_bus.loc[df_bus['STATUS'].eq(1)]

## Match bus stop coordinates to the corresponding community areas

### Helper function to convert to Point format

In [25]:
def string_to_point(point_str):
    """Convert a WKT-style point string into a shapely ``Point``.

    The first two numbers found in ``point_str`` become the x and y
    coordinates, in that order (for the bus data: longitude, then latitude),
    e.g. ``'POINT (-87.5486 41.7281)'`` -> ``Point(-87.5486, 41.7281)``.

    Parameters
    ----------
    point_str : str
        Text containing at least two numbers.

    Returns
    -------
    shapely.geometry.Point
        Point built from the first two numbers in the string.

    Raises
    ------
    TypeError
        If ``point_str`` is not a string (for example NaN from an empty cell).
    ValueError
        If fewer than two numbers can be found in ``point_str``.
    """
    if not isinstance(point_str, str):
        raise TypeError(
            f"expected a coordinate string, got {type(point_str).__name__}: {point_str!r}"
        )

    # Match whole signed decimals (optionally with an exponent) instead of any
    # run of '-', '.' and digits, so values such as '1e-05' stay in one piece
    # and stray '-' or '.' characters are never handed to float().
    coords = re.findall(r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?", point_str)
    if len(coords) < 2:
        raise ValueError(f"could not find two coordinates in {point_str!r}")

    # Convert strings to float and create Point geometry
    return Point(float(coords[0]), float(coords[1]))

In [26]:
# Show the raw geometry string of the stop with OBJECTID 193.
# .loc selects the matching rows and column in one step, and .iloc[0] takes the
# first match by position, so this does not rely on that row's index label being 0.
df_bus.loc[df_bus['OBJECTID'] == 193, 'the_geom'].iloc[0]

'POINT (-87.54862703700002 41.72818418100002)'

In [27]:
# Raw geometry strings of every bus stop
list_of_points = df_bus['the_geom'].tolist()

# Parse each string into a shapely Point, keeping the original order
points = [string_to_point(geom_text) for geom_text in list_of_points]

In [28]:
# Wrap the parsed bus-stop points in a GeoDataFrame; EPSG:4326 declares the
# coordinates as WGS 84 longitude/latitude.
# NOTE(review): built from a plain list, so the frame presumably gets a fresh
# 0..n-1 index rather than df_bus's row labels — confirm before joining back to df_bus.
gdf_points = gpd.GeoDataFrame(geometry=points, crs="EPSG:4326")

In [29]:
# Attach to each bus stop the community area whose polygon contains it.
# how="inner" drops stops that fall outside every community area.
# `predicate` replaces the deprecated `op` keyword of gpd.sjoin (removed in geopandas 1.0).
points_in_community = gpd.sjoin(gdf_points, geo_community_areas, how="inner", predicate='within')

  if (await self.run_code(code, result,  async_=asy)):


In [30]:
# Keep only the stop geometry plus the name and number of the community area it matched
community_columns = ['geometry', 'community', 'area_numbe']
points_in_community = points_in_community[community_columns]
points_in_community

Unnamed: 0,geometry,community,area_numbe
0,POINT (-87.54863 41.72818),SOUTH CHICAGO,46
62,POINT (-87.56119 41.75174),SOUTH CHICAGO,46
114,POINT (-87.54823 41.75223),SOUTH CHICAGO,46
262,POINT (-87.54699 41.73003),SOUTH CHICAGO,46
272,POINT (-87.55430 41.73729),SOUTH CHICAGO,46
...,...,...,...
9037,POINT (-87.84665 41.96824),OHARE,76
9616,POINT (-87.83669 41.97576),OHARE,76
9708,POINT (-87.84147 41.97942),OHARE,76
10135,POINT (-87.84400 41.96635),OHARE,76


In [31]:
# Number of matched bus stops per community area, most to fewest
points_in_community['community'].value_counts()

AUSTIN                    405
NEAR WEST SIDE            366
WEST TOWN                 291
NEAR NORTH SIDE           268
GREATER GRAND CROSSING    257
                         ... 
OHARE                      30
OAKLAND                    25
RIVERDALE                  23
BURNSIDE                   19
EDISON PARK                16
Name: community, Length: 77, dtype: int64

## Match train/rail stop coordinates to the corresponding community areas

In [32]:
# Display the rail-stop table (pandas truncates the middle rows in the rendered output)
df_rails

Unnamed: 0,STOP_ID,DIRECTION_ID,STOP_NAME,STATION_NAME,STATION_DESCRIPTIVE_NAME,MAP_ID,ADA,RED,BLUE,G,BRN,P,Pexp,Y,Pnk,O,Location
0,30082,E,Cicero (Loop-bound),Cicero,Cicero (Pink Line),40420,True,False,False,False,False,False,False,False,True,False,"(41.85182, -87.745336)"
1,30151,E,Central Park (Loop-bound),Central Park,Central Park (Pink Line),40780,True,False,False,False,False,False,False,False,True,False,"(41.853839, -87.714842)"
2,30184,W,Halsted/63rd (Ashland-bound),Halsted,Halsted (Green Line),40940,True,False,False,True,False,False,False,False,False,False,"(41.778943, -87.644244)"
3,30044,N,Cumberland (O'Hare-bound),Cumberland,Cumberland (Blue Line),40230,True,False,True,False,False,False,False,False,False,False,"(41.984246, -87.838028)"
4,30092,E,Racine (O'Hare-bound),Racine,Racine (Blue Line),40470,False,False,True,False,False,False,False,False,False,False,"(41.87592, -87.659458)"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
295,30009,W,Cicero (Harlem-bound),Cicero,Cicero (Green Line),40480,True,False,False,True,False,False,False,False,False,False,"(41.886519, -87.744698)"
296,30259,N,Montrose (O'Hare-bound),Montrose,Montrose (Blue Line),41330,False,False,True,False,False,False,False,False,False,False,"(41.961539, -87.743574)"
297,30125,N,North/Clybourn (Howard-bound),North/Clybourn,North/Clybourn (Red Line),40650,False,True,False,False,False,False,False,False,False,False,"(41.910655, -87.649177)"
298,30172,S,O'Hare (Forest Pk-bound),O'Hare,O'Hare (Blue Line),40890,True,False,True,False,False,False,False,False,False,False,"(41.97766526, -87.90422307)"


In [80]:
# Function to convert location strings to Point objects
def string_to_point_rails(point_str):
    """Convert a rail ``Location`` string into a shapely ``Point``.

    The rail data stores locations as ``"(latitude, longitude)"``, while a
    ``Point`` takes x (longitude) first, so the two values are swapped:
    ``'(41.85182, -87.745336)'`` -> ``Point(-87.745336, 41.85182)``.

    Parameters
    ----------
    point_str : str
        Text containing exactly two numbers, latitude first.

    Returns
    -------
    shapely.geometry.Point or None
        The point, or ``None`` when ``point_str`` is not a string (for example
        NaN from an empty cell) or does not contain exactly two numbers.
    """
    # Missing values arrive as NaN/None; treat them like any other unparseable
    # location instead of letting re.findall raise TypeError.
    if not isinstance(point_str, str):
        return None

    # Match whole signed decimals (optionally with an exponent) so that stray
    # '-' or '.' characters are never counted as coordinates.
    coords = re.findall(r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?", point_str)
    if len(coords) != 2:
        return None

    latitude, longitude = float(coords[0]), float(coords[1])
    # Point expects (x, y) == (longitude, latitude)
    return Point(longitude, latitude)

In [81]:
# Raw "(latitude, longitude)" strings of every rail stop
list_of_points = df_rails['Location'].tolist()

# Parse each string into a shapely Point (None where parsing fails), keeping the original order
points = [string_to_point_rails(location_text) for location_text in list_of_points]

In [82]:
# Wrap the parsed rail-stop points in a GeoDataFrame declared as EPSG:4326.
# This reuses the name gdf_points, replacing the bus-stop frame built earlier.
gdf_points = gpd.GeoDataFrame(geometry=points, crs="EPSG:4326")

In [83]:
# Attach to each rail stop the community area whose polygon contains it.
# how="inner" drops stops that fall outside every community area.
# `predicate` replaces the deprecated `op` keyword of gpd.sjoin (removed in geopandas 1.0).
# This reuses the name points_in_community, replacing the bus-stop result.
points_in_community = gpd.sjoin(gdf_points, geo_community_areas, how="inner", predicate='within')

  if (await self.run_code(code, result,  async_=asy)):


In [85]:
# Keep only the stop geometry plus the name and number of the community area it matched
community_columns = ['geometry', 'community', 'area_numbe']
points_in_community = points_in_community[community_columns]
points_in_community

Unnamed: 0,geometry,community,area_numbe
1,POINT (-87.71484 41.85384),NORTH LAWNDALE,29
50,POINT (-87.70541 41.85396),NORTH LAWNDALE,29
124,POINT (-87.72431 41.85373),NORTH LAWNDALE,29
163,POINT (-87.73326 41.85375),NORTH LAWNDALE,29
172,POINT (-87.72431 41.85373),NORTH LAWNDALE,29
...,...,...,...
284,POINT (-87.67487 41.95452),NORTH CENTER,5
207,POINT (-87.69477 41.85411),SOUTH LAWNDALE,30
287,POINT (-87.69477 41.85411),SOUTH LAWNDALE,30
209,POINT (-87.62572 41.76837),GREATER GRAND CROSSING,69


In [89]:
# Number of matched rail stops per community area, most to fewest
points_in_community['community'].value_counts()

LOOP                      34
NEAR WEST SIDE            18
LAKE VIEW                 18
NEAR NORTH SIDE           14
ROGERS PARK               10
AUSTIN                    10
UPTOWN                     8
WEST TOWN                  8
EAST GARFIELD PARK         8
EDGEWATER                  8
NORTH LAWNDALE             8
LINCOLN SQUARE             8
LOWER WEST SIDE            8
GRAND BOULEVARD            8
ALBANY PARK                6
IRVING PARK                6
LOGAN SQUARE               6
LINCOLN PARK               6
NEAR SOUTH SIDE            6
ARMOUR SQUARE              4
WEST GARFIELD PARK         4
NORTH CENTER               4
CHATHAM                    4
ENGLEWOOD                  4
OHARE                      4
WOODLAWN                   4
FULLER PARK                4
MCKINLEY PARK              2
SOUTH LAWNDALE             2
GARFIELD RIDGE             2
AVONDALE                   2
NORWOOD PARK               2
BRIGHTON PARK              2
JEFFERSON PARK             2
WASHINGTON PAR

In [90]:
# Display the bus-stop table (pandas truncates the middle rows in the rendered output)
df_bus

Unnamed: 0,SYSTEMSTOP,OBJECTID,the_geom,STREET,CROSS_ST,DIR,POS,ROUTESSTPG,OWLROUTES,CITY,STATUS,PUBLIC_NAM,POINT_X,POINT_Y
0,11953,193,POINT (-87.54862703700002 41.72818418100002),92ND STREET,BALTIMORE,EB,NS,95,,CHICAGO,1,92nd Street & Baltimore,-87.548627,41.728184
1,2723,194,POINT (-87.737227163 41.749111071000016),79TH STREET,KILPATRICK (east leg),EB,NS,79,,CHICAGO,1,79th Street & Kilpatrick,-87.737227,41.749111
2,1307,195,POINT (-87.74397362600001 41.924143016000016),FULLERTON,KILPATRICK,EB,NS,74,,CHICAGO,1,Fullerton & Kilpatrick,-87.743974,41.924143
3,6696,196,POINT (-87.65929365400001 41.86931424800002),TAYLOR,THROOP,EB,NS,157,,CHICAGO,1,Taylor & Throop,-87.659294,41.869314
4,22,197,POINT (-87.72780787099998 41.877006596),JACKSON,KARLOV,EB,FS,126,,CHICAGO,1,Jackson & Karlov,-87.727808,41.877007
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11069,7054,7462,POINT (-87.56049104499999 41.76386655599998),S. SHORE DRIVE,73RD STREET,SB,FS,626,,CHICAGO,1,S. Shore Drive & 73rd Street,-87.560491,41.763867
11070,6363,7463,POINT (-87.64107999999999 41.87558799999999),CLINTON,BLUE LINE (FOREST PARK),SB,MB,73760157,N60,CHICAGO,1,Clinton Blue Line Station,-87.641080,41.875588
11071,8986,7464,POINT (-87.72808945000003 41.966580921),PULASKI,LELAND,SB,NT,53,,CHICAGO,1,Pulaski & Leland,-87.728089,41.966581
11072,11535,7465,POINT (-87.76675599999999 41.94412799999998),CENTRAL,CORNELIA,SB,NS,85,,CHICAGO,1,Central & Cornelia,-87.766756,41.944128


In [91]:
# Display the rail-stop table.
# NOTE(review): the saved output below shows Location_Point and Community_Area
# columns that no visible cell creates — likely leftover kernel state from a
# deleted cell; confirm with Restart Kernel & Run All.
df_rails

Unnamed: 0,STOP_ID,DIRECTION_ID,STOP_NAME,STATION_NAME,STATION_DESCRIPTIVE_NAME,MAP_ID,ADA,RED,BLUE,G,BRN,P,Pexp,Y,Pnk,O,Location,Location_Point,Community_Area
0,30082,E,Cicero (Loop-bound),Cicero,Cicero (Pink Line),40420,True,False,False,False,False,False,False,False,True,False,"(41.85182, -87.745336)",POINT (41.85182 -87.745336),
1,30151,E,Central Park (Loop-bound),Central Park,Central Park (Pink Line),40780,True,False,False,False,False,False,False,False,True,False,"(41.853839, -87.714842)",POINT (41.853839 -87.714842),
2,30184,W,Halsted/63rd (Ashland-bound),Halsted,Halsted (Green Line),40940,True,False,False,True,False,False,False,False,False,False,"(41.778943, -87.644244)",POINT (41.778943 -87.644244),
3,30044,N,Cumberland (O'Hare-bound),Cumberland,Cumberland (Blue Line),40230,True,False,True,False,False,False,False,False,False,False,"(41.984246, -87.838028)",POINT (41.984246 -87.838028),
4,30092,E,Racine (O'Hare-bound),Racine,Racine (Blue Line),40470,False,False,True,False,False,False,False,False,False,False,"(41.87592, -87.659458)",POINT (41.87592 -87.659458),
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
295,30009,W,Cicero (Harlem-bound),Cicero,Cicero (Green Line),40480,True,False,False,True,False,False,False,False,False,False,"(41.886519, -87.744698)",POINT (41.886519 -87.744698),
296,30259,N,Montrose (O'Hare-bound),Montrose,Montrose (Blue Line),41330,False,False,True,False,False,False,False,False,False,False,"(41.961539, -87.743574)",POINT (41.961539 -87.743574),
297,30125,N,North/Clybourn (Howard-bound),North/Clybourn,North/Clybourn (Red Line),40650,False,True,False,False,False,False,False,False,False,False,"(41.910655, -87.649177)",POINT (41.910655 -87.649177),
298,30172,S,O'Hare (Forest Pk-bound),O'Hare,O'Hare (Blue Line),40890,True,False,True,False,False,False,False,False,False,False,"(41.97766526, -87.90422307)",POINT (41.97766526 -87.90422307),
