## CTA Bus Tracker Data Exploration

#### See detailed documentation on CTA Bus Tracker API here:
https://www.transitchicago.com/assets/1/6/cta_Bus_Tracker_API_Developer_Guide_and_Documentation_20160929.pdf

In [126]:
import os

import geopandas as gpd
import pandas as pd
import requests
from dotenv import load_dotenv
from shapely import Point, LineString

In [9]:
# Load variables from the .env file into the environment, then read the
# CTA Bus Tracker API key from it (the key is never hard-coded in the notebook).
load_dotenv()
API_KEY = os.getenv('API_KEY')

# Fail fast with an actionable message: without a key every API cell below
# would send "key=None" and fail later with a far less obvious error.
if not API_KEY:
    raise RuntimeError("API_KEY is not set; add API_KEY=<your key> to the .env file")

### gettime request - returns the CTA server time (use it to check that the local clock is in sync)

In [10]:
# gettime needs only the API key. No format is requested here, so the body
# comes back as XML containing the server timestamp.
api_url = f'http://www.ctabustracker.com/bustime/api/v2/gettime?key={API_KEY}'

# A timeout keeps a stalled connection from hanging the kernel indefinitely,
# and raise_for_status surfaces HTTP errors instead of showing an error page.
response = requests.get(api_url, timeout=10)
response.raise_for_status()
response.text

'<?xml version="1.0"?>\r\n<bustime-response><tm>20230108 12:14:13</tm></bustime-response>\r\n'

### getvehicles request by route(s) or vehicle(s)

In [55]:
# A route or comma-separated list of routes is required: &rt=20 (route 20 only)
# or &rt=20,22 (two routes). Alternatively, a vehicle or list of vehicles can be
# specified instead.
# &format=json must be given explicitly; otherwise the API responds with XML.

# Named vehicle_routes (not `routes`) so it cannot be confused with the
# getroutes response, which is stored under `routes` further down.
vehicle_routes = '20'
api_url = f'http://www.ctabustracker.com/bustime/api/v2/getvehicles?key={API_KEY}&rt={vehicle_routes}&format=json'

# Timeout prevents an indefinite hang; raise_for_status turns an HTTP error
# into a clear exception instead of a JSON decoding failure.
response = requests.get(api_url, timeout=10)
response.raise_for_status()
vehicles = response.json()

# vehicles


In [56]:
# Convert the list of vehicle records in the getvehicles response to a DataFrame.
# Reads `vehicles` from the request cell above; the previously referenced name
# `vehicles_20` is not defined anywhere and raised a NameError on a fresh kernel.
df_vehicles = pd.DataFrame(vehicles['bustime-response']['vehicle'])
df_vehicles

Unnamed: 0,vid,tmstmp,lat,lon,hdg,pid,rt,des,pdist,dly,tatripid,origtatripno,tablockid,zone
0,1974,20230108 12:56,41.88128662109375,-87.68594360351562,269,954,20,Austin,16758,False,102815,238685903,20 -805,
1,1966,20230108 12:56,41.88058199201311,-87.77409199305943,322,954,20,Austin,41268,False,102814,238686008,20 -802,
2,8112,20230108 12:56,41.88038333333333,-87.77415333333333,187,957,20,Michigan,0,False,102974,238685879,N20 -891,
3,1907,20230108 12:56,41.88053363864704,-87.7381752143472,90,957,20,Michigan,10128,False,102970,238685888,20 -851,
4,2014,20230108 12:56,41.88119125366211,-87.68541229248046,94,957,20,Michigan,24575,False,102969,238685965,20 -804,
5,2019,20230108 12:56,41.88176345825195,-87.6497802734375,87,957,20,Michigan,34832,False,102968,238685813,N20 -893,
6,1901,20230108 12:55,41.88277339935303,-87.6245346069336,181,957,20,Michigan,42352,False,102966,238685944,20 -801,


### getvehicles data scraped for a full day from chn-ghost-buses

In [57]:
# One CSV per service day, holding every getvehicles scrape for that day
datestring = '2022-10-05'
chn_data_source = f'https://chn-ghost-buses-public.s3.us-east-2.amazonaws.com/bus_full_day_data_v2/{datestring}.csv'

# Explicit dtypes keep identifier-like columns (route, trip and block IDs) as
# strings and the numeric fields as numbers.
# The key for the original trip number is 'origtatripno', matching the CSV
# column; the earlier spelling 'origatripno' matched no column, so its dtype
# was never applied.
vehicles_allroutes = pd.read_csv(
    chn_data_source, dtype={
        'vid':'int',
        'tmstmp':'str',
        'lat':'float',
        'lon':'float',
        'hdg':'int',
        'pid':'int',
        'rt':'str',
        'pdist':'int',
        'des':'str',
        'dly':'bool',
        'tatripid':'str',
        'origtatripno':'int',
        'tablockid':'str',
        'zone':'str',
        'scrape_file':'str',
        'data_hour':'int',
        'data_date':'str'
        }
    )

vehicles_allroutes

Unnamed: 0,vid,tmstmp,lat,lon,hdg,pid,rt,des,pdist,dly,tatripid,origtatripno,tablockid,zone,scrape_file,data_time,data_hour,data_date
0,1295,20221005 00:02,41.894222,-87.620331,128,18414,3,Michigan/Chicago,70632,False,398,235351548,3 -707,,bus_data/2022-10-05/00:02:58.json,2022-10-05 00:02:00,0,2022-10-05
1,1211,20221005 00:02,41.893993,-87.620178,180,18415,3,95th,419,False,1080332,235351424,3 -712,,bus_data/2022-10-05/00:02:58.json,2022-10-05 00:02:00,0,2022-10-05
2,1589,20221005 00:02,41.873760,-87.624321,174,18415,3,95th,11254,False,1080331,235351525,3 -758,,bus_data/2022-10-05/00:02:58.json,2022-10-05 00:02:00,0,2022-10-05
3,1235,20221005 00:02,41.800130,-87.616126,178,18415,3,95th,39721,False,1080330,235351566,3 -713,,bus_data/2022-10-05/00:02:58.json,2022-10-05 00:02:00,0,2022-10-05
4,7931,20221005 00:02,41.758288,-87.615181,186,18415,3,95th,55022,False,1080329,235353106,N4 -793,,bus_data/2022-10-05/00:02:58.json,2022-10-05 00:02:00,0,2022-10-05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
173739,1775,20221005 23:57,41.944596,-87.641169,158,8102,151,Union Station,28455,False,398,235082170,151 -564,,bus_data/2022-10-05/23:57:56.json,2022-10-05 23:57:00,23,2022-10-05
173740,4397,20221005 23:57,41.896269,-87.624364,179,8102,151,Union Station,48924,False,400,235082169,151 -562,,bus_data/2022-10-05/23:57:56.json,2022-10-05 23:57:00,23,2022-10-05
173741,1763,20221005 23:57,42.007904,-87.665489,89,904,155,Kedzie,0,False,1005709,235083405,155 -551,,bus_data/2022-10-05/23:57:56.json,2022-10-05 23:57:00,23,2022-10-05
173742,1790,20221005 23:57,41.997397,-87.708695,270,904,155,Kedzie,18800,False,1005708,235084706,155 -505,,bus_data/2022-10-05/23:57:56.json,2022-10-05 23:57:00,23,2022-10-05


### getroutes request

In [31]:
# &format=json must be given explicitly; otherwise the API responds with XML.

api_url = f'http://www.ctabustracker.com/bustime/api/v2/getroutes?key={API_KEY}&format=json'

# Timeout prevents an indefinite hang; raise_for_status turns an HTTP error
# into a clear exception instead of a JSON decoding failure.
response = requests.get(api_url, timeout=10)
response.raise_for_status()
routes = response.json()
# routes

In [32]:
# Pull the list of route records out of the response envelope, then tabulate it
route_records = routes['bustime-response']['routes']
df_routes = pd.DataFrame(route_records)
df_routes

Unnamed: 0,rt,rtnm,rtclr,rtdd
0,1,Bronzeville/Union Station,#336633,1
1,2,Hyde Park Express,#993366,2
2,3,King Drive,#009900,3
3,4,Cottage Grove,#cc3300,4
4,X4,Cottage Grove Express,#006666,X4
...,...,...,...,...
120,171,U. of Chicago/Hyde Park,#336633,171
121,172,U. of Chicago/Kenwood,#993366,172
122,192,U. of Chicago Hospitals Express,#cc3300,192
123,201,Central/Ridge,#996633,201


### getdirections request

In [58]:
# Exactly one route must be specified: &rt=20 for Route 20, for example

rt = '20'
api_url = f'http://www.ctabustracker.com/bustime/api/v2/getdirections?key={API_KEY}&rt={rt}&format=json'

# Timeout prevents an indefinite hang; raise_for_status turns an HTTP error
# into a clear exception instead of a JSON decoding failure.
response = requests.get(api_url, timeout=10)
response.raise_for_status()
directions = response.json()
# directions

In [59]:
# Tabulate the directions returned by getdirections.
# Reads `directions` from the request cell above; the previously referenced
# name `directions_rt_20` is not defined anywhere and raised a NameError on a
# fresh kernel.
df_directions = pd.DataFrame(directions['bustime-response']['directions'])
df_directions

Unnamed: 0,dir
0,Eastbound
1,Westbound


### getstops request

In [60]:
# Route and direction are both required
rt = '20'
# Named `direction` rather than `dir` so the builtin dir() stays usable in
# later cells; the query parameter sent to the API is still called "dir".
direction = 'Westbound'

api_url = f'http://www.ctabustracker.com/bustime/api/v2/getstops?key={API_KEY}&rt={rt}&dir={direction}&format=json'

# Timeout prevents an indefinite hang; raise_for_status turns an HTTP error
# into a clear exception instead of a JSON decoding failure.
response = requests.get(api_url, timeout=10)
response.raise_for_status()
stops = response.json()
# stops

In [61]:
# Pull the list of stop records out of the response envelope, then tabulate it
stop_records = stops['bustime-response']['stops']
df_stops = pd.DataFrame(stop_records)
df_stops

Unnamed: 0,stpid,stpnm,lat,lon
0,8613,Columbus & Randolph,41.884754,-87.620779
1,460,Madison & Aberdeen,41.881790,-87.654423
2,480,Madison & Albany,41.881018,-87.703412
3,465,Madison & Ashland,41.881520,-87.666695
4,14800,Madison & Austin,41.880192,-87.774348
...,...,...,...,...
60,15090,Madison & Wood,41.881392,-87.672147
61,1106,Michigan & Washington,41.883478,-87.624567
62,6360,Randolph & Stetson,41.884533,-87.622093
63,448,Washington & State,41.883467,-87.627547


In [82]:
# Turn lat/lon values into shapely points (longitude is passed first, as x)
geometry = [Point(lon, lat) for lon, lat in zip(df_stops['lon'], df_stops['lat'])]

# Turn stops into a geodataframe tagged as EPSG:4326
stops_with_geometry = gpd.GeoDataFrame(df_stops, geometry=geometry)
gdf_stops = stops_with_geometry.set_crs(epsg=4326)

# Interactive map of the stops
gdf_stops.explore(color='blue', tiles="CartoDB positron")

### getpatterns request
#### All patterns for a specified route

In [83]:
# Either a list of pattern IDs or a single route is required.

rt = '20'
api_url = f'http://www.ctabustracker.com/bustime/api/v2/getpatterns?key={API_KEY}&rt={rt}&format=json'

# Timeout prevents an indefinite hang; raise_for_status turns an HTTP error
# into a clear exception instead of a JSON decoding failure.
response = requests.get(api_url, timeout=10)
response.raise_for_status()
patterns = response.json()
# patterns

In [84]:
# Pull the list of pattern records out of the response envelope, then tabulate it
pattern_records = patterns['bustime-response']['ptr']
df_patterns = pd.DataFrame(pattern_records)
df_patterns

Unnamed: 0,pid,ln,rtdir,pt
0,954,41419.0,Westbound,"[{'seq': 1, 'lat': 41.882146000000574, 'lon': ..."
1,957,42743.0,Eastbound,"[{'seq': 1, 'lat': 41.880294999999315, 'lon': ..."


In [86]:
def _points_to_frame(points):
    """Wrap one pattern's collection of point records in a DataFrame."""
    return pd.DataFrame(points)


# Replace each entry of the pt column with a DataFrame of that pattern's points
df_patterns['pt'] = df_patterns['pt'].apply(_points_to_frame)
df_patterns

Unnamed: 0,pid,ln,rtdir,pt
0,954,41419.0,Westbound,seq lat lon typ pdist st...
1,957,42743.0,Eastbound,seq lat lon typ stpid ...


In [95]:
# Inspect the point table of a single pattern (the row labelled 0)
first_pattern_points = df_patterns.loc[0, 'pt']
first_pattern_points

Unnamed: 0,seq,lat,lon,typ,pdist,stpid,stpnm
0,1,41.882146,-87.624857,W,0.0,,
1,2,41.882135,-87.625740,W,0.0,,
2,3,41.882139,-87.625906,S,286.0,450,Madison & Wabash
3,4,41.882076,-87.627785,W,0.0,,
4,5,41.882022,-87.628957,W,0.0,,
...,...,...,...,...,...,...,...
360,361,41.880565,-87.774460,W,0.0,,
361,362,41.880582,-87.774078,W,0.0,,
362,363,41.880502,-87.773903,W,0.0,,
363,364,41.880343,-87.773920,W,0.0,,


In [129]:
# Turn each pattern's points into a single LineString geometry

def _pattern_to_linestring(points):
    """Return a LineString through one pattern's points, ordered by 'seq'.

    Sorts ``points`` in place, so the frame stored in df_patterns['pt'] is
    left ordered by 'seq' as well.
    """
    points.sort_values('seq', inplace=True)
    lon_lat_pairs = list(zip(points['lon'], points['lat']))
    return LineString(lon_lat_pairs)


geometry_linestrings = [_pattern_to_linestring(points) for points in df_patterns['pt']]

# Attach the linestrings as the geometry and tag the result as EPSG:4326
patterns_with_geometry = gpd.GeoDataFrame(df_patterns, geometry=geometry_linestrings)
gdf_patterns = patterns_with_geometry.set_crs(epsg=4326)

# The per-pattern point tables are not needed alongside the geometry
gdf_patterns = gdf_patterns.drop(columns=['pt'])

# Interactive map showing the patterns as linestrings
m = gdf_patterns.explore(color='blue', tiles="CartoDB positron")

m

In [133]:
# Overlay each pattern's stop points on the map that already contains the linestrings

for pattern in df_patterns['pt']:
    # Work on a sorted, filtered copy: typ == 'S' marks the rows that are stops.
    # The result is named pattern_stops (not `stops`) so the getstops response
    # held in `stops` is not overwritten, and the frames stored in df_patterns
    # are not modified by this cell.
    ordered_points = pattern.sort_values('seq')
    pattern_stops = ordered_points[ordered_points['typ'] == 'S'].copy()

    # One shapely Point per stop, longitude first (x), then latitude (y)
    stop_points = [
        Point(lon, lat)
        for lon, lat in zip(pattern_stops['lon'], pattern_stops['lat'])
    ]
    gdf_pattern_stops = gpd.GeoDataFrame(pattern_stops, geometry=stop_points).set_crs(epsg=4326)

    # Draw onto the existing map object instead of creating a new one
    gdf_pattern_stops.explore(m=m, color='red')

# Show the map
m


### getpredictions request

In [27]:
# A list of stop IDs is required.
# A list of routes is optional WITH stop IDs.
# A list of vehicle IDs is optional, but can't be combined with stop IDs.

api_url = f'http://www.ctabustracker.com/bustime/api/v2/getpredictions?key={API_KEY}&stpid=388&rt=20&format=json'

# Timeout prevents an indefinite hang; raise_for_status turns an HTTP error
# into a clear exception instead of a JSON decoding failure.
response = requests.get(api_url, timeout=10)
response.raise_for_status()
predictions_rt20_stpid388 = response.json()
predictions_rt20_stpid388


{'bustime-response': {'prd': [{'tmstmp': '20230108 12:14',
    'typ': 'A',
    'stpnm': 'Madison & Mayfield',
    'stpid': '388',
    'vid': '2014',
    'dstp': 8555,
    'rt': '20',
    'rtdd': '20',
    'rtdir': 'Eastbound',
    'des': 'Michigan',
    'prdtm': '20230108 12:30',
    'tablockid': '20 -804',
    'tatripid': '102969',
    'origtatripno': '238685965',
    'dly': False,
    'prdctdn': '15',
    'zone': ''}]}}

### getservicebulletins request

In [28]:
# A list of routes OR a list of stop IDs is required.
# A single route direction is optional.

api_url = f'http://www.ctabustracker.com/bustime/api/v2/getservicebulletins?key={API_KEY}&rt=20&format=json'

# Timeout prevents an indefinite hang; raise_for_status turns an HTTP error
# into a clear exception instead of a JSON decoding failure.
response = requests.get(api_url, timeout=10)
response.raise_for_status()
bulletins_rt20 = response.json()
bulletins_rt20

{'bustime-response': {'sb': [{'nm': '20 Alerts',
    'sbj': 'Check for #20 alerts',
    'dtl': 'For alerts, maps and detailed route information, visit <a href="https://www.transitchicago.com/bus/20/">#20 Madison Route Information</a> on our website. You can also sign up to have alerts sent by text or e-mail by signing up for <a href="https://transitchicago.com/updates">CTA Updates</a>.',
    'brf': 'For alerts and detailed #20 bus route info, visit https://transitchicago.com/bus/20 - get alerts sent to you via transitchicago.com/updates',
    'prty': 'Low',
    'srvc': [{'rt': '20', 'rtdir': '', 'stpid': '', 'stpnm': ''}]}]}}

## Strategy for calculating headways - rough draft

In [None]:
# Vehicles with pattern and dist for an entire route -> match up to pattern data (incl dist along pattern and route) to get stops passed each period. 

# For a given stop, list all vehicles with times they passed the stop.  (dist minus 100 ft or so, to account for buses
# stopping not EXACTLY as far as the stop point?)

# For the given stop, sort vehicles by time and calculate headways.