# NYCbuswatcher API Demo (against api.buswatcher.org)

In [2]:
# !pip install requests pandas geopandas ipyleaflet ipywidgets matplotlib python-dateutil pydeck

In [3]:
import requests
import pandas as pd
import geopandas as gpd

%matplotlib inline

In [4]:
# prevent cell wrapping in dataframe tables

In [5]:
%%html
<style>
.dataframe td {
    white-space: nowrap;
}
</style>

## How to use the API:

A Shipment is a JSON file showing all observations of a single route for a single hour. 
There are several endpoints that let you:
- get a list of available Shipments to retrieve
- retrieve the Shipments as JSON or GeoJSON.

In [6]:
api_url = 'http://api.buswatcher.org/api/v2'

## Get a list of all the Shipments in history for a specific route

#### ENDPOINT: List All Shipments In History For Route: `/api/v2/nyc/{route}` 

In [7]:
route='M15'

In [8]:
shipments_for_route_url = f'{api_url}/nyc/{route}'
shipments_for_route_url

'http://api.buswatcher.org/api/v2/nyc/M15'

In [9]:
# fetch the index of shipments for this route
shipment_list = requests.get(shipments_for_route_url).json()

In [10]:
# inspect the response
list(shipment_list.keys())

['route', 'shipments']

In [11]:
# verify route
shipment_list['route']

'M15'

In [12]:
# how many shipment pointers in all
len(shipment_list['shipments'])

1386

In [13]:
# look at first record (shipments is a list of dicts, one for each shipment in the Data Store)
shipment_list['shipments'][0]

{'route': 'M15',
 'year': 2021,
 'month': 6,
 'day': 30,
 'hour': 23,
 'url': 'http://api.buswatcher.org:80/api/v2/nyc/2021/6/30/23/M15/buses'}

In [14]:
# and the last one
shipment_list['shipments'][-1]

{'route': 'M15',
 'year': 2021,
 'month': 9,
 'day': 8,
 'hour': 19,
 'url': 'http://api.buswatcher.org:80/api/v2/nyc/2021/9/8/19/M15/buses'}

#### grab one shipment... 

In [13]:
url = shipment_list['shipments'][0]['url']
shipment = requests.get(url).json()

#### look inside it... 

In [14]:
# how many buses?
len(shipment['buses'])

510

In [15]:
# look at one record
shipment['buses'][0]

{'route': 'M15',
 'timestamp': '2021-06-30 23:14:44-04:00',
 'route_long': 'MTA NYCT_M15',
 'direction': '0',
 'service_date': '2021-06-30',
 'trip_id': 'MTA NYCT_OH_C1-Weekday-134000_M15_243',
 'gtfs_shape_id': 'MTA_M150004',
 'route_short': 'M15',
 'agency': 'MTA NYCT',
 'origin_id': 'MTA_803019',
 'destination_name': 'EAST HARLEM 125 ST via 1 AV',
 'next_stop_id': 'MTA_401721',
 'next_stop_eta': '2021-06-30T23:15:12.240-04:00',
 'next_stop_d_along_route': 11470.57,
 'next_stop_d': 56.85,
 'lat': 40.784978,
 'lon': -73.943452,
 'bearing': 54.05072,
 'progress_rate': 'normalProgress',
 'vehicle_id': 'MTA NYCT_5881',
 'gtfs_block_id': 'MTA NYCT_OH_C1-Weekday_C_OH_13560_M15-206'}

In [16]:
# we can also get the shipment as geojson, just add /geojson to the end of the endpoint

url = shipment_list['shipments'][0]['url'] + '/geojson'
gj_shipment = requests.get(url).json()

In [None]:
# verify same # of buses
len(gj_shipment['features'])

In [None]:
# and check out the geojson (hint: you can paste this into geojson.io to make maps instantly)
gj_shipment['features'][0]

#### ...and map it

In [None]:
from ipywidgets import Layout
from ipyleaflet import (
    Map, basemaps, basemap_to_tiles,
    Circle, Marker, Rectangle, LayerGroup
)

# init the data layer: one GeoDataFrame row per GeoJSON feature
buses_gdf = gpd.GeoDataFrame.from_features(gj_shipment['features'])

# init the map
defaultLayout=Layout(width='960px', height='540px')
center = (40.7128, -74.0060)  # (lat, lon) -- the order ipyleaflet expects
zoom = 12

# The Stamen-hosted "Toner" tiles were retired in 2023 and the provider is no
# longer shipped with current xyzservices (which backs `basemaps`), so use a
# light monochrome basemap that needs no API key instead.
basemap_layer = basemap_to_tiles(basemaps.CartoDB.Positron)

# Create layer group: one non-draggable marker per bus observation.
# assumes the GeoJSON properties carry `lat` / `lon` columns, as the plain
# JSON bus records do -- TODO confirm against the /geojson response
layer_group = LayerGroup(layers=tuple(
    Marker(location=(lat, lon), draggable=False)
    for lat, lon in zip(buses_gdf['lat'], buses_gdf['lon'])
))

# hand both layers to the constructor rather than adding them one by one
m = Map(layers=(basemap_layer, layer_group), center=center, zoom=zoom, layout=defaultLayout)

m

## build a route history 

#### (e.g. all observations ever made for a single route)
#### by iterating over the list of shipments and loading them all into a dataframe (takes a few minutes)

In [None]:
def get_route_history(route, timeout=30):
    """Load every archived bus observation for one route into a DataFrame.

    Fetches the shipment index from ``{api_url}/nyc/{route}``, then downloads
    each shipment listed there and collects its ``buses`` records.

    Parameters
    ----------
    route : str
        Route identifier as used in the API path, e.g. ``'M15'``.
    timeout : float, optional
        Seconds to wait on each HTTP request before giving up.

    Returns
    -------
    pandas.DataFrame
        One row per bus record. A ``passenger_count`` column is always
        present, with missing values replaced by 0.

    Raises
    ------
    requests.RequestException
        If the shipment index itself cannot be retrieved. Individual
        shipments that fail are skipped and reported instead.
    """
    # init list to hold results of the individual fetches
    rows = []
    skipped = 0

    # a single Session reuses the connection across the many shipment requests
    with requests.Session() as session:

        # get the list of shipments for a route; nothing can be done without it
        shipments_for_route_url = f'{api_url}/nyc/{route}'
        index_response = session.get(shipments_for_route_url, timeout=timeout)
        index_response.raise_for_status()
        shipment_list = index_response.json()

        # iterate over the list of shipments and get each one
        for s in shipment_list['shipments']:
            try:
                response = session.get(s['url'], timeout=timeout)
                response.raise_for_status()
                shipment = response.json()
            except (requests.RequestException, ValueError):
                # one bad shipment should not throw away a multi-minute crawl
                skipped += 1
                continue
            rows.extend(shipment.get('buses', []))

    df = pd.DataFrame(rows)

    # not every record carries passenger_count; make sure the column exists
    # before filling, otherwise the lookup raises KeyError
    if 'passenger_count' in df.columns:
        df['passenger_count'] = df['passenger_count'].fillna(0)
    else:
        df['passenger_count'] = 0

    print(f'loaded {len(rows)} buses from {len(shipment_list["shipments"])} shipments for route {route} into DataFrame: "df"')
    if skipped:
        print(f'skipped {skipped} shipments that could not be retrieved')

    return df

df = get_route_history(route)
df

In [None]:
### convert this to a geodataframe
# geopandas is imported as `gpd` at the top of the notebook; the bare name
# `geopandas` is never defined, so it must be referenced through the alias.
# NOTE(review): no CRS is set here; lon/lat look like WGS84 degrees, so
# crs='EPSG:4326' may be appropriate -- confirm before adding.
gdf = gpd.GeoDataFrame(
    df, geometry=gpd.points_from_xy(df.lon, df.lat))

## retrieving raw response archives

#### The individual JSON responses for each minute's request for each route are also bundled up and stored in hourly packages called "glaciers". Right now there's no index for glaciers, so you'll have to handle the missing ones yourself (e.g. with try-except or by checking the response status).

In [22]:
year=2021
month=8
day=8
hour=8
glacier_url = f'{api_url}/nyc/{year}/{month}/{day}/{hour}/{route}/archive'
glacier_url

'http://api.buswatcher.org/api/v2/nyc/2021/8/8/8/M15/archive'

In [23]:
# fetch the raw archive ("glacier") for this route and hour
glacier = requests.get(glacier_url)
type(glacier.content)

bytes

In [24]:
len(glacier.content)

840144

In [36]:
# get a day's worth and demonstrate trapping not found

year, month, day = 2021, 8, 8

# NOTE(review): range(0,25) also requests hour 24, which is the only request
# reported as not found in the recorded output -- presumably deliberate, to
# exercise the not-found branch; confirm, and use range(24) for a real day.
for hour in range(0,25):
    glacier_url = f'{api_url}/nyc/{year}/{month}/{day}/{hour}/{route}/archive'
    glacier = requests.get(glacier_url)

    # guard clause: report the missing archive and move on to the next hour
    if not glacier.ok:
        print (f'no Glacier found for {year}-{month}-{day}-{hour} on the {route}')
        continue

    print(f'retrieved Glacier {(len(glacier.content)/1024):.0f} kb long for {hour}')

retrieved Glacier 824 kb long for 0
retrieved Glacier 570 kb long for 1
retrieved Glacier 307 kb long for 2
retrieved Glacier 357 kb long for 3
retrieved Glacier 406 kb long for 4
retrieved Glacier 532 kb long for 5
retrieved Glacier 625 kb long for 6
retrieved Glacier 664 kb long for 7
retrieved Glacier 820 kb long for 8
retrieved Glacier 838 kb long for 9
retrieved Glacier 925 kb long for 10
retrieved Glacier 1112 kb long for 11
retrieved Glacier 1221 kb long for 12
retrieved Glacier 1287 kb long for 13
retrieved Glacier 1395 kb long for 14
retrieved Glacier 1457 kb long for 15
retrieved Glacier 1413 kb long for 16
retrieved Glacier 1563 kb long for 17
retrieved Glacier 1457 kb long for 18
retrieved Glacier 1278 kb long for 19
retrieved Glacier 1215 kb long for 20
retrieved Glacier 1030 kb long for 21
retrieved Glacier 968 kb long for 22
retrieved Glacier 997 kb long for 23
no Glacier found for 2021-8-8-24 on the M15


#### when you are ready to start parsing your own, you can look at common.Models.BusObservation for my approach to it