In [1]:
# -*- coding: utf-8 -*-
"""
The script identifies an example of unstable pairing of trip_id and vehicle_id.
"""

import os
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import HTML
%matplotlib inline 

# Work from the shared project workspace: the input file and the exported CSVs
# in the cells below are all given as paths relative to this directory.
SHARE_DIR = '/gpfs2/projects/project-bus_capstone_2016/workspace/share'
os.chdir(SHARE_DIR)

## 1. Get Bus Time data sample

In [2]:
# Load the sample of parsed AVL (BusTime) data.  Beware, large files take more time.
# The file is read with header=None, so column names are assigned explicitly below.
bustime = pd.read_csv('spark_parse/1203.txt', header=None)

# Beware!  Bonan is still working on organizing the extract files, so these columns
# may change.  Assigning the names after the read (instead of passing names= to
# read_csv) fails loudly if the number of columns in the file no longer matches.
bustime.columns = ['route', 'lat', 'lon', 'timestamp', 'vehicle_id', 'trip_id', 'trip_date', 'shape_id',
                   'next_stop_id', 'dist_from_stop', 'stop_dist_on_trip', 'presentable_dist']

# Strip the agency prefixes from trip_id.  This is done on the freshly read frame,
# before any filtering, so the column assignment never targets a slice of another frame.
for agency_prefix in ('MTA NYCT_', 'MTABC_'):
    bustime['trip_id'] = bustime['trip_id'].str.replace(agency_prefix, '')

# Keep one report per (vehicle_id, timestamp), then index by the identifiers
# that the later cells group and slice on.
bustime = (
    bustime
    .drop_duplicates(subset=['vehicle_id', 'timestamp'])
    .set_index(['route', 'trip_id', 'trip_date', 'vehicle_id'], drop=True)
)

# For demonstration, use a subset: just the data for one trip-date.
tripDateLookup = "2015-12-03"  # this is a non-holiday Thursday
# drop_level=False keeps trip_date in the index, so the index still has all four levels.
bustime = bustime.xs(tripDateLookup, level='trip_date', drop_level=False).sort_index()

# Parenthesised single-argument print runs unchanged on Python 2 and Python 3.
print('Finished loading BusTime data and and slicing one day.')

Finished loading BusTime data and and slicing one day.


## 2. Look for cases of multiple vehicles reported on same trip

In [3]:
# Count the reports for every (route, trip_id, trip_date, vehicle_id) combination,
# then browse a window of the result by eye: a trip_id that is listed with more than
# one vehicle_id is a candidate example.
reports_per_pairing = bustime.groupby(level=[0, 1, 2, 3]).size()
reports_per_pairing.iloc[170:200]

route        trip_id                          trip_date   vehicle_id   
MTA NYCT_B1  UP_D5-Weekday-SDon-068300_B1_27  2015-12-03  MTA NYCT_4867    17
             UP_D5-Weekday-SDon-068400_B1_9   2015-12-03  MTA NYCT_5082    41
             UP_D5-Weekday-SDon-068800_B1_10  2015-12-03  MTA NYCT_4855    27
                                                          MTA NYCT_4988     7
             UP_D5-Weekday-SDon-069300_B1_16  2015-12-03  MTA NYCT_7181    36
             UP_D5-Weekday-SDon-069300_B1_19  2015-12-03  MTA NYCT_4858    18
             UP_D5-Weekday-SDon-069400_B1_23  2015-12-03  MTA NYCT_4989    12
                                                          MTA NYCT_7168     1
             UP_D5-Weekday-SDon-069800_B1_5   2015-12-03  MTA NYCT_4855     6
                                                          MTA NYCT_5007    37
             UP_D5-Weekday-SDon-070200_B1_21  2015-12-03  MTA NYCT_5098    27
             UP_D5-Weekday-SDon-070800_B1_7   2015-12-03  MTA NYCT_716

## 3.  Select one example, export data to CARTO, and visualize
First, look at the locations of MULTIPLE VEHICLES using the same trip_id.

In [4]:
# Write the lat/lon reports of the chosen example trip to CSV.
# The key covers the first three index levels (route, trip_id, trip_date), so the
# selected rows come from every vehicle that reported this trip on that date.
example_trip_key = ('MTA NYCT_B1', 'UP_D5-Weekday-SDon-071100_B1_6', '2015-12-03')
example_trip_reports = bustime.loc[example_trip_key]
example_trip_reports[['lat', 'lon']].to_csv('multi_veh_example.csv')

In [5]:
# Embed the published CARTO map in the notebook as an iframe.
# NOTE(review): presumably this map was built from multi_veh_example.csv -- confirm.
carto_map_url = "https://mu529.carto.com/viz/d52d1c92-4d0a-11e6-8fab-0e3a376473ab/embed_map"
framestring = (
    '\n<iframe width="100%" height="520" frameborder="0" src="' + carto_map_url + '"'
    ' allowfullscreen webkitallowfullscreen mozallowfullscreen oallowfullscreen msallowfullscreen></iframe>\n'
)
HTML(framestring)

Then look at the locations of the SAME VEHICLE but with a CHANGING TRIP_ID.

In [6]:
# Export the lat-lon data for one vehicle on that route, so we can also see the
# other trips it reports.  Selecting on route, trip_date and vehicle_id leaves
# trip_id as the only remaining index level of the result.
vehicle_reports = bustime.xs(
    ('MTA NYCT_B1', '2015-12-03', 'MTA NYCT_5098'),
    level=['route', 'trip_date', 'vehicle_id'],
)

# Keep only the reports after the chosen start time.
# NOTE(review): the load cell does not parse timestamp as a datetime, so this is
# presumably a plain string comparison -- confirm that every row uses the same
# timestamp format and UTC offset.
later_reports = vehicle_reports.query('timestamp > "2015-12-03T11:36:00.000-05:00"')

# DataFrame.sort() was removed from pandas; sort_values() is the replacement.
ordered_reports = later_reports.sort_values('timestamp')
ordered_reports[['lat', 'lon', 'timestamp']].to_csv('multi_trip_example.csv')

In [7]:
# Embed the published CARTO map in the notebook as an iframe.
# NOTE(review): presumably this map was built from multi_trip_example.csv -- confirm.
carto_map_url = "https://mu529.carto.com/viz/2b57cb06-4d0d-11e6-852c-0e8c56e2ffdb/embed_map"
framestring = (
    '\n<iframe width="100%" height="520" frameborder="0" src="' + carto_map_url + '"'
    ' allowfullscreen webkitallowfullscreen mozallowfullscreen oallowfullscreen msallowfullscreen></iframe>\n'
)
HTML(framestring)