In [1]:
import pandas as pd
import numpy as np
import networkx as nx
import cPickle as pickle
from model import utilities as ut

# Reading in static schedule data
stops_full = pd.read_csv('data/google_transit/stops.txt', index_col='stop_id')
routes = pd.read_csv('data/google_transit/routes.txt', index_col='route_id')
trips = pd.read_csv('data/google_transit/trips.txt', index_col='trip_id')
blocks = pd.read_csv('../project/lookUpBlockIDToBlockNumNam.csv', index_col='BLOCKID')
stop_times = pd.read_csv('data/google_transit/stop_times.txt')
shapes = pd.read_csv('data/google_transit/shapes.txt')

# Some of these stops are named "Not a public stop" but are still in trips.
# Luckily, in the few trips they appear in, they're only either at the
# beginning or the end, so we can remove them now and we'll still build
# a nice graph with the connections we expect.
stops = stops_full[~stops_full.index.isin([7520, 7530, 7531, 7540])]
stop_times = stop_times[~stop_times['stop_id'].isin([7520, 7530, 7531, 7540])]

# Oh and some stops are in stops.txt but not used in trips... let's remove 'em
used_stops = set(stop_times['stop_id'].unique())
stops = stops[stops.index.isin(used_stops)]

# Let's make some sorted stop-timepoint lists for each stop_id to
# make lookup faster for things
all_stop_timepoints = {}
for stopid in used_stops:
    node_names = stop_times[stop_times['stop_id'] == stopid].\
                    apply(lambda x: '{0}_{1}'.\
                              format(stopid, x['arrival_time']),\
                          axis=1)
    all_stop_timepoints[stopid] = sorted(list(set(node_names)))

In [2]:
raw_test = pd.read_csv('../project/test3.csv')

  interactivity=interactivity, compiler=compiler, result=result)


In [29]:
raw_test.loc[1100:1120]

Unnamed: 0,REV,REPORT_TIME,VEHICLE_TAG,LONGITUDE,LATITUDE,SPEED,HEADING,TRAIN_ASSIGNMENT,PREDICTABLE
1100,1506,01/06/2016 12:06:37,7,-122.41139,37.79558,0.0,36.0,6002,0
1101,1506,01/06/2016 12:08:07,7,-122.41191,37.79496,0.0,78.0,6002,1
1102,1506,01/06/2016 12:12:37,7,-122.41215,37.79481,0.833,16.0,6002,1
1103,1506,01/06/2016 12:26:07,7,-122.41181,37.7949,0.0,195.0,6002,0
1104,1506,01/06/2016 12:35:07,7,-122.41198,37.79495,0.0,65.0,6002,0
1105,1506,01/06/2016 12:39:37,7,-122.41208,37.79485,0.0,20.0,6002,0
1106,1506,01/06/2016 12:56:07,7,-122.41206,37.79495,0.0,19.0,6002,0
1107,1506,01/06/2016 13:00:37,7,-122.41206,37.79471,0.0,348.0,6002,0
1108,1506,01/06/2016 13:44:07,7,-122.41208,37.79473,0.0,18.0,6002,0
1109,1506,01/06/2016 13:47:07,7,-122.412,37.79485,0.0,344.0,6002,0


In [4]:
raw_test.TRAIN_ASSIGNMENT.unique()

array([nan, '6003', '6004', ..., 905.0, 2905.0, 8888.0], dtype=object)

In [5]:
raw_test.TRAIN_ASSIGNMENT.min(), raw_test.TRAIN_ASSIGNMENT.max()

(101.0, 'NUNSCHED')

In [42]:
raw_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 304830 entries, 0 to 304829
Data columns (total 9 columns):
REV                 304830 non-null int64
REPORT_TIME         304830 non-null object
VEHICLE_TAG         304830 non-null object
LONGITUDE           304830 non-null float64
LATITUDE            304830 non-null float64
SPEED               304830 non-null float64
HEADING             304830 non-null float64
TRAIN_ASSIGNMENT    239082 non-null object
PREDICTABLE         304830 non-null int64
dtypes: float64(4), int64(2), object(3)
memory usage: 20.9+ MB


In [6]:
blocks.head()

Unnamed: 0_level_0,SIGNID,BLOCKNUM,BLOCKNAME
BLOCKID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
332917,69,107,107
332918,69,108,108
332919,69,109,109
332920,69,110,110
332921,69,102,102


In [73]:
blocks.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 71670 entries, 332917 to 442963
Data columns (total 3 columns):
SIGNID       71670 non-null int64
BLOCKNUM     71670 non-null int64
BLOCKNAME    67983 non-null object
dtypes: int64(2), object(1)
memory usage: 2.2+ MB


In [7]:
blocks.index.max()

442993

In [8]:
blocks.SIGNID.min()

69

In [9]:
blocks.SIGNID.max()

88

In [10]:
blocks.BLOCKNUM.min(), blocks.BLOCKNUM.max()

(1, 9973)

In [22]:
blocks[blocks.BLOCKNUM == 6002].head()

Unnamed: 0_level_0,SIGNID,BLOCKNUM,BLOCKNAME
BLOCKID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
334964,69,6002,6002
334977,69,6002,6002
334989,69,6002,6002
342104,69,6002,6002
342109,69,6002,6002


In [53]:
trips.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 29087 entries, 6858659 to 6916562
Data columns (total 6 columns):
route_id         29087 non-null int64
service_id       29087 non-null int64
trip_headsign    29071 non-null object
direction_id     29087 non-null int64
block_id         29087 non-null int64
shape_id         29087 non-null int64
dtypes: int64(5), object(1)
memory usage: 1.6+ MB


In [52]:
trips[trips.block_id == 6002].head()

Unnamed: 0_level_0,route_id,service_id,trip_headsign,direction_id,block_id,shape_id
trip_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
6900882,1060,3,Beach + Hyde,0,6002,135858
6900834,1060,3,Beach + Hyde,0,6002,135856
6900281,1060,2,Beach + Hyde,0,6002,135856
6900751,1060,3,Beach + Hyde,0,6002,135858
6899740,1060,1,Beach + Hyde,0,6002,135856


In [24]:
trips[(trips.block_id == 6002)].shape_id.unique()

array([135858, 135856, 135862, 135863])

In [14]:
shapes.head()

Unnamed: 0,shape_id,shape_pt_lon,shape_pt_lat,shape_pt_sequence,shape_dist_traveled
0,135594,-122.446805,37.787266,1,0
1,135594,-122.448481,37.787054,2,149
2,135594,-122.450131,37.786842,3,296
3,135594,-122.450238,37.786822,4,306
4,135594,-122.451771,37.786624,5,443


In [41]:
raw_test.TRAIN_ASSIGNMENT.unique()[:500]

array([nan, '6003', '6004', '6007', '5903', '5908', '6005', '5907', '6010',
       '6002', '5904', '6006', '5906', '9805', '9801', '9803', '9804',
       '9806', '9821', '9822', '9814', '9816', '9999', '9808', '9809',
       '9811', '9802', '9812', '9817', '9810', '9815', '9819', '9813',
       '9818', '5901', 'NUNSCHED', '9401', '9409', 'MUNSCHED', 'KTUNSCHED',
       '9405', '9419', '9506', '9713', '9508', '9603', 'LUNSCHED', '9304',
       '9504', '9704', '9706', '9606', '9611', '9621', '9604', '9502',
       '9410', '9717', '9421', '9303', '9413', '9501', '9609', '9703',
       '9718', '9503', '9511', '9705', '9724', '9509', '9414', '9310',
       '9708', '9507', '9605', '9412', '9712', '9721', '9607', '9522',
       '9608', '9716', '9711', '9702', '9305', '9407', '9404', '9601',
       '9723', '9306', '9308', 'JUNSCHED', '9710', '9521', '9422', '9416',
       '9402', '9415', '9725', '9602', '9510', '9610', '9709', '9406',
       '9420', '9321', '9424', '9423', '9453', '9707', '945

In [34]:
raw_test.loc[1109]['TRAIN_ASSIGNMENT']

'6002'

In [64]:
test_block = raw_test[raw_test['TRAIN_ASSIGNMENT'] == '6002']

In [65]:
test_block.head()

Unnamed: 0,REV,REPORT_TIME,VEHICLE_TAG,LONGITUDE,LATITUDE,SPEED,HEADING,TRAIN_ASSIGNMENT,PREDICTABLE
1043,1506,01/06/2016 06:00:37,7,-122.42101,37.80691,0.0,221.0,6002,1
1044,1506,01/06/2016 06:03:49,7,-122.41953,37.80141,3.889,170.0,6002,1
1045,1506,01/06/2016 06:04:34,7,-122.41918,37.79971,3.889,171.0,6002,1
1046,1506,01/06/2016 06:09:06,7,-122.41374,37.79423,1.944,80.0,6002,1
1047,1506,01/06/2016 06:11:48,7,-122.40951,37.79388,3.333,171.0,6002,1


Unnamed: 0_level_0,route_id,service_id,trip_headsign,direction_id,block_id,shape_id
trip_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
6858659,11047,2,Geary + 33rd Avenue,0,115,135594
7048008,11047,1,Geary + 33rd Avenue,0,110,135595
7048006,11047,1,Geary + 33rd Avenue,0,116,135595
7048005,11047,1,Geary + 33rd Avenue,0,103,135595
7048004,11047,1,Geary + 33rd Avenue,0,113,135595


In [67]:
foo = raw_test['TRAIN_ASSIGNMENT'].apply(lambda x: int(x) if str(x)[0].isdigit() else 'boo')

In [70]:
foo.unique()[:200]

array(['boo', 6003, 6004, 6007, 5903, 5908, 6005, 5907, 6010, 6002, 5904,
       6006, 5906, 9805, 9801, 9803, 9804, 9806, 9821, 9822, 9814, 9816,
       9999, 9808, 9809, 9811, 9802, 9812, 9817, 9810, 9815, 9819, 9813,
       9818, 5901, 9401, 9409, 9405, 9419, 9506, 9713, 9508, 9603, 9304,
       9504, 9704, 9706, 9606, 9611, 9621, 9604, 9502, 9410, 9717, 9421,
       9303, 9413, 9501, 9609, 9703, 9718, 9503, 9511, 9705, 9724, 9509,
       9414, 9310, 9708, 9507, 9605, 9412, 9712, 9721, 9607, 9522, 9608,
       9716, 9711, 9702, 9305, 9407, 9404, 9601, 9723, 9306, 9308, 9710,
       9521, 9422, 9416, 9402, 9415, 9725, 9602, 9510, 9610, 9709, 9406,
       9420, 9321, 9424, 9423, 9453, 9707, 9451, 9403, 9719, 9408, 9302,
       6001, 6008, 9417, 9714, 9505, 9715, 9612, 9301, 9411, 9701, 9722,
       9452, 9309, 9307, 9418, 5909, 6009, 5905, 9807, 5902, 6106, 4105,
       1433, 4113, 2292, 3303, 2491, 2412, 2408, 2405, 1494, 2415, 4103,
       1431, 4913, 1492, 3309, 2403, 3306, 814, 14