# Find Arrival Timings of Various Buses

## Load Data

For now experiment with one day of data: 2018-03-05 

In [5]:
import folium
import pandas as pd
import json
from math import *

busLocationFile = "Data/BusLocation/2018-03-05.csv"
busStopFile = "Data/busstops.txt"

In [12]:
m = folium.Map(location=[1.2966, 103.7764], width=750, height=500, zoom_start = 14)
m

In [6]:
df = pd.read_csv(busLocationFile)
print(df.head())

   node_id vehicle_serial                  gps_time  latitude  longitude  \
0     2025        PC3957P  2018-03-04T22:59:01.000Z  1.309950  103.77200   
1     2026        PA9558D  2018-03-04T22:59:01.000Z  1.315100  103.69067   
2     2031        PC3785T  2018-03-04T22:59:01.000Z  1.291883  103.78049   
3     2043        PC4038K  2018-03-04T22:59:01.000Z  1.294683  103.77486   
4     2054        PC3989Y  2018-03-04T22:59:01.000Z  1.294500  103.77513   

   altitude  speed  heading  
0        28     14   192.07  
1        25      0    77.53  
2        70      0   139.86  
3        37      0   329.73  
4        48      0   290.10  


In [7]:
# get unique bus
df_id = df['node_id']
buses = set()
for busId in df_id:
    buses.add(busId)
buses = list(buses)

In [8]:
print(buses)

[2048, 2054, 2055, 2058, 2060, 2062, 2063, 2068, 2077, 2078, 2079, 2129, 2024, 2025, 2026, 2027, 2029, 2031, 2032, 2033, 2038, 2042, 2043, 2047]


In [9]:
df_2048 = df[df['node_id'] == 2048]
print(df_2048.head())

       node_id vehicle_serial                  gps_time  latitude  longitude  \
10143     2048        PC4019R  2018-03-05T13:34:01.000Z  1.296467  103.78330   
10156     2048        PC4019R  2018-03-05T13:34:02.000Z  1.296533  103.78325   
10169     2048        PC4019R  2018-03-05T13:34:03.000Z  1.296583  103.78318   
10182     2048        PC4019R  2018-03-05T13:34:04.000Z  1.296633  103.78312   
10195     2048        PC4019R  2018-03-05T13:34:05.000Z  1.296667  103.78308   

       altitude  speed  heading  
10143        39     34   307.96  
10156        41     32   306.98  
10169        42     30   305.21  
10182        42     26   305.44  
10195        42     21   304.02  


In [10]:
# plot data for this bus
for idx, row in df_2048.iterrows():
#     print("added", row['latitude'], row['longitude'])
    folium.Marker([row['latitude'], row['longitude']]).add_to(m)

NameError: name 'm' is not defined

In [11]:
# load bus stop geolocation data
busStopData = json.load(open(busStopFile))

In [12]:
busStopLocations = {}
for d in busStopData:
    busStopLocations[d['name']] = (d['lat'], d['long'])

In [13]:
print(busStopLocations)

{'University Town': (1.30359, 103.77444), 'Museum': (1.30108, 103.7737), 'Raffles Hall': (1.30098, 103.7727), 'Opp. Yusof Ishak House': (1.299, 103.77412), 'Yusof Ishak House': (1.2989, 103.77438), 'University Health Centre': (1.29891, 103.77612), 'Opp. University Hall': (1.2976, 103.77814), 'Block S17': (1.29753, 103.78058), 'Opp. NUH': (1.29681, 103.78315), 'Computer Centre': (1.29738, 103.77285), 'Central Library': (1.29657, 103.77254), 'Ventus (Opp. LT13)': (1.2954, 103.77062), 'Temasek Hall': (1.29312, 103.77133), 'COM2': (1.2943, 103.7738), 'AS7': (1.29367, 103.77178), 'Eusoff Hall': (1.29389, 103.7704), 'PGP Terminal': (1.29192, 103.78055), 'After Science Park Drive': (1.29255, 103.78449), 'KR MRT Station': (1.29373, 103.78489), 'NUH': (1.29631, 103.78346), 'LT29': (1.29738, 103.78104), 'BIZ2': (1.29338, 103.7752), 'Opp. University Health Centre': (1.29879, 103.77562), 'University Hall': (1.29743, 103.77791), 'LT13': (1.29475, 103.77069), 'House 7': (1.29328, 103.77782), 'Betwee

In [14]:
def dist(lat1, lon1, lat2, lon2):
    '''
    returns distance in meters
    '''
    R = 6373.0
    lat1 = radians(lat1)
    lon1 = radians(lon1)
    lat2 = radians(lat2)
    lon2 = radians(lon2)

    dlon = lon2 - lon1
    dlat = lat2 - lat1

    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    distance = R * c * 1000
    return distance

def near(locA, locB):
    EPS = 10
    return abs(dist(locA[0], locA[1], locB[0], locB[1])) < EPS
    
    

In [15]:
def nearestBusStop(lat, long):
    for stop, loc in busStopLocations.items():
        if(near(loc, (lat, long))):
            return stop
    return None

In [16]:
for idx, row in df_2048.iterrows():
    nearestBusStop(row['latitude'], row['longitude'])

In [17]:
def arrivalTimes(df):
    times = [] # (bus stop, arrival time, depature time)
    isWaitingDepart = False
    for idx, row in df.iterrows():
        busstop = nearestBusStop(row['latitude'], row['longitude'])
        if busstop:
            # Entering
            if len(times) == 0 or times[-1][0] != busstop:
                times.append([busstop, row['gps_time'], None])
        elif len(times) and times[-1][-1] is None:
            # Leaving
            times[-1][-1] = row['gps_time']
    return times
    

In [18]:
arrivalTimes2048 = arrivalTimes(df_2048)
arrivalTimes2048

[['LT29', '2018-03-05T13:34:44.000Z', '2018-03-05T13:35:08.000Z'],
 ['University Hall', '2018-03-05T13:36:31.000Z', '2018-03-05T13:36:34.000Z'],
 ['Computer Centre', '2018-03-05T13:39:23.000Z', '2018-03-05T13:39:25.000Z'],
 ['Central Library', '2018-03-05T13:39:40.000Z', '2018-03-05T13:39:44.000Z'],
 ['LT13', '2018-03-05T13:41:10.000Z', '2018-03-05T13:41:29.000Z'],
 ['COM2', '2018-03-05T13:43:31.000Z', '2018-03-05T13:44:01.000Z'],
 ['BIZ2', '2018-03-05T13:46:08.000Z', '2018-03-05T13:46:22.000Z'],
 ['House 12', '2018-03-05T13:46:58.000Z', '2018-03-05T14:10:01.000Z'],
 ['COM2', '2018-03-05T14:11:02.000Z', '2018-03-05T14:11:17.000Z'],
 ['BIZ2', '2018-03-05T14:13:36.000Z', '2018-03-05T14:14:01.000Z'],
 ['House 12', '2018-03-05T14:14:27.000Z', '2018-03-05T14:14:29.000Z'],
 ['Between House 14 & 15',
  '2018-03-05T14:14:39.000Z',
  '2018-03-05T14:14:40.000Z'],
 ['PGP Terminal', '2018-03-05T14:15:17.000Z', '2018-03-05T14:15:20.000Z'],
 ['After Science Park Drive',
  '2018-03-05T14:31:14.000Z',

In [19]:
# Plot Arrival Timings on the map
m = folium.Map(location=[1.2966, 103.7764], width=750, height=500, zoom_start = 14)

In [20]:
for timings in arrivalTimes2048:
    busstop, start, end = timings
    long, lat = busStopLocations[busstop]
    folium.Marker([long, lat], popup = busstop).add_to(m)

In [21]:
points = []
for timings in arrivalTimes2048:
    busstop, start, end = timings
    long, lat = busStopLocations[busstop]
    points.append([long, lat])

    
from colour import Color
red = Color("red")
colors = list(red.range_to(Color("green"),len(points)))
colors = [c.hex for c in colors]

for p, c in zip(points, colors):
    folium.Circle([p[0], p[1]], color = c ).add_to(m)


In [22]:
folium.ColorLinePolyLine(points).add_to(m)

AttributeError: module 'folium' has no attribute 'ColorLinePolyLine'

In [131]:
m

# Classify all buses

In [26]:
arrivalTimesofBuses = {b: arrivalTimes(df[df['node_id']==b]) for b in buses}

In [28]:
arrivalTimesofBuses[2048]

[['LT29', '2018-03-05T13:34:44.000Z', '2018-03-05T13:35:08.000Z'],
 ['University Hall', '2018-03-05T13:36:31.000Z', '2018-03-05T13:36:34.000Z'],
 ['Computer Centre', '2018-03-05T13:39:23.000Z', '2018-03-05T13:39:25.000Z'],
 ['Central Library', '2018-03-05T13:39:40.000Z', '2018-03-05T13:39:44.000Z'],
 ['LT13', '2018-03-05T13:41:10.000Z', '2018-03-05T13:41:29.000Z'],
 ['COM2', '2018-03-05T13:43:31.000Z', '2018-03-05T13:44:01.000Z'],
 ['BIZ2', '2018-03-05T13:46:08.000Z', '2018-03-05T13:46:22.000Z'],
 ['House 12', '2018-03-05T13:46:58.000Z', '2018-03-05T14:10:01.000Z'],
 ['COM2', '2018-03-05T14:11:02.000Z', '2018-03-05T14:11:17.000Z'],
 ['BIZ2', '2018-03-05T14:13:36.000Z', '2018-03-05T14:14:01.000Z'],
 ['House 12', '2018-03-05T14:14:27.000Z', '2018-03-05T14:14:29.000Z'],
 ['Between House 14 & 15',
  '2018-03-05T14:14:39.000Z',
  '2018-03-05T14:14:40.000Z'],
 ['PGP Terminal', '2018-03-05T14:15:17.000Z', '2018-03-05T14:15:20.000Z'],
 ['After Science Park Drive',
  '2018-03-05T14:31:14.000Z',

In [20]:
# filter out empty buses
arrivalTimesofBuses = {k:v for k,v in arrivalTimesofBuses.items() if len(v)}
len(arrivalTimesofBuses)

16

| Bus   | Service|
|-------|--------|
| 2048  |  A2/A1 |	
| 2055  | 		 |		
| 2058  | 		 |	
| 2060  | 		 |	
| 2063  | 	D2	 |		
| 2068  | 	D2	 |		
| 2078  | 	D2	 |		
| 2079  | 		 |		
| 2129  | 	A1	 |			
| 2025  | 	B2	 |	
| 2026  | 	B1	 |			
| 2027  | 	D1	 |		
| 2031  | 	DEAD |		
| 2033  | 	A2	 |		
| 2038  | 	A2	 |		
| 2042  | 	A2	 |		


In [33]:
spots = list(busStopLocations.keys())

In [34]:
spots

['University Town',
 'Museum',
 'Raffles Hall',
 'Opp. Yusof Ishak House',
 'Yusof Ishak House',
 'University Health Centre',
 'Opp. University Hall',
 'Block S17',
 'Opp. NUH',
 'Computer Centre',
 'Central Library',
 'Ventus (Opp. LT13)',
 'Temasek Hall',
 'COM2',
 'AS7',
 'Eusoff Hall',
 'PGP Terminal',
 'After Science Park Drive',
 'KR MRT Station',
 'NUH',
 'LT29',
 'BIZ2',
 'Opp. University Health Centre',
 'University Hall',
 'LT13',
 'House 7',
 'Between House 14 & 15',
 'House 12',
 'Opp. Hon Sui Sen Memorial Library',
 'The Japanese Primary School',
 'Unknown',
 'Opp. Block EA',
 'Opp. KR MRT Station',
 'Opp. House 12',
 'Kent Ridge Terminal']

In [29]:
A1 = set(['PGP Terminal', 'KR MRT Station', 'LT29', 'University Hall', 'Opp. University Health Centre', 'Yusof Ishak House', 'Central Library', 'LT13', 'AS7', 'Opp. Hon Sui Sen Memorial Library', 'BIZ2', 'Opp. House 12', 'House 7'])
A1E= set(['KR MRT Station', 'LT29', 'Central Library', 'BIZ2', 'PGP Terminal'])
A2 = set(['PGP Terminal', 'Opp. Hon Sui Sen Memorial Library', 'COM2', 'Ventus (Opp. LT13)', 'Computer Centre', 'Opp. Yusof Ishak House', 'Museum', 'University Health Centre', 'Opp. University Hall', 'Opp. KR MRT Station'])
B1 = set(['Kent Ridge Terminal','Computer Centre', 'Opp. Yusof Ishak House', 'University Town', 'Yusof Ishak House', 'Central Library', 'LT13', 'AS7', 'BIZ2'])
B2 = set(['Opp. Hon Sui Sen Memorial Library', 'Ventus (Opp. LT13)', 'Computer Centre', 'Opp. Yusof Ishak House', 'University Town', 'Raffles Hall', 'Opp. Block EA', 'Kent Ridge Terminal'])
C = set(['Kent Ridge Terminal''Computer Centre', 'Opp. Yusof Ishak House', 'Museum', 'University Health Centre', 'Opp. University Hall', 'Block S17', 'LT29', 'University Hall', 'Raffles Hall', 'Opp. Block EA'])
D1 = set(['Opp. Hon Sui Sen Memorial Library', 'COM2', 'Ventus (Opp. LT13)', 'Computer Centre', 'Opp. Yusof Ishak House', 'Museum', 'University Town', 'Yusof Ishak House', 'Central Library', 'LT13', 'AS7', 'BIZ2'])
D2 = set(['PGP Terminal', 'KR MRT Station', 'LT29', 'University Hall', 'Opp. University Health Centre', 'Museum', 'University Town', 'University Health Centre', 'Opp. University Hall', 'Block S17', 'Opp. KR MRT Station', 'BIZ2'])

In [25]:
services = {'A1':A1,'A1E':A1E,'A2':A2,'B1':B1,'B2':B2,'C':C,'D1':D1,'D2':D2}

In [33]:
import plotly.plotly as py
import plotly.graph_objs as go

header = list(services.keys())
rows = buses

def getStopsForBus(arrivalTimesofBuses, bus):
    '''
    returns set of bus stops that bus passes through
    '''
    return set([tup[0] for tup in arrivalTimesofBuses[bus]])

def percentageCloseness(stops, service):
    '''
    input: both sets of bus stops
    returns: intersect(stops,service)/service*100
    '''
    return len(stops.intersection(service))/len(service)*100
    
    
data = [[percentageCloseness(getStopsForBus(arrivalTimesofBuses, bus), services[s]) 
         for s in header] for bus in buses]


TypeError: unsupported operand type(s) for /: 'set' and 'int'