In [1]:
import numpy as np
import matplotlib.pyplot as plt
import sqlite3
from tqdm.notebook import tqdm
from datetime import datetime
from random import choice
from glob import glob
## gmplot - dependency to create geoplot
import gmplot

In [2]:
# Feed database for the day under analysis.
FEED_DB_GLOB = "input/feed/bus_movements_2019_07_07.db"

files = glob(FEED_DB_GLOB)
if not files:
    # Fail with the offending path instead of an anonymous IndexError on files[0].
    raise FileNotFoundError("No feed database matches {!r}".format(FEED_DB_GLOB))
db = sqlite3.connect(files[0])

# Rows are (speed, timestamp, route_id, trip_id, lat, lng).
# fetchall() materialises the result as a list so that cells iterating over
# `speed_data` can be re-run; a bare cursor is exhausted after one pass and a
# second pass would silently see no rows.
speed_data = db.execute("Select speed, timestamp, route_id, trip_id, lat, lng from vehicle_feed where route_id=426").fetchall()

In [3]:
# Sphere radius used for distances, in metres (6373 km, as in the original formula).
EARTH_RADIUS_M = 6373.0 * 1000


def to_radian(x):
    """Convert an angle in degrees to radians (scalar or NumPy array)."""
    return np.radians(x)


def haversine_dist(lat1, lon1, lat2, lon2, radius_m=EARTH_RADIUS_M):
    """Great-circle distance between two points using the haversine formula.

    Parameters
    ----------
    lat1, lon1 : float or array-like
        Latitude and longitude of the first point, in degrees.
    lat2, lon2 : float or array-like
        Latitude and longitude of the second point, in degrees.
    radius_m : float, optional
        Radius of the sphere in metres. Defaults to ``EARTH_RADIUS_M``.

    Returns
    -------
    float or numpy.ndarray
        Distance along the sphere's surface, in the same unit as ``radius_m``.
    """
    phi1 = to_radian(lat1)
    phi2 = to_radian(lat2)
    delta_lon = to_radian(lon1) - to_radian(lon2)

    a = np.sin((phi1 - phi2) / 2) ** 2 + np.cos(phi1) * np.cos(phi2) * np.sin(delta_lon / 2) ** 2
    # Rounding can push `a` a hair outside [0, 1] for (near-)antipodal points,
    # which would make arcsin return NaN.
    a = np.clip(a, 0.0, 1.0)

    return radius_m * 2 * np.arcsin(np.sqrt(a))

In [4]:
# Nested lookup of the feed: tree[route_id][trip_id][timestamp] -> array([speed, lat, lng]).
# Route ids and timestamps are stored as ints; trip ids are kept as read from the feed.
tree = {}
count = 0
for speed, timestamp, route, trip, lat, lng in tqdm(speed_data):
    pings_of_trip = tree.setdefault(int(route), {}).setdefault(trip, {})

    moment = int(timestamp)
    # Only the first row seen for a given timestamp is kept.
    if moment not in pings_of_trip:
        pings_of_trip[moment] = np.array([speed, lat, lng]).astype(float)

HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…




In [5]:
# stops_data[stop_id] -> (second-to-last column, last column, first column, second column, row index).
# The two trailing columns are parsed as floats and are used elsewhere as the
# stop's latitude and longitude; presumably the file is GTFS-style — confirm the
# column layout against input/static/stops.txt.
stops_data = dict()
i = 0
# `with` closes the file handle, which the bare open(...).read() never did.
with open("input/static/stops.txt") as stops_file:
    next(stops_file, None)  # skip the header row
    for line in stops_file:
        line = line.rstrip("\n")
        # Skip blank lines explicitly rather than slicing off the last line:
        # slicing silently dropped the final stop when the file had no trailing newline.
        if not line.strip():
            continue
        split = line.split(",")
        stops_data[int(split[0])] = (float(split[-2]), float(split[-1]), split[0], split[1], i)
        i += 1

In [20]:
# trips_data[trip_id] -> list of (int(column 3), int(last column)) pairs, one per row.
# Elsewhere the first element is used as a stops_data key (a stop id) and the
# second as the sort key that orders the stops within a trip.
trips_data = dict()
# `with` closes the file handle, which the bare open(...).read() never did.
with open("input/static/stop_times.txt") as stop_times_file:
    next(stop_times_file, None)  # skip the header row
    for line in stop_times_file:
        line = line.rstrip("\n")
        # Skip blank lines explicitly rather than slicing off the last line:
        # slicing silently dropped the final row when the file had no trailing newline.
        if not line.strip():
            continue
        split = line.split(",")
        trip_id = split[0]
        trips_data.setdefault(trip_id, []).append((int(split[3]), int(split[-1])))

In [23]:
# routes_data[route_id] -> array of stop ids in visiting order, taken from the
# first trip listed for that route in trips.txt.
routes_data = dict()
# `with` closes the file handle, which the bare open(...).read() never did.
with open("input/static/trips.txt") as trips_file:
    next(trips_file, None)  # skip the header row
    for line in trips_file:
        line = line.rstrip("\n")
        # Skip blank lines explicitly rather than slicing off the last line:
        # slicing silently dropped the final row when the file had no trailing newline.
        if not line.strip():
            continue
        split = line.split(",")
        route_id = int(split[0])
        trip_id = split[2]
        if route_id not in routes_data:
            # Sort the trip's (stop id, order key) pairs by the order key and keep the stop ids.
            # Raises KeyError if this trip has no rows in trips_data.
            ordered_stops = sorted(trips_data[trip_id], key=lambda stop: stop[1])
            routes_data[route_id] = np.array(ordered_stops)[:, 0]

In [84]:
# A ping counts as "at" a stop when it is closer than this many metres.
STOP_RADIUS_M = 150

# stop_tree[route_id][trip_id] is a list with one entry per stop of the route:
# None if no ping was matched to that stop, otherwise a list of
# (timestamp, distance to the matched stop, distance to the stop that ended the scan).
stop_tree = {}
for route_id in tree:
    route_stops = routes_data[route_id]
    stop_tree[route_id] = {}
    for trip_id in tqdm(tree[route_id]):
        pings = tree[route_id][trip_id]
        visits = [None] * len(route_stops)
        stop_tree[route_id][trip_id] = visits

        time_stamps = sorted(pings.keys())
        # Index of the stop the vehicle is currently matched to. It only ever
        # moves forward, so a trip is assumed to visit stops in route order.
        stop_id = 0
        for each_click in time_stamps:
            ping_lat, ping_lng = pings[each_click][1], pings[each_click][2]

            current_stop = stops_data[route_stops[stop_id]]
            prev_distance = haversine_dist(current_stop[0], current_stop[1], ping_lat, ping_lng)
            # Start from the matched-stop distance so the recorded value is well
            # defined even when there is no later stop to scan; previously it was
            # left over from an earlier ping (or undefined on the very first one).
            cur_distance = prev_distance

            # Advance while the following stop is strictly closer to this ping.
            for each_stop in range(stop_id + 1, len(route_stops)):
                candidate = stops_data[route_stops[each_stop]]
                cur_distance = haversine_dist(candidate[0], candidate[1], ping_lat, ping_lng)
                if cur_distance < prev_distance:
                    prev_distance = cur_distance
                    stop_id = each_stop
                else:
                    break

            if prev_distance < STOP_RADIUS_M:
                if visits[stop_id] is None:
                    visits[stop_id] = []
                visits[stop_id].append((each_click, prev_distance, cur_distance))

HBox(children=(FloatProgress(value=0.0, max=62.0), HTML(value='')))




In [85]:

# Route and trip rendered on the map.
MAP_ROUTE_ID = 426
MAP_TRIP_ID = '12298'

MARKER_TITLE = "time spend : {} seconds, initial distance : {}, final distance: {}, final difference : {}"


def _add_stop_marker(gmap, stop, points, found_color, missing_color):
    """Add one marker for a stop, coloured by whether the trip was matched to it.

    Parameters
    ----------
    gmap : gmplot.GoogleMapPlotter
        Map to draw on.
    stop : tuple
        A stops_data entry; elements 0 and 1 are used as latitude and longitude.
    points : list or None
        The stop's entry in stop_tree: a list of
        (timestamp, distance, distance-to-next) tuples, or None if unmatched.
    found_color, missing_color : str
        Marker colour when `points` is a list / when it is None.
    """
    if points is None:
        gmap.marker(stop[0], stop[1], color=missing_color, title="Not found")
        return

    # Title fields: last minus first ping timestamp, first ping's distance,
    # last ping's distance, last ping's distance to the following stop.
    gmap.marker(stop[0], stop[1], color=found_color,
                title=MARKER_TITLE.format(
                    (points[-1][0] - points[0][0]), np.round(points[0][1], 2),
                    np.round(points[-1][1], 2), np.round(points[-1][2], 2)))


map_stops = routes_data[MAP_ROUTE_ID]
map_visits = stop_tree[MAP_ROUTE_ID][MAP_TRIP_ID]

# Centre the map on the first stop of the route.
gmap3 = gmplot.GoogleMapPlotter(lat = stops_data[map_stops[0]][0], lng = stops_data[map_stops[0]][1], zoom=15)

last_stop = len(map_stops) - 1
for e in range(len(map_stops)):
    points = map_visits[e]
    if e == 0:
        # The first stop is now guarded like the others; it used to be
        # dereferenced unconditionally and failed when the trip never came near it.
        marker_colors = ("green", "yellow")
    elif e == last_stop:
        marker_colors = ("red", "yellow")
    else:
        marker_colors = ("blue", "pink")
    _add_stop_marker(gmap3, stops_data[map_stops[e]], points, *marker_colors)

gmap3.draw( "output/map.html" )

In [120]:
# Rows are indexed as start_stop + 54*end_stop, columns by 600-second time slot.
matrix = np.zeros(shape=(54 * 54, 144), dtype=float)

In [121]:
# Unix time (seconds) that time slots are counted from: 2019-07-06 18:30:00 UTC,
# i.e. 2019-07-07 00:00 at UTC+05:30 — presumably local midnight of the feed date; confirm.
start_date = 1_562_437_800

In [156]:
# Not used in this cell.
times = []
count = 0

N_STOPS = 54          # row stride of `matrix` (rows are start_stop + N_STOPS*end_stop)
SLOT_SECONDS = 600    # width of one time slot
MAX_SLOT_SPAN = 10    # stop pairs more than this many slots apart are ignored
n_slots = matrix.shape[1]


def _visit_slot(visit):
    """Time-slot index of a stop visit.

    `visit` is a non-empty list of (timestamp, ...) tuples; the slot is taken
    from the midpoint of the first and last timestamps, relative to `start_date`.
    """
    midpoint = (visit[0][0] + visit[-1][0]) // 2
    return (midpoint - start_date) // SLOT_SECONDS


# For every trip and every ordered pair of visited stops, mark the time slots
# between the two visits in the pair's row of `matrix`.
# NOTE(review): values are assigned, not accumulated, so when several trips
# touch the same cell the last one wins — confirm this is intended.
for route_id in stop_tree:
    stops = routes_data[route_id]
    for each_trip in stop_tree[route_id]:
        trip_visits = stop_tree[route_id][each_trip]
        for start_stop in range(len(stops)):
            if trip_visits[start_stop] is None:
                continue
            start_time = _visit_slot(trip_visits[start_stop])
            # Skip visits before start_date or past the last column
            # (the latter used to raise IndexError).
            if start_time < 0 or start_time >= n_slots:
                continue

            for end_stop in range(start_stop+1, len(stops)):
                if trip_visits[end_stop] is None:
                    continue
                end_time = _visit_slot(trip_visits[end_stop])

                span = end_time - start_time
                if span < 0 or span > MAX_SLOT_SPAN or end_time >= n_slots:
                    continue

                # The pair covers span+1 slots; give each an equal share so the
                # weights total 1. This reduces to 1 for a single slot. The old
                # 1/span weight over span+1 slots summed to more than 1.
                matrix[start_stop + N_STOPS*end_stop, start_time:end_time+1] = 1/(span + 1)


In [157]:
# Display the matrix as the cell output.
matrix

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [155]:
# Show the per-stop visit lists of one trip. `route_id` and `each_trip` are not
# set in this cell: they hold whatever values a loop in another cell left
# behind, so the result depends on which cells were run before this one.
stop_tree[route_id][each_trip]

[[(1562464659, 72.02095966413057, 455.46247907079993),
  (1562464679, 71.89855877244229, 455.4649213637021),
  (1562464689, 71.85392497017966, 455.46659001985745),
  (1562464699, 71.85392497017966, 455.46659001985745),
  (1562464709, 72.59454789385518, 454.72211050011197),
  (1562464719, 72.63872658541477, 454.72043913913086),
  (1562464739, 73.47372058714843, 453.9737985057126),
  (1562464759, 74.21311541852997, 453.2293128091625),
  (1562464769, 74.18876809690381, 453.22970432675737),
  (1562464789, 74.1418707036624, 453.2307852843656),
  (1562464799, 74.03513704694367, 453.23522553986874),
  (1562464809, 73.97832424196409, 453.23908134557104),
  (1562464829, 75.4985552423645, 451.7474632933783),
  (1562464849, 76.13861377318743, 451.01223568889276),
  (1562464869, 76.40825278859423, 450.9962491725083),
  (1562464899, 77.02598163371006, 451.0089452320678),
  (1562464919, 75.4184819953923, 452.492080819334),
  (1562464929, 75.08202258231037, 452.4843584562933),
  (1562464949, 74.73829

(27, 35)

In [130]:
 # Timestamp of the last ping recorded at one stop of one trip. `route_id`,
 # `each_trip` and `start_stop` are leftover loop variables from another cell,
 # so the result depends on which cells were run before this one.
 stop_tree[route_id][each_trip][start_stop][-1][0]

1562437792

In [101]:
# Number of per-stop entries for trip '12312' on route 426 (one entry per stop of the route).
len(stop_tree[426]['12312'])

54

In [89]:
# Trip ids of route 426 that have an entry in stop_tree.
stop_tree[426].keys()

dict_keys(['12313', '12315', '12312', '12310', '12314', '12265', '12266', '12267', '12268', '12269', '12270', '12271', '12272', '12273', '12274', '12275', '12276', '12277', '12278', '12279', '12280', '12281', '12282', '12283', '12284', '12285', '12286', '12287', '12288', '12289', '12290', '12291', '12292', '12293', '12294', '12295', '12296', '12297', '12298', '12299', '12300', '12301', '12302', '12303', '12304', '12305', '12306', '12307', '12308', '12309', '12311', '12316', '12317', '12318', '12319', '12320', '12321', '12322', '12323', '12324', '12325', '12326'])