In [1]:
# Python == 3.8
import json
import numpy as np
import pandas as pd
from copy import deepcopy
from pprint import pprint
import re
import os
from utils import extract_integer
import functools

In [3]:
json_file_path = "data/vehiclePosition01.json"

# Reading JSON file
with open(json_file_path) as json_file:
    json_dict = json.load(json_file)

In [10]:
# Information about the lineIds (buses, train, tram)
# LineId: The unique number of the line route
# directionId: the direction in which the specific vehicle is traveling
# pointId: The last stop travered, stop which is left behind
# distanceFromPoint: Distance from the left behind stop
single_line_info: dict = json_dict["data"][0]["Responses"][0]
single_line_info

{'lines': [{'lineId': '1',
   'vehiclePositions': [{'directionId': '8731',
     'distanceFromPoint': 0,
     'pointId': '8162'},
    {'directionId': '8731', 'distanceFromPoint': 0, 'pointId': '8131'},
    {'directionId': '8162', 'distanceFromPoint': 0, 'pointId': '8092'},
    {'directionId': '8731', 'distanceFromPoint': 0, 'pointId': '8011'},
    {'directionId': '8161', 'distanceFromPoint': 1, 'pointId': '8122'},
    {'directionId': '8161', 'distanceFromPoint': 0, 'pointId': '8742'},
    {'directionId': '8731', 'distanceFromPoint': 0, 'pointId': '8161'},
    {'directionId': '8731', 'distanceFromPoint': 1, 'pointId': '8101'},
    {'directionId': '8731', 'distanceFromPoint': 0, 'pointId': '8051'},
    {'directionId': '8731', 'distanceFromPoint': 0, 'pointId': '8291'},
    {'directionId': '8162', 'distanceFromPoint': 0, 'pointId': '8272'},
    {'directionId': '8161', 'distanceFromPoint': 0, 'pointId': '8052'}]},
  {'lineId': '2',
   'vehiclePositions': [{'directionId': '8763',
     'dista

In [16]:
info_for_line_ids = []

for data in json_dict["data"]:
    timestamp = data["time"]
    # Rest of the stuff
    for upper_line_group in data["Responses"]:
        # print(upper_line_group)
        if upper_line_group is None:
            continue
        for line_group in upper_line_group["lines"]:
#             print(line_group)
            line_id_info = [{"timestamp": timestamp, "lineId": line_group["lineId"], **elem}
                            for elem in line_group["vehiclePositions"]]

            info_for_line_ids.extend(deepcopy(line_id_info))

lines_df = pd.DataFrame(info_for_line_ids)
# lines_df.to_csv(f"vehiclePosition{i}.csv")
lines_df.head()

Unnamed: 0,timestamp,lineId,directionId,distanceFromPoint,pointId
0,1632229443663,1,8731,0,8121
1,1632229443663,1,8731,0,8061
2,1632229443663,1,8161,0,8122
3,1632229443663,1,8161,0,8052
4,1632229443663,1,8162,0,8733


## Works on processed data

In [2]:
proc_lines = pd.read_csv("data_processed/vehiclePosition08.csv")
# proc_lines = proc_lines.drop(columns=["Unnamed: 0"])
idx_lineId_95 = proc_lines["lineId"] == 95
line_95 = deepcopy( proc_lines.loc[idx_lineId_95] )


In [3]:
line_95.loc[:, "et"] = line_95.timestamp.apply(lambda x: pd.Timestamp(x, unit='ms') )

In [4]:
line_95.head()

Unnamed: 0,timestamp,lineId,directionId,distanceFromPoint,pointId,et
637,1631606899996,95,7104,339,1124,2021-09-14 08:08:19.996
638,1631606899996,95,4318,0,7104,2021-09-14 08:08:19.996
639,1631606899996,95,4318,0,1128,2021-09-14 08:08:19.996
640,1631606899996,95,7104,86,4365,2021-09-14 08:08:19.996
641,1631606899996,95,7104,66,4362,2021-09-14 08:08:19.996


In [5]:
# Figure out what the bus is doing. Why are there more then 2 directionIds ?
line_95.groupby(by="directionId").count()

Unnamed: 0_level_0,timestamp,lineId,distanceFromPoint,pointId,et
directionId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1233,54,54,54,54,54
2278,92,92,92,92,92
2392,177,177,177,177,177
3558,2,2,2,2,2
4318,1986,1986,1986,1986,1986
6432,118,118,118,118,118
7104,1734,1734,1734,1734,1734


In [6]:
# line_95.directionId.unique()
line_95.pointId.unique()

array([1124, 7104, 1128, 4365, 4362, 3558, 4351, 1233, 1165, 4358, 4363,
       1703, 4366, 1906, 1909, 1453, 6354, 1981, 4359, 4308, 4313, 4307,
       1781, 4364, 1983, 4355, 6432, 4348, 3630, 4360, 1455, 4303, 4314,
       4309, 4357, 4367, 4304, 3514, 4315, 4310, 4305, 1776, 6433, 7066,
       4306, 1729, 4311, 2268, 4318])

In [7]:
idxs = line_95.loc[:, "directionId"] == 4318
line_95_direc_4318 = line_95.loc[idxs]
# line_95_direc_4318.to_csv("line_95_direction-4318.csv")

In [8]:
# line_95_direc_4318 = line_95_direc_4318.reset_index(drop=True)
line_95_direc_4318.reset_index(drop=True, inplace=True)
line_95_direc_4318

Unnamed: 0,timestamp,lineId,directionId,distanceFromPoint,pointId,et
0,1631606899996,95,4318,0,7104,2021-09-14 08:08:19.996
1,1631606899996,95,4318,0,1128,2021-09-14 08:08:19.996
2,1631606899996,95,4318,112,1233,2021-09-14 08:08:19.996
3,1631606899996,95,4318,34,1233,2021-09-14 08:08:19.996
4,1631606899996,95,4318,0,7104,2021-09-14 08:08:19.996
...,...,...,...,...,...,...
1981,1631615154340,95,4318,217,4313,2021-09-14 10:25:54.340
1982,1631615154340,95,4318,102,7066,2021-09-14 10:25:54.340
1983,1631615154340,95,4318,0,7104,2021-09-14 10:25:54.340
1984,1631615154340,95,4318,274,4305,2021-09-14 10:25:54.340


In [9]:
# Create a csv file with -- marker to denote rows with same timestamp
def mark_rows_for_manual_analysis(line_95_direc_4318):
    line_95_direc_4318.reset_index(drop=True, inplace=True)
    matching_list = [""]
    for i, row in line_95_direc_4318.iterrows():
        if i == 0: 
            continue
        if row["et"] == line_95_direc_4318.loc[i-1, "et"]:
            print(row["et"])
            matching_list.append("--")
        else:
            matching_list.append("")

    line_95_direc_4318["matching_list"] = matching_list
    line_95_direc_4318.to_csv("line_95_direction-4318.csv", index=False)

## Working with shape file

In [10]:
import shapefile
import matplotlib.pyplot as plt
import os

In [11]:
stops_sf = shapefile.Reader("data/2109_STIB_MIVB_Network_shapefiles/ACTU_STOPS")
stop_obj = stops_sf.records()[0]
stop_obj.__dict__

{'_Record__field_positions': {'Code_Ligne': 0,
  'Variante': 1,
  'succession': 2,
  'stop_id': 3,
  'descr_fr': 4,
  'descr_nl': 5,
  'alpha_fr': 6,
  'alpha_nl': 7,
  'coord_x': 8,
  'coord_y': 9,
  'mode': 10,
  'numero_lig': 11,
  'terminus': 12},
 '_Record__oid': 0}

#### Separate points based on their Varient (direction)

In [12]:
# Separate stops so that when you group by lineID (succession) only stops of single direction are returned
stops_v1 = [stop for stop in stops_sf.records() if stop.Variante == 1]
stops_v2 = [stop for stop in stops_sf.records() if stop.Variante == 2]

#### Order stops by succession

In [13]:
# Separate out stops related to a single line e.g. All stops of 95 in front of key '095b' 
ordered_dict_v1 = {}  # "095b": [stop_obj, stop_obj, stop_obj]
for stop in stops_v1:
    if stop.Code_Ligne not in ordered_dict_v1:
        ordered_dict_v1[stop.Code_Ligne] = [s for s in stops_v1 if s.numero_lig == stop.numero_lig ]

ordered_dict_v2 = {}  # "095b": [stop_obj, stop_obj, stop_obj]
for stop in stops_v2:
    if stop.Code_Ligne not in ordered_dict_v2:
        ordered_dict_v2[stop.Code_Ligne] = [s for s in stops_v2 if s.numero_lig == stop.numero_lig ]
    
ordered_dict_v1['095b'][0:5]

[Record #2332: ['095b', 1, 1, '2278', 'GRAND-PLACE', 'GROTE MARKT', 'Grand-Place', 'Grote Markt', 148819.0, 170485.0, 'B', 95, 'WIENER'],
 Record #2333: ['095b', 1, 2, '2268', 'BIBLIOTHEQUE', 'BIBLIOTHEEK', 'Bibliothèque', 'Bibliotheek', 148993.0, 170336.0, 'B', 95, 'WIENER'],
 Record #2334: ['095b', 1, 3, '1703', 'GRAND SABLON', 'GROTE ZAVEL', 'Grand Sablon', 'Grote Zavel', 148959.0, 170147.0, 'B', 95, 'WIENER'],
 Record #2335: ['095b', 1, 4, '1128', 'PETIT SABLON', 'KLEINE ZAVEL', 'Petit Sablon', 'Kleine Zavel', 149093.0, 169943.0, 'B', 95, 'WIENER'],
 Record #2336: ['095b', 1, 5, '6354B', 'ROYALE', 'KONING', 'Royale', 'Koning', 149281.3, 170126.2, 'B', 95, 'WIENER']]

In [14]:
# Sort the stops according to succession
for Code_Ligne in ordered_dict_v1.keys():
    ordered_dict_v1[Code_Ligne].sort(key = lambda x: x.succession)
    
for Code_Ligne in ordered_dict_v2.keys():
    ordered_dict_v2[Code_Ligne].sort(key = lambda x: x.succession)

# View sorted stop
for key in ordered_dict_v2:
    print(f'{key} -> { " -> ".join( [s.alpha_fr for s in ordered_dict_v1[key]] ) } ')
    break

012b -> Brussels Airport -> Bourget -> Da Vinci -> Genève -> Meiser -> Schuman -> Luxembourg -> Trône -> Trône 


In [15]:
ordered_dict_v2['095b']

[Record #2357: ['095b', 2, 1, '4351', 'WIENER', 'WIENER', 'Wiener', 'Wiener', 153554.0, 165360.0, 'B', 95, 'GRAND-PLACE'],
 Record #2358: ['095b', 2, 2, '4348', 'MIRAVAL', 'MIRAVAL', 'Miraval', 'Miraval', 153191.0, 165531.0, 'B', 95, 'GRAND-PLACE'],
 Record #2359: ['095b', 2, 3, '4355', 'LES 3 TILLEULS', 'DRIE LINDEN', 'Les Trois Tilleuls', 'Drie Linden', 153110.0, 165811.0, 'B', 95, 'GRAND-PLACE'],
 Record #2360: ['095b', 2, 4, '1455', 'CALYPSO 2000', 'CALYPSO 2000', 'Calypso 2000', 'Calypso 2000', 152837.0, 165853.0, 'B', 95, 'GRAND-PLACE'],
 Record #2361: ['095b', 2, 5, '4357', 'VANDER ELST', 'VANDER ELST', 'Vander Elst', 'Vander Elst', 152680.0, 166105.0, 'B', 95, 'GRAND-PLACE'],
 Record #2362: ['095b', 2, 6, '4358', 'KEYM', 'KEYM', 'Keym', 'Keym', 152426.0, 166621.0, 'B', 95, 'GRAND-PLACE'],
 Record #2363: ['095b', 2, 7, '4359', 'ARCADES', 'ARCADEN', 'Arcades', 'Arcaden', 151974.0, 166672.0, 'B', 95, 'GRAND-PLACE'],
 Record #2364: ['095b', 2, 8, '4360', 'RELAIS', 'PLEISTERPLAATS',

In [16]:
ordered_dict_v1['095b']

[Record #2332: ['095b', 1, 1, '2278', 'GRAND-PLACE', 'GROTE MARKT', 'Grand-Place', 'Grote Markt', 148819.0, 170485.0, 'B', 95, 'WIENER'],
 Record #2333: ['095b', 1, 2, '2268', 'BIBLIOTHEQUE', 'BIBLIOTHEEK', 'Bibliothèque', 'Bibliotheek', 148993.0, 170336.0, 'B', 95, 'WIENER'],
 Record #2334: ['095b', 1, 3, '1703', 'GRAND SABLON', 'GROTE ZAVEL', 'Grand Sablon', 'Grote Zavel', 148959.0, 170147.0, 'B', 95, 'WIENER'],
 Record #2335: ['095b', 1, 4, '1128', 'PETIT SABLON', 'KLEINE ZAVEL', 'Petit Sablon', 'Kleine Zavel', 149093.0, 169943.0, 'B', 95, 'WIENER'],
 Record #2336: ['095b', 1, 5, '6354B', 'ROYALE', 'KONING', 'Royale', 'Koning', 149281.3, 170126.2, 'B', 95, 'WIENER'],
 Record #2337: ['095b', 1, 6, '6433', 'TRONE', 'TROON', 'Trône', 'Troon', 149885.0, 169988.0, 'B', 95, 'WIENER'],
 Record #2338: ['095b', 1, 7, '1729', 'SCIENCE', 'WETENSCHAP', 'Science', 'Wetenschap', 150043.6, 169936.7, 'B', 95, 'WIENER'],
 Record #2339: ['095b', 1, 8, '1233', 'LUXEMBOURG', 'LUXEMBURG', 'Luxembourg', 

In [17]:
# Entire order of stops in a user friendly way

line_id = "095b"
# line_to_print = ordered_dict_v2["095b"]
line_to_print = ordered_dict_v1["095b"]

print( f'{line_id} -> { " -> ".join( [s.alpha_fr for s in line_to_print ]) }' )

095b -> Grand-Place -> Bibliothèque -> Grand Sablon -> Petit Sablon -> Royale -> Trône -> Science -> Luxembourg -> Idalie -> Blyckaerts -> Germoir -> Rodin -> Delporte -> Etterbeek Gare -> Thys -> Cimetière d'Ixelles -> Relais -> Arcades -> Keym -> Vander Elst -> Calypso 2000 -> Les Trois Tilleuls -> Cerisaie -> Fauconnerie -> Wiener


#### Remove rows with technical stops (stops not in gtfs files)

In [18]:
line_95_direc_4318.loc[:, "pointId"].unique()

array([7104, 1128, 1233, 1703, 3558, 1906, 1909, 1453, 6354, 4308, 4313,
       4307, 4303, 4314, 4309, 4304, 4315, 4310, 4305, 6433, 7066, 4306,
       1729, 4311, 2268, 4318])

# ToDo

In [19]:
# Verify that there is no stop in line_<id>_direc_<dir-ID> that is not in its route from Shape-files

In [20]:
idx = line_95_direc_4318.loc[:, "pointId"] == 7104
a = deepcopy( idx[0: 5] )
b = deepcopy( idx[6: 11] )
# idx = (line_95_direc_4318.loc[:, "pointId"] == 7104) & (line_95_direc_4318.loc[:, "pointId"] == 7066)

In [21]:
unique_stops = line_95_direc_4318.loc[:, "pointId"].unique()
all_stops = [stop.stop_id for stop in stops_sf.records()]
stops_to_remove = []
for stop in unique_stops:
    # Remember to convert stops to str
    if str(stop) not in all_stops:
        stops_to_remove.append(stop)
stops_to_remove

[7104, 7066]

In [22]:
# Find idxs where technical stops are
idxs = [False] * len(line_95_direc_4318)
for stop in stops_to_remove:
    true_idxs = line_95_direc_4318.loc[:, "pointId"] == stop
    idxs = idxs | true_idxs
idxs = ~idxs

print(f"Length: {len(line_95_direc_4318)}")
line_95_direc_4318 = line_95_direc_4318[idxs]
print(f"Length: {len(line_95_direc_4318)}")
line_95_direc_4318.head()

Length: 1986
Length: 1365


Unnamed: 0,timestamp,lineId,directionId,distanceFromPoint,pointId,et
1,1631606899996,95,4318,0,1128,2021-09-14 08:08:19.996
2,1631606899996,95,4318,112,1233,2021-09-14 08:08:19.996
3,1631606899996,95,4318,34,1233,2021-09-14 08:08:19.996
6,1631606963238,95,4318,217,1128,2021-09-14 08:09:23.238
7,1631606963238,95,4318,6,1703,2021-09-14 08:09:23.238


## Identidying busses

In [23]:
# change 
line_95_direc_4318.loc[:, "bus_id"] = "-"
line_95_direc_4318 = line_95_direc_4318[["timestamp", "lineId", "directionId", "pointId", 
                                         "distanceFromPoint", "et", "bus_id"]]
unique_times = line_95_direc_4318.timestamp.unique()
idx = line_95_direc_4318.loc[:, "timestamp"] == unique_times[0]
line_95_direc_4318[idx]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = value


Unnamed: 0,timestamp,lineId,directionId,pointId,distanceFromPoint,et,bus_id
1,1631606899996,95,4318,1128,0,2021-09-14 08:08:19.996,-
2,1631606899996,95,4318,1233,112,2021-09-14 08:08:19.996,-
3,1631606899996,95,4318,1233,34,2021-09-14 08:08:19.996,-


In [24]:
# def distance_btw_busses(bus_df, bus_df):
#     """
#     bus_df is a row in flattened csv
#     """
ordered_list_of_stops_95 = [ extract_integer(stop.stop_id) for stop in ordered_dict_v1["095b"] ]
# [ ('2278', 0), (2268, '1') ]
lst = list( zip(ordered_list_of_stops_95, range(1, len(ordered_list_of_stops_95) + 1) ) )
stop_id_to_int = { key:val for key, val in lst }
stop_id_to_int

{2278: 1,
 2268: 2,
 1703: 3,
 1128: 4,
 6354: 5,
 6433: 6,
 1729: 7,
 1233: 8,
 1906: 9,
 1909: 10,
 4303: 11,
 4304: 12,
 4305: 13,
 4306: 14,
 4307: 15,
 3558: 16,
 4308: 17,
 4309: 18,
 4310: 19,
 4311: 20,
 1453: 21,
 4313: 22,
 4314: 23,
 4315: 24,
 4318: 25}

#### Mapping pointIds to int-stop-ID, ( int form ) so that we can perform arithmatic with them

In [25]:
pointId_to_int = [ stop_id_to_int[stop_id] for stop_id in line_95_direc_4318.pointId ]
line_95_direc_4318["istop_id"] = pointId_to_int
line_95_direc_4318 = line_95_direc_4318.filter(["timestamp", "lineId", "directionId", "pointId", "istop_id", 
                                         "distanceFromPoint", "et", "bus_id"])
line_95_direc_4318
# mark_rows_for_manual_analysis(line_95_direc_4318)

Unnamed: 0,timestamp,lineId,directionId,pointId,istop_id,distanceFromPoint,et,bus_id
1,1631606899996,95,4318,1128,4,0,2021-09-14 08:08:19.996,-
2,1631606899996,95,4318,1233,8,112,2021-09-14 08:08:19.996,-
3,1631606899996,95,4318,1233,8,34,2021-09-14 08:08:19.996,-
6,1631606963238,95,4318,1128,4,217,2021-09-14 08:09:23.238,-
7,1631606963238,95,4318,1703,3,6,2021-09-14 08:09:23.238,-
...,...,...,...,...,...,...,...,...
1977,1631615154340,95,4318,4314,23,145,2021-09-14 10:25:54.340,-
1979,1631615154340,95,4318,6354,5,80,2021-09-14 10:25:54.340,-
1981,1631615154340,95,4318,4313,22,217,2021-09-14 10:25:54.340,-
1984,1631615154340,95,4318,4305,13,274,2021-09-14 10:25:54.340,-


#### Trying to sperate out vehicles with similar timestamp make list of lists

In [26]:
# [ [ busses with same timestamp ], [busses with same timestamp], ... ]
lst_of_same_time95_df = [deepcopy(line_95_direc_4318[line_95_direc_4318.loc[:, "timestamp"] == ut]) 
                         for ut in unique_times]

#### Sort this structure

In [27]:
# random = [(2, 2), (4, 3), (3, 4), (2, 5), (4, 1), (1, 3), (3, 2)]
# sorted(random, key = lambda x: x[0])
# sorted(random, key = lambda x: (x[1], x[0]) )
#out [(4, 1), (2, 2), (3, 2), (1, 3), (4, 3), (3, 4), (2, 5)]
# sorted(random, key = lambda x: x[1] )
#out [(4, 1), (2, 2), (3, 2), (4, 3), (1, 3), (3, 4), (2, 5)]

In [28]:
# def sort_stops(df: pd.DataFrame, stop_id_to_int: dict)

## No, weakness of this, given the data, is that when new bus comes wrong points will connect.
# Make sure order in this innner list in descending succession wise and then match to closet. 
# So we match the furthest bus first to nearest one in the prev timestamp

for i in range(len(lst_of_same_time95_df)):
    lst_of_same_time95_df[i] = lst_of_same_time95_df[i].sort_values(by=["istop_id", "distanceFromPoint"], )
    lst_of_same_time95_df[i].reset_index(drop=True, inplace=True)

In [29]:
# # For testing: Flatten lst_of_same_time95_df
# import functools
# df = functools.reduce(lambda a, b: a.append(b), lst_of_same_time95_df)
# mark_rows_for_manual_analysis(df)

In [67]:
import plotly.express as px
import plotly.graph_objects as go
df = line_95_direc_4318
# x
df.et
# y 
df["y"] = (df.istop_id * 800 ) + (df.distanceFromPoint)

fig = px.scatter(x=df.et, y=df["y"])
fig.update_layout(
    xaxis=dict(
        rangeselector=dict(
            buttons=list([
                dict(step="all")
            ])
        ),
        rangeslider=dict(
            visible=True
        ),
        type="date"
    ))
fig.show()
fig.write_html("buses.html")

In [None]:
# Testing

In [65]:
df.head()

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num
60,Australia,Oceania,1952,69.12,8691212,10039.59564,AUS,36
61,Australia,Oceania,1957,70.33,9712569,10949.64959,AUS,36
62,Australia,Oceania,1962,70.93,10794968,12217.22686,AUS,36
63,Australia,Oceania,1967,71.1,11872264,14526.12465,AUS,36
64,Australia,Oceania,1972,71.93,13177000,16788.62948,AUS,36


In [71]:
dfn = px.data.gapminder()
dfn = px.data.gapminder().query("continent == 'Oceania'")
# fig = px.line(df, x='year', y='lifeExp', color='country', markers=True)
fig = px.scatter(x=df.et, y=df["y"])
fig = px.line(df, x='et', y="y", color='pointId')
fig.show()

In [None]:
### ---------------- Test end -----------------

In [46]:
lst_of_same_time95_df[0]

Unnamed: 0,timestamp,lineId,directionId,pointId,istop_id,distanceFromPoint,et,bus_id
0,1631606899996,95,4318,1128,4,0,2021-09-14 08:08:19.996,-
1,1631606899996,95,4318,1233,8,34,2021-09-14 08:08:19.996,-
2,1631606899996,95,4318,1233,8,112,2021-09-14 08:08:19.996,-


In [30]:
# Stop_id, distanceFromStop
# bus_stop_arithmatic
def busses_proximity(bus_t1: tuple, bus_t2: tuple ):
    
    for_proximity = 2000
    
    # Bus t_n+1 is 2 stops ahead
    if ( bus_t2[0] - bus_t1[0] ) > 1:
        return -1
   
    # If one stop ahead, add stop advantage to distanceFromPoint 
    elif ( bus_t2[0] - bus_t1[0] ) == 1:
        bus_t2 = (bus_t2[0], 1000 + bus_t2[1])
    
    # Bus t_n+1 is behind
    if ( bus_t1[0] - bus_t2[0] ) > 0:
        return -1
    elif ( bus_t1[1] - bus_t2[1] ) > 0:
        return -1 
    
    # 
    return for_proximity - (bus_t2[1] - bus_t1[1])

In [31]:
# The more less the number is from for_proximity (val: 2000) the further the bus is
prox = busses_proximity( bus_t1=(5, 0), bus_t2=(5, 0) )
print(f"proximity on same stop: {prox}")

prox = busses_proximity( bus_t1=(5, 40), bus_t2=(6, 40) )
print(f"bus t_n+1 1 stop ahead: {prox}")

prox = busses_proximity( bus_t1=(5, 40), bus_t2=(6, 20) )
print(f"bus t_n+1 1 stop ahead: {prox}")

prox = busses_proximity( bus_t1=(5, 30), bus_t2=(5, 80) )
print(f"bus t_n+1 is 50 meters ahead: {prox}")

prox = busses_proximity( bus_t1=(5, 40), bus_t2=(5, 80) )
print(f"bus t_n+1 is 40 meters ahead: {prox}")

prox = busses_proximity( bus_t1=(5, 40), bus_t2=(7, 80) )
print(f"bus t_n+1 is 2 stops ahead: {prox}")

prox = busses_proximity( bus_t1=(5, 40), bus_t2=(5, 39) )
print(f"bus t_n+1 is behind: {prox}")

proximity on same stop: 2000
bus t_n+1 1 stop ahead: 1000
bus t_n+1 1 stop ahead: 1020
bus t_n+1 is 50 meters ahead: 1950
bus t_n+1 is 40 meters ahead: 1960
bus t_n+1 is 2 stops ahead: -1
bus t_n+1 is behind: -1


In [32]:
def compare_bus_tuples( series1, series2,  ):
    """
    tupe1 convention: (distanceFromPoint, pointId)
    instead of doing 
    """
    bus_t1 = (series1.loc["istop_id"], series1.loc["distanceFromPoint"])
    bus_t2 = (series2.loc["istop_id"], series2.loc["distanceFromPoint"])
    
    return busses_proximity(bus_t1=bus_t1, bus_t2=bus_t2)
    
compare_bus_tuples( lst_of_same_time95_df[0].iloc[0,:], lst_of_same_time95_df[0].iloc[0,:])
compare_bus_tuples( lst_of_same_time95_df[0].iloc[1,:], lst_of_same_time95_df[0].iloc[2,:])

1922

In [33]:
lst_of_same_time95_df[0]

Unnamed: 0,timestamp,lineId,directionId,pointId,istop_id,distanceFromPoint,et,bus_id
0,1631606899996,95,4318,1128,4,0,2021-09-14 08:08:19.996,-
1,1631606899996,95,4318,1233,8,34,2021-09-14 08:08:19.996,-
2,1631606899996,95,4318,1233,8,112,2021-09-14 08:08:19.996,-


In [34]:
lst_of_same_time95_df[1]

Unnamed: 0,timestamp,lineId,directionId,pointId,istop_id,distanceFromPoint,et,bus_id
0,1631606963238,95,4318,1703,3,6,2021-09-14 08:09:23.238,-
1,1631606963238,95,4318,1128,4,217,2021-09-14 08:09:23.238,-
2,1631606963238,95,4318,1906,9,40,2021-09-14 08:09:23.238,-
3,1631606963238,95,4318,1909,10,2,2021-09-14 08:09:23.238,-
4,1631606963238,95,4318,3558,16,273,2021-09-14 08:09:23.238,-
5,1631606963238,95,4318,1453,21,0,2021-09-14 08:09:23.238,-


In [101]:
# Test  -----------------
# Step 1
import pickle

lst_of_same_time95_df = deepcopy(temp_lst_of_df)
# Step 2
with open('lst_of_same_time95_df.pkl', 'wb') as config_dictionary_file:
 
  # Step 3
  pickle.dump(lst_of_same_time95_df, config_dictionary_file)



In [16]:
# Find 2nd max()
def get_2nd_max():
    a = [3, 2, 8, 3, 9, 1, 7]
    m1 = a[0]
    m2 = a[0]
    
    for elem in a:
        if elem > m1:
            m2 = deepcopy( m1 )
            m1 = deepcopy( elem )

        if m1 > elem and elem > m2:
            m2 = deepcopy( elem )
            
    print(m1, m2)
get_2nd_max()

9 8


In [None]:
# --------- test end ----------

In [48]:
# useage: first time uncomment the line below run and then comment it back. Then run to restore df/ list
# temp_lst_of_df = deepcopy( lst_of_same_time95_df )
# restore 
lst_of_same_time95_df = deepcopy(temp_lst_of_df)

In [36]:
# Constraint check on matrix. argmax of cols cannot be same
a = np.asarray([[1960, -1, -1, -1, -1, -1],
                [-1, 1217, -1, -1, -1, -1],
                [-1, -1, 2000, 1040, -1, -1],
                [-1, -1, -1, -1, -1, -1],
                [-1, -1, -1, -1, 1834, -1],
                [-1, -1, -1, -1, -1, 1952], ])


# for i in range(a.shape[1] - 1):
#     amax_1 = np.argmax( a[:, i] )
#     amax_2 = np.argmax( a[:, i+1] )
#     # if both equal set lower
#     if amax_1 == amax_2:
#         if a[amax_1, i] > a[amax_2, i+1]:

# No longer required maybe I solved this problem some otherway

In [49]:
# Now strategy is from t_n -> t_n+1 points being connected. ordered in ascending now.
bus_id = 1
active_busses = []
stoped_busses = []

for i, same_times_df in enumerate(lst_of_same_time95_df):

    # Handling end of list edge case
    if i == len(lst_of_same_time95_df) - 1:
        break

    analysis_matrix = []

    # Busses with t_n timestamp
    for j, bus_tn in same_times_df.iterrows():
        bus_tn_score = []

        # Start Case
        if i == 0:
            same_times_df.loc[j, "bus_id"] = bus_id
            bus_id += 1
        elif bus_tn.bus_id == "-":
            same_times_df.loc[j, "bus_id"] = bus_id
            bus_id += 1

        # Busses with t_n+1 timestamp
        for k, bus_tn1 in lst_of_same_time95_df[i + 1].iterrows():
            bus_tn_score.append(compare_bus_tuples(bus_tn, bus_tn1))
        print(f" {k} bus_tn_score: {bus_tn_score}")

        analysis_matrix.append(bus_tn_score)

    # Assigning bus_ids to the busses in t_n+1
    # each row for bus_tn
    # each col for bus_tn+1
    analysis_matrix = np.asarray(analysis_matrix)

    # TODO
    # Perform constraint check on the matrix

    # way 1: to assign bus_ids to bus_tn+1
    # Busses with t_n+1 timestamp
    #     for k, bus_tn1 in lst_of_same_time95_df[i+1].iterrows():

    #         candidate_bus_idx = np.argmax( analysis_matrix[:, k] )  # returns max-arg from cols
    #         # Check if value on argmax is not -1. That means bus t_n+1 has no preceeding point.
    #         if analysis_matrix[candidate_bus_idx, k] != -1:
    #             lst_of_same_time95_df[i+1].loc[k, "bus_id"] = lst_of_same_time95_df[i].loc[candidate_bus_idx, "bus_id"]

    # way 2: to assign bus_ids to bus_tn+1
    # Busses with t_n timestamp
    for k, bus_tn in same_times_df.iterrows():
        # Case with no previous bus
        if k == 1:
            candidate_bus_idx = np.argmax(analysis_matrix[k, :])  # returns max-arg from rows
            # Check if value on argmax is not -1. That means bus t_n has no proceeding point.
            if analysis_matrix[k, candidate_bus_idx] != -1:
                # if argmax of row is same as previous row, check if there is 2nd argmax, if yes take that else override
                # sol: so check if  np.argmax(analysis_matrix[2, :]) == np.argmax(analysis_matrix[1, :]) and analysis_matrix[1, np.argmax(analysis_matrix[1, :])] != -1
                # THen temp var a = np.argmax(analysis_matrix[2, :]), then if 2nd argmax of row 2 is non zero use that else use a

                lst_of_same_time95_df[i + 1].loc[candidate_bus_idx, "bus_id"] = bus_tn.loc["bus_id"]
        else:
            prev_bus_max_idx = np.argmax(analysis_matrix[k - 1, :])  # returns max-arg from rows
            candidate_bus_idx = np.argmax(analysis_matrix[k, :])  # returns max-arg from rows
            # if true means candidate_bus_idx & prev_bus_max_idx can be same, Which needs handling
            if analysis_matrix[k, candidate_bus_idx] != -1 and analysis_matrix[k, prev_bus_max_idx] != -1:
                # 2 buses at t_n pointing to same the same bus at t_n+1
                if candidate_bus_idx == prev_bus_max_idx:
                    # Check if for bus_tn at k can we find another bus at t_n+1
                    temp_row_k_bus_tn = deepcopy(analysis_matrix[k, :])
                    temp_row_k_bus_tn[candidate_bus_idx] = -1
                    new_candidate_bus_idx = np.argmax(temp_row_k_bus_tn)
                    # Check if this new argmax is of not -1
                    if temp_row_k_bus_tn[new_candidate_bus_idx] != -1:
                        candidate_bus_idx = new_candidate_bus_idx
                    # Else we use candidate_bus_idx & that means that bus k of timestamp tn gets connected to the
                    # bus candidate_bus_idx of timestamp t_n+1
                    
                    lst_of_same_time95_df[i + 1].loc[candidate_bus_idx, "bus_id"] = bus_tn.loc["bus_id"]

            elif analysis_matrix[k, candidate_bus_idx] != -1:
                # bus candidate_bus_idx at t_n+1 connected to bus k at t_n
                lst_of_same_time95_df[i + 1].loc[candidate_bus_idx, "bus_id"] = bus_tn.loc["bus_id"]

    print(f"et 0:")
    print(lst_of_same_time95_df[i][["istop_id", "bus_id"]])
    print(f"et 1:")
    print(lst_of_same_time95_df[i + 1][["istop_id", "bus_id"]])
#     if i == 2:
#         break
df = functools.reduce(lambda a, b: a.append(b), lst_of_same_time95_df)
df.to_csv("bus_95_trajectory.csv")

 5 bus_tn_score: [-1, 1783, -1, -1, -1, -1]
 5 bus_tn_score: [-1, -1, 994, -1, -1, -1]
 5 bus_tn_score: [-1, -1, 1072, -1, -1, -1]
et 0:
   istop_id bus_id
0         4      1
1         8      2
2         8      3
et 1:
   istop_id bus_id
0         3      -
1         4      1
2         9      3
3        10      -
4        16      -
5        21      -
 5 bus_tn_score: [1960, -1, -1, -1, -1, -1]
 5 bus_tn_score: [-1, 1217, -1, -1, -1, -1]
 5 bus_tn_score: [-1, -1, 2000, 1040, -1, -1]
 5 bus_tn_score: [-1, -1, -1, -1, -1, -1]
 5 bus_tn_score: [-1, -1, -1, -1, 1834, -1]
 5 bus_tn_score: [-1, -1, -1, -1, -1, 1952]
et 0:
   istop_id bus_id
0         3      4
1         4      1
2         9      3
3        10      5
4        16      6
5        21      7
et 1:
   istop_id bus_id
0         3      4
1         5      1
2         9      3
3        10      -
4        16      6
5        21      7
 5 bus_tn_score: [1046, -1, -1, -1, -1, -1]
 5 bus_tn_score: [-1, 2000, -1, -1, -1, -1]
 5 bus_tn_score: [

et 0:
   istop_id bus_id
0         5     14
1        10     15
2        10     11
3        17     19
4        18     16
5        24     20
et 1:
   istop_id bus_id
0         5     14
1        10     15
2        11     11
3        15      -
4        17     19
5        18     16
6        24     20
 5 bus_tn_score: [1985, -1, -1, -1, -1, -1]
 5 bus_tn_score: [-1, 1034, -1, -1, -1, -1]
 5 bus_tn_score: [-1, 1996, 1000, -1, -1, -1]
 5 bus_tn_score: [-1, -1, -1, -1, -1, -1]
 5 bus_tn_score: [-1, -1, -1, 996, 733, -1]
 5 bus_tn_score: [-1, -1, -1, -1, 1767, -1]
 5 bus_tn_score: [-1, -1, -1, -1, -1, 2000]
et 0:
   istop_id bus_id
0         5     14
1        10     15
2        11     11
3        15     21
4        17     19
5        18     16
6        24     20
et 1:
   istop_id bus_id
0         5     14
1        11     15
2        12     11
3        18     19
4        18     16
5        24     20
 5 bus_tn_score: [2000, -1, -1, -1, -1, -1]
 5 bus_tn_score: [-1, 1949, 1004, -1, -1, -1]
 5 bus_t

et 0:
   istop_id bus_id
0         5     29
1         8     31
2        14     14
3        15     33
4        18     30
5        18     28
et 1:
   istop_id bus_id
0         5     29
1         8     31
2        14     14
3        18     30
4        18     28
 5 bus_tn_score: [-1, 2000, -1, -1, -1, -1]
 5 bus_tn_score: [-1, -1, 1843, -1, -1, -1]
 5 bus_tn_score: [-1, -1, -1, 1036, -1, -1]
 5 bus_tn_score: [-1, -1, -1, -1, 2000, 1869]
 5 bus_tn_score: [-1, -1, -1, -1, -1, 2000]
et 0:
   istop_id bus_id
0         5     29
1         8     31
2        14     14
3        18     30
4        18     28
et 1:
   istop_id bus_id
0         2      -
1         5     29
2         8     31
3        15     14
4        18     30
5        18     28
 5 bus_tn_score: [2000, -1, -1, -1, -1, -1]
 5 bus_tn_score: [-1, 2000, -1, -1, -1, -1]
 5 bus_tn_score: [-1, -1, 1484, -1, -1, -1]
 5 bus_tn_score: [-1, -1, -1, 1968, -1, -1]
 5 bus_tn_score: [-1, -1, -1, -1, 2000, 1869]
 5 bus_tn_score: [-1, -1, -1, -1, -1, 

4        22     14
 4 bus_tn_score: [1948, -1, -1, -1, -1]
 4 bus_tn_score: [-1, 1997, -1, -1, -1]
 4 bus_tn_score: [-1, -1, 2000, -1, -1]
 4 bus_tn_score: [-1, -1, -1, 1152, -1]
 4 bus_tn_score: [-1, -1, -1, -1, 2000]
et 0:
   istop_id bus_id
0         5     38
1         9     34
2        14     29
3        17     39
4        22     14
et 1:
   istop_id bus_id
0         5     38
1         9     34
2        14     29
3        18     39
4        22     14
 4 bus_tn_score: [2000, -1, -1, -1, -1]
 4 bus_tn_score: [-1, 1071, -1, -1, -1]
 4 bus_tn_score: [-1, -1, 2000, -1, -1]
 4 bus_tn_score: [-1, -1, -1, 1856, -1]
 4 bus_tn_score: [-1, -1, -1, -1, 2000]
et 0:
   istop_id bus_id
0         5     38
1         9     34
2        14     29
3        18     39
4        22     14
et 1:
   istop_id bus_id
0         5     38
1        10     34
2        14     29
3        18     39
4        22     14
 5 bus_tn_score: [2000, -1, -1, -1, -1, -1]
 5 bus_tn_score: [-1, 2000, -1, -1, -1, -1]
 5 bus_tn_sco

 4 bus_tn_score: [-1, -1, -1, 1875, -1]
 4 bus_tn_score: [-1, -1, -1, -1, 2000]
et 0:
   istop_id bus_id
0         3     51
1         5     49
2        15     50
3        19     44
et 1:
   istop_id bus_id
0         3     51
1         5     49
2         9      -
3        15     50
4        19     44
 3 bus_tn_score: [-1, -1, -1, -1]
 3 bus_tn_score: [1872, -1, -1, -1]
 3 bus_tn_score: [-1, 2000, -1, -1]
 3 bus_tn_score: [-1, -1, -1, 1927]
 3 bus_tn_score: [-1, -1, -1, -1]
et 0:
   istop_id bus_id
0         3     51
1         5     49
2         9     53
3        15     50
4        19     44
et 1:
   istop_id bus_id
0         5     49
1         9     53
2        12      -
3        15     50
 4 bus_tn_score: [1734, -1, -1, -1, -1]
 4 bus_tn_score: [-1, 2000, -1, -1, -1]
 4 bus_tn_score: [-1, -1, 2000, -1, -1]
 4 bus_tn_score: [-1, -1, -1, 1221, -1]
et 0:
   istop_id bus_id
0         5     49
1         9     53
2        12     54
3        15     50
et 1:
   istop_id bus_id
0         5     

 6 bus_tn_score: [-1, -1, 2000, -1, -1, -1, -1]
 6 bus_tn_score: [-1, -1, -1, 2000, 964, -1, -1]
 6 bus_tn_score: [-1, -1, -1, -1, 2000, -1, -1]
 6 bus_tn_score: [-1, -1, -1, -1, -1, 1819, -1]
 6 bus_tn_score: [-1, -1, -1, -1, -1, -1, 1264]
et 0:
   istop_id bus_id
0         5     60
1        10     64
2        12     63
3        15     65
4        16     53
5        18     58
6        20     61
et 1:
   istop_id bus_id
0         5     60
1        10     64
2        12     63
3        15     65
4        16     53
5        18     58
6        21     61
 5 bus_tn_score: [1697, -1, -1, -1, -1, -1]
 5 bus_tn_score: [-1, 1352, -1, -1, -1, -1]
 5 bus_tn_score: [-1, -1, 1798, -1, -1, -1]
 5 bus_tn_score: [-1, -1, -1, 964, -1, -1]
 5 bus_tn_score: [-1, -1, -1, 2000, -1, -1]
 5 bus_tn_score: [-1, -1, -1, -1, 1263, -1]
 5 bus_tn_score: [-1, -1, -1, -1, -1, 1038]
et 0:
   istop_id bus_id
0         5     60
1        10     64
2        12     63
3        15     65
4        16     53
5        18     

 4 bus_tn_score: [-1, -1, 2000, -1, -1]
 4 bus_tn_score: [-1, -1, -1, 869, -1]
 4 bus_tn_score: [-1, -1, -1, 2000, -1]
 4 bus_tn_score: [-1, -1, -1, -1, 1827]
 4 bus_tn_score: [-1, -1, -1, -1, -1]
et 0:
   istop_id bus_id
0         5     69
1         8     71
2        13     60
3        15     73
4        16     68
5        18     70
6        22     53
et 1:
   istop_id bus_id
0         5     69
1         8     71
2        13     60
3        16     68
4        18     70
 5 bus_tn_score: [-1, 2000, -1, -1, -1, -1]
 5 bus_tn_score: [-1, -1, 1731, -1, -1, -1]
 5 bus_tn_score: [-1, -1, -1, 2000, -1, -1]
 5 bus_tn_score: [-1, -1, -1, -1, 1103, -1]
 5 bus_tn_score: [-1, -1, -1, -1, -1, 1330]
et 0:
   istop_id bus_id
0         5     69
1         8     71
2        13     60
3        16     68
4        18     70
et 1:
   istop_id bus_id
0         2      -
1         5     69
2         8     71
3        13     60
4        17     68
5        19     70
 6 bus_tn_score: [1113, -1, -1, -1, -1, -1, -1

et 0:
   istop_id bus_id
0         5     84
1         5     74
2        10     80
3        15     71
4        19     86
5        24     85
et 1:
   istop_id bus_id
0         5     84
1         6     74
2        11     80
3        15      -
4        15     71
5        19     86
6        24     85
 5 bus_tn_score: [1954, 1446, -1, -1, -1, -1]
 5 bus_tn_score: [-1, 1960, -1, -1, -1, -1]
 5 bus_tn_score: [-1, -1, 1891, -1, -1, -1]
 5 bus_tn_score: [-1, -1, -1, 1994, -1, -1]
 5 bus_tn_score: [-1, -1, -1, 2000, -1, -1]
 5 bus_tn_score: [-1, -1, -1, -1, 1858, -1]
 5 bus_tn_score: [-1, -1, -1, -1, -1, 1996]
et 0:
   istop_id bus_id
0         5     84
1         6     74
2        11     80
3        15     87
4        15     71
5        19     86
6        24     85
et 1:
   istop_id bus_id
0         5     84
1         6     74
2        11     80
3        15     71
4        19     86
5        24     85
 6 bus_tn_score: [1896, 1438, -1, -1, -1, -1, -1]
 6 bus_tn_score: [-1, 1946, -1, -1, -1, -1, -1

 4 bus_tn_score: [-1, 1052, -1, -1, -1]
 4 bus_tn_score: [-1, -1, 1091, -1, -1]
 4 bus_tn_score: [-1, -1, -1, 1139, -1]
 4 bus_tn_score: [-1, -1, -1, -1, 2000]
et 0:
   istop_id bus_id
0         4     97
1         9     99
2        12     96
3        13     93
4        19     94
et 1:
   istop_id bus_id
0         5     97
1        10     99
2        13     96
3        14     93
4        19     94
 5 bus_tn_score: [1873, -1, -1, -1, -1, -1]
 5 bus_tn_score: [-1, 1907, -1, -1, -1, -1]
 5 bus_tn_score: [-1, -1, 2000, 960, -1, -1]
 5 bus_tn_score: [-1, -1, -1, 2000, 1164, -1]
 5 bus_tn_score: [-1, -1, -1, -1, -1, 2000]
et 0:
   istop_id bus_id
0         5     97
1        10     99
2        13     96
3        14     93
4        19     94
et 1:
   istop_id bus_id
0         5     97
1        10     99
2        13     96
3        14     93
4        15      -
5        19     94
 5 bus_tn_score: [1864, -1, -1, -1, -1, -1]
 5 bus_tn_score: [-1, 1090, -1, -1, -1, -1]
 5 bus_tn_score: [-1, -1, 1092

et 0:
   istop_id bus_id
0         3    112
1         6    110
2         9    102
3        19    108
4        21    111
5        22    113
et 1:
   istop_id bus_id
0         3    112
1         6    110
2         9    102
3        19    108
4        21    111
5        22    113
 7 bus_tn_score: [2000, -1, -1, -1, -1, -1, -1, -1]
 7 bus_tn_score: [-1, 960, -1, -1, -1, -1, -1, -1]
 7 bus_tn_score: [-1, -1, 2000, -1, -1, -1, -1, -1]
 7 bus_tn_score: [-1, -1, -1, -1, -1, 1241, -1, -1]
 7 bus_tn_score: [-1, -1, -1, -1, -1, -1, 996, 818]
 7 bus_tn_score: [-1, -1, -1, -1, -1, -1, 1954, 1776]
et 0:
   istop_id bus_id
0         3    112
1         6    110
2         9    102
3        19    108
4        21    111
5        22    113
et 1:
   istop_id bus_id
0         3    112
1         7    110
2         9    102
3        13      -
4        15      -
5        20    108
6        22    111
7        22    113
 6 bus_tn_score: [2000, -1, -1, -1, -1, -1, -1]
 6 bus_tn_score: [-1, -1, -1, -1, -1, -1, -1]

In [95]:
# ----------- Version 1
# Now strategy is from t_n -> t_n+1 points being connected. ordered in ascending now.
bus_id = 1
active_busses = []
stoped_busses = []

for i, same_times_df in enumerate(lst_of_same_time95_df):
    
    # Handling end of list edge case
    if i == len(lst_of_same_time95_df) - 1:
        break
        
    analysis_matrix = []
    
    # Busses with t_n timestamp 
    for j, bus_tn in same_times_df.iterrows():
        bus_tn_score = []
        
        # Start Case
        if i == 0:
            same_times_df.loc[j, "bus_id"] = bus_id
            bus_id += 1
        elif bus_tn.bus_id == "-":
            same_times_df.loc[j, "bus_id"] = bus_id
            bus_id += 1
            
        # Busses with t_n+1 timestamp
        for k, bus_tn1 in lst_of_same_time95_df[i+1].iterrows():
            bus_tn_score.append( compare_bus_tuples( bus_tn, bus_tn1) )
        print(f" {k} bus_tn_score: {bus_tn_score}")
            
        analysis_matrix.append(bus_tn_score)

    # Assigning bus_ids to the busses in t_n+1
    # each row for bus_tn 
    # each col for bus_tn+1
    analysis_matrix = np.asarray(analysis_matrix)
    
    # TODO
    # Perform constraint check on the matrix
    
    # way 1: to assign bus_ids to bus_tn+1
    # Busses with t_n+1 timestamp
#     for k, bus_tn1 in lst_of_same_time95_df[i+1].iterrows():
        
#         candidate_bus_idx = np.argmax( analysis_matrix[:, k] )  # returns max-arg from cols
#         # Check if value on argmax is not -1. That means bus t_n+1 has no preceeding point.
#         if analysis_matrix[candidate_bus_idx, k] != -1:
#             lst_of_same_time95_df[i+1].loc[k, "bus_id"] = lst_of_same_time95_df[i].loc[candidate_bus_idx, "bus_id"]

    # way 2: to assign bus_ids to bus_tn+1
    # Busses with t_n timestamp
    for k, bus_tn in same_times_df.iterrows():
        candidate_bus_idx = np.argmax( analysis_matrix[k, :] )  # returns max-arg from rows
        # Check if value on argmax is not -1. That means bus t_n has no proceeding point.
        if analysis_matrix[k, candidate_bus_idx] != -1:
            lst_of_same_time95_df[i+1].loc[candidate_bus_idx, "bus_id"] = bus_tn.loc["bus_id"]
            
    
    
    print(f"et 0:")
    print(lst_of_same_time95_df[i][["istop_id", "bus_id"]])
    print(f"et 1:")
    print(lst_of_same_time95_df[i+1][["istop_id", "bus_id"]])
#     if i == 2:
#         break
df = functools.reduce(lambda a, b: a.append(b), lst_of_same_time95_df)
# df.to_csv("temp.csv")

 5 bus_tn_score: [-1, 1783, -1, -1, -1, -1]
 5 bus_tn_score: [-1, -1, 994, -1, -1, -1]
 5 bus_tn_score: [-1, -1, 1072, -1, -1, -1]
et 0:
   istop_id bus_id
0         4      1
1         8      2
2         8      3
et 1:
   istop_id bus_id
0         3      -
1         4      1
2         9      3
3        10      -
4        16      -
5        21      -
 5 bus_tn_score: [1960, -1, -1, -1, -1, -1]
 5 bus_tn_score: [-1, 1217, -1, -1, -1, -1]
 5 bus_tn_score: [-1, -1, 2000, 1040, -1, -1]
 5 bus_tn_score: [-1, -1, -1, -1, -1, -1]
 5 bus_tn_score: [-1, -1, -1, -1, 1834, -1]
 5 bus_tn_score: [-1, -1, -1, -1, -1, 1952]
et 0:
   istop_id bus_id
0         3      4
1         4      1
2         9      3
3        10      5
4        16      6
5        21      7
et 1:
   istop_id bus_id
0         3      4
1         5      1
2         9      3
3        10      -
4        16      6
5        21      7
 5 bus_tn_score: [1046, -1, -1, -1, -1, -1]
 5 bus_tn_score: [-1, 2000, -1, -1, -1, -1]
 5 bus_tn_score: [

et 0:
   istop_id bus_id
0         5     16
1         9     12
2         9     22
3        15     20
4        16     23
5        22      6
et 1:
   istop_id bus_id
0         5     16
1         9     22
2        10      -
3        15     20
4        17     23
5        24      -
 5 bus_tn_score: [2000, -1, -1, -1, -1, -1]
 5 bus_tn_score: [-1, 1183, 1180, -1, -1, -1]
 5 bus_tn_score: [-1, 2000, 1997, -1, -1, -1]
 5 bus_tn_score: [-1, -1, -1, 2000, -1, -1]
 5 bus_tn_score: [-1, -1, -1, -1, 1845, -1]
 5 bus_tn_score: [-1, -1, -1, -1, -1, 2000]
et 0:
   istop_id bus_id
0         5     16
1         9     22
2        10     24
3        15     20
4        17     23
5        24     25
et 1:
   istop_id bus_id
0         5     16
1        10     24
2        10      -
3        15     20
4        17     23
5        24     25
 5 bus_tn_score: [1381, -1, -1, -1, -1, -1]
 5 bus_tn_score: [-1, 2000, 1663, -1, -1, -1]
 5 bus_tn_score: [-1, -1, 1666, -1, -1, -1]
 5 bus_tn_score: [-1, -1, -1, -1, -1, -1]


 4 bus_tn_score: [-1, -1, 1992, -1, -1]
 4 bus_tn_score: [-1, -1, -1, 1284, 1250]
 4 bus_tn_score: [-1, -1, -1, 1582, 1548]
et 0:
   istop_id bus_id
0         3     45
1        12     16
2        16     47
3        16     48
et 1:
   istop_id bus_id
0         4     45
1         6      -
2        12     16
3        17     48
4        17      -
 5 bus_tn_score: [1926, -1, -1, -1, -1, -1]
 5 bus_tn_score: [-1, 2000, -1, -1, -1, -1]
 5 bus_tn_score: [-1, -1, 2000, -1, -1, -1]
 5 bus_tn_score: [-1, -1, -1, -1, 1966, 1898]
 5 bus_tn_score: [-1, -1, -1, -1, 2000, 1932]
et 0:
   istop_id bus_id
0         4     45
1         6     49
2        12     16
3        17     48
4        17     50
et 1:
   istop_id bus_id
0         4     45
1         6     49
2        12     16
3        15      -
4        17     50
5        17      -
 4 bus_tn_score: [1880, -1, -1, -1, -1]
 4 bus_tn_score: [-1, 994, -1, -1, -1]
 4 bus_tn_score: [-1, -1, 1008, -1, -1]
 4 bus_tn_score: [-1, -1, -1, -1, -1]
 4 bus_tn_score

et 0:
   istop_id bus_id
0         4     61
1         7     55
2        11     45
3        15     63
4        19     16
5        24     65
et 1:
   istop_id bus_id
0         5     61
1         7     55
2        12     45
3        16     63
4        20     16
5        24      -
 5 bus_tn_score: [2000, -1, -1, -1, -1, -1]
 5 bus_tn_score: [-1, 2000, -1, -1, -1, -1]
 5 bus_tn_score: [-1, -1, 1836, -1, -1, -1]
 5 bus_tn_score: [-1, -1, -1, 1932, -1, -1]
 5 bus_tn_score: [-1, -1, -1, -1, 1831, -1]
 5 bus_tn_score: [-1, -1, -1, -1, -1, 1759]
et 0:
   istop_id bus_id
0         5     61
1         7     55
2        12     45
3        16     63
4        20     16
5        24     66
et 1:
   istop_id bus_id
0         5     61
1         7     55
2        12     45
3        16     63
4        20     16
5        24     66
 4 bus_tn_score: [1799, -1, -1, -1, -1]
 4 bus_tn_score: [-1, 716, -1, -1, -1]
 4 bus_tn_score: [-1, -1, 2000, -1, -1]
 4 bus_tn_score: [-1, -1, -1, 1735, -1]
 4 bus_tn_score: [-1,

   istop_id bus_id
0         3     75
1         5     73
2         9     77
3        15     74
4        19     68
et 1:
   istop_id bus_id
0         5     73
1         9     77
2        12      -
3        15     74
 4 bus_tn_score: [1734, -1, -1, -1, -1]
 4 bus_tn_score: [-1, 2000, -1, -1, -1]
 4 bus_tn_score: [-1, -1, 2000, -1, -1]
 4 bus_tn_score: [-1, -1, -1, 1221, -1]
et 0:
   istop_id bus_id
0         5     73
1         9     77
2        12     78
3        15     74
et 1:
   istop_id bus_id
0         5     73
1         9     77
2        12     78
3        16     74
4        21      -
 5 bus_tn_score: [1942, -1, -1, -1, -1, -1]
 5 bus_tn_score: [-1, 1002, -1, -1, -1, -1]
 5 bus_tn_score: [-1, -1, 1077, -1, -1, -1]
 5 bus_tn_score: [-1, -1, -1, -1, 1959, -1]
 5 bus_tn_score: [-1, -1, -1, -1, -1, 2000]
et 0:
   istop_id bus_id
0         5     73
1         9     77
2        12     78
3        16     74
4        21     79
et 1:
   istop_id bus_id
0         5     73
1        10     77
2

et 0:
   istop_id bus_id
0         4     90
1         8     84
2        11     88
3        14     87
4        15     91
5        19     86
6        21     82
7        23     85
et 1:
   istop_id bus_id
0         5     90
1         8     84
2        13      -
3        14     87
4        19     86
5        22     82
 5 bus_tn_score: [2000, -1, -1, -1, -1, -1]
 5 bus_tn_score: [-1, 2000, -1, -1, -1, -1]
 5 bus_tn_score: [-1, -1, 2000, 1000, -1, -1]
 5 bus_tn_score: [-1, -1, -1, 2000, -1, -1]
 5 bus_tn_score: [-1, -1, -1, -1, 1889, -1]
 5 bus_tn_score: [-1, -1, -1, -1, -1, 2000]
et 0:
   istop_id bus_id
0         5     90
1         8     84
2        13     92
3        14     87
4        19     86
5        22     82
et 1:
   istop_id bus_id
0         5     90
1         8     84
2        13     92
3        14     87
4        19     86
5        22     82
 5 bus_tn_score: [-1, 2000, -1, -1, -1, -1]
 5 bus_tn_score: [-1, -1, 1236, -1, -1, -1]
 5 bus_tn_score: [-1, -1, -1, 2000, -1, -1]
 5 bus_t

 5 bus_tn_score: [-1, -1, -1, 1237, -1, -1]
 5 bus_tn_score: [-1, -1, -1, -1, 2000, -1]
 5 bus_tn_score: [-1, -1, -1, -1, -1, 2000]
et 0:
   istop_id bus_id
0         5     98
1         9    104
2        13     95
3        16    102
4        20    103
5        23    105
et 1:
   istop_id bus_id
0         5     98
1         9    104
2        13     95
3        17    102
4        20    103
5        23    105
 7 bus_tn_score: [-1, 1976, -1, -1, -1, -1, -1, -1]
 7 bus_tn_score: [-1, -1, 1743, -1, -1, -1, -1, -1]
 7 bus_tn_score: [-1, -1, -1, 1276, -1, -1, -1, -1]
 7 bus_tn_score: [-1, -1, -1, -1, -1, 2000, -1, -1]
 7 bus_tn_score: [-1, -1, -1, -1, -1, -1, 2000, -1]
 7 bus_tn_score: [-1, -1, -1, -1, -1, -1, -1, 1020]
et 0:
   istop_id bus_id
0         5     98
1         9    104
2        13     95
3        17    102
4        20    103
5        23    105
et 1:
   istop_id bus_id
0         4      -
1         5     98
2         9    104
3        14     95
4        15      -
5        17    102


et 0:
   istop_id bus_id
0         3    120
1         6    112
2        10    119
3        11    116
4        17    117
5        22     95
et 1:
   istop_id bus_id
0         3    120
1         6    112
2        11    119
3        12    116
4        18    117
5        23     95
 6 bus_tn_score: [2000, -1, -1, -1, -1, -1, -1]
 6 bus_tn_score: [-1, 2000, -1, -1, -1, -1, -1]
 6 bus_tn_score: [-1, -1, 1990, 962, -1, -1, -1]
 6 bus_tn_score: [-1, -1, -1, 2000, -1, -1, -1]
 6 bus_tn_score: [-1, -1, -1, -1, -1, 1962, -1]
 6 bus_tn_score: [-1, -1, -1, -1, -1, -1, 2000]
et 0:
   istop_id bus_id
0         3    120
1         6    112
2        11    119
3        12    116
4        18    117
5        23     95
et 1:
   istop_id bus_id
0         3    120
1         6    112
2        11    119
3        12    116
4        15      -
5        18    117
6        23     95
 5 bus_tn_score: [2000, -1, -1, -1, -1, -1]
 5 bus_tn_score: [-1, -1, -1, -1, -1, -1]
 5 bus_tn_score: [-1, -1, 1991, 972, -1, -1]
 5 bu

et 0:
   istop_id bus_id
0         3    134
1         6    127
2        10    132
3        15    140
4        17    136
5        19    139
6        19    141
et 1:
   istop_id bus_id
0         7    127
1        10    132
2        17    136
3        19    141
4        19      -
 5 bus_tn_score: [-1, 1977, -1, -1, -1, -1]
 5 bus_tn_score: [-1, -1, 1921, -1, -1, -1]
 5 bus_tn_score: [-1, -1, -1, 1048, -1, -1]
 5 bus_tn_score: [-1, -1, -1, -1, 2000, 1926]
 5 bus_tn_score: [-1, -1, -1, -1, -1, 1928]
et 0:
   istop_id bus_id
0         7    127
1        10    132
2        17    136
3        19    141
4        19    142
et 1:
   istop_id bus_id
0         5      -
1         7    127
2        10    132
3        18    136
4        19    141
5        19    142
 5 bus_tn_score: [1843, -1, -1, -1, -1, -1]
 5 bus_tn_score: [-1, 2000, -1, -1, -1, -1]
 5 bus_tn_score: [-1, -1, 2000, -1, -1, -1]
 5 bus_tn_score: [-1, -1, -1, 1955, 960, 848]
 5 bus_tn_score: [-1, -1, -1, -1, 2000, 1888]
 5 bus_tn_score: 

In [17]:
# array([[2000,   -1,   -1,   -1,   -1],
#        [  -1, 1098, 1052,   -1,   -1],
#        [  -1, 1821, 1775,   -1,   -1],
#        [  -1,   -1,   -1, 2000,   -1],
#        [  -1,   -1,   -1,   -1, 2000]])

# Exception case 1
# When two buses (backward, forward) are very close at t_n and they get so ahead that even 
# the 'backward' bus is ahead than position where 'forward' bus was at t_n. The algo used to connect forward 
# to 'backward' as it was the most nearest and 'forward' bus at t_n+1 would get a new ID.
#
# - Solution
# if argmax of row is same as previous row, check if there is 2nd argmax, if yes take that else override
# sol: so check if  np.argmax(analysis_matrix[2, :]) == np.argmax(analysis_matrix[1, :]) and analysis_matrix[1, np.argmax(analysis_matrix[1, :])] != -1
# THen temp var a = np.argmax(analysis_matrix[2, :]), then if 2nd argmax of row 2 is non zero use that else use a
# -----


In [79]:
np.argmax([-1, 1783, -1, -1, -1, -1])  # 1
print(np.asarray(analysis_matrix))

[[1956   -1   -1   -1   -1   -1   -1]
 [  -1 1974   -1   -1   -1   -1   -1]
 [  -1   -1 2000  922   -1   -1   -1]
 [  -1   -1   -1 2000   -1   -1   -1]
 [  -1   -1   -1   -1 1435   -1   -1]
 [  -1   -1   -1   -1   -1 2000 1072]
 [  -1   -1   -1   -1   -1   -1 1855]]


In [459]:
for i, same_times_df in enumerate(lst_of_same_time95_df):
    
    for j, bus_tn in same_times_df.iterrows():
        same_times_df.loc[j, "bus_id"] = "34"
    print(same_times_df)
    break
print(lst_of_same_time95_df[0])

       timestamp  lineId  directionId  pointId  istop_id  distanceFromPoint  \
0  1631606899996      95         4318     1128         3                  0   
1  1631606899996      95         4318     1233         7                112   
2  1631606899996      95         4318     1233         7                 34   

                       et bus_id matching_list  
0 2021-09-14 08:08:19.996     34                
1 2021-09-14 08:08:19.996     34            --  
2 2021-09-14 08:08:19.996     34            --  
       timestamp  lineId  directionId  pointId  istop_id  distanceFromPoint  \
0  1631606899996      95         4318     1128         3                  0   
1  1631606899996      95         4318     1233         7                112   
2  1631606899996      95         4318     1233         7                 34   

                       et bus_id matching_list  
0 2021-09-14 08:08:19.996     34                
1 2021-09-14 08:08:19.996     34            --  
2 2021-09-14 08:08:19.9

In [63]:
print(f"printing report")
print(f"et 0:")
print(lst_of_same_time95_df[0][["istop_id", "bus_id"]])
print(f"et 1:")
print(lst_of_same_time95_df[1][["istop_id", "bus_id"]])

printing report
et 0:
   istop_id bus_id
0         4      1
1         8      2
2         8      3
et 1:
   istop_id bus_id
0         3      4
1         4      1
2         9      3
3        10      5
4        16      6
5        21      7


### Plot bus trajectories

In [45]:
df.head()

Unnamed: 0,timestamp,lineId,directionId,pointId,istop_id,distanceFromPoint,et,bus_id
0,1631606899996,95,4318,1128,4,0,2021-09-14 08:08:19.996,1
1,1631606899996,95,4318,1233,8,34,2021-09-14 08:08:19.996,2
2,1631606899996,95,4318,1233,8,112,2021-09-14 08:08:19.996,3
0,1631606963238,95,4318,1703,3,6,2021-09-14 08:09:23.238,4
1,1631606963238,95,4318,1128,4,217,2021-09-14 08:09:23.238,1


In [46]:
# Do processing on df to make it ready
df['distFromZero'] = (df["istop_id"] * 800) + df["distanceFromPoint"]
df.head()

Unnamed: 0,timestamp,lineId,directionId,pointId,istop_id,distanceFromPoint,et,bus_id,distFromZero
0,1631606899996,95,4318,1128,4,0,2021-09-14 08:08:19.996,1,3200
1,1631606899996,95,4318,1233,8,34,2021-09-14 08:08:19.996,2,6434
2,1631606899996,95,4318,1233,8,112,2021-09-14 08:08:19.996,3,6512
0,1631606963238,95,4318,1703,3,6,2021-09-14 08:09:23.238,4,2406
1,1631606963238,95,4318,1128,4,217,2021-09-14 08:09:23.238,1,3417


In [47]:
import plotly.express as px
import plotly.graph_objects as go

fig = px.scatter(df, x='et', y="distFromZero", color='bus_id')

fig.update_layout(
    xaxis=dict(
        rangeselector=dict(
            buttons=list([
                dict(step="all")
            ])
        ),
        rangeslider=dict(
            visible=True
        ),
        type="date"
    ))
fig.show()
fig.write_html("buses_trajectories.html")