In [24]:
import json
import pandas as pd
from copy import deepcopy
from pprint import pprint

In [2]:
json_file_path = "data/vehiclePosition01.json"

# Reading JSON file
with open(json_file_path) as json_file:
    json_dict = json.load(json_file)

In [10]:
# Information about the lineIds (buses, train, tram)
# LineId: The unique number of the line route
# directionId: the direction in which the specific vehicle is traveling
# pointId: The last stop travered, stop which is left behind
# distanceFromPoint: Distance from the left behind stop
single_line_info: dict = json_dict["data"][0]["Responses"][0]
single_line_info

{'lines': [{'lineId': '1',
   'vehiclePositions': [{'directionId': '8731',
     'distanceFromPoint': 0,
     'pointId': '8162'},
    {'directionId': '8731', 'distanceFromPoint': 0, 'pointId': '8131'},
    {'directionId': '8162', 'distanceFromPoint': 0, 'pointId': '8092'},
    {'directionId': '8731', 'distanceFromPoint': 0, 'pointId': '8011'},
    {'directionId': '8161', 'distanceFromPoint': 1, 'pointId': '8122'},
    {'directionId': '8161', 'distanceFromPoint': 0, 'pointId': '8742'},
    {'directionId': '8731', 'distanceFromPoint': 0, 'pointId': '8161'},
    {'directionId': '8731', 'distanceFromPoint': 1, 'pointId': '8101'},
    {'directionId': '8731', 'distanceFromPoint': 0, 'pointId': '8051'},
    {'directionId': '8731', 'distanceFromPoint': 0, 'pointId': '8291'},
    {'directionId': '8162', 'distanceFromPoint': 0, 'pointId': '8272'},
    {'directionId': '8161', 'distanceFromPoint': 0, 'pointId': '8052'}]},
  {'lineId': '2',
   'vehiclePositions': [{'directionId': '8763',
     'dista

In [16]:
info_for_line_ids = []

for data in json_dict["data"]:
    timestamp = data["time"]
    # Rest of the stuff
    for upper_line_group in data["Responses"]:
        # print(upper_line_group)
        if upper_line_group is None:
            continue
        for line_group in upper_line_group["lines"]:
#             print(line_group)
            line_id_info = [{"timestamp": timestamp, "lineId": line_group["lineId"], **elem}
                            for elem in line_group["vehiclePositions"]]

            info_for_line_ids.extend(deepcopy(line_id_info))

lines_df = pd.DataFrame(info_for_line_ids)
# lines_df.to_csv(f"vehiclePosition{i}.csv")
lines_df.head()

Unnamed: 0,timestamp,lineId,directionId,distanceFromPoint,pointId
0,1632229443663,1,8731,0,8121
1,1632229443663,1,8731,0,8061
2,1632229443663,1,8161,0,8122
3,1632229443663,1,8161,0,8052
4,1632229443663,1,8162,0,8733


## Works on processed data

In [9]:
proc_lines = pd.read_csv("processed_data/vehiclePosition08.csv")
proc_lines = proc_lines.drop(columns=["Unnamed: 0"])
idx_lineId_95 = proc_lines["lineId"] == 95
line_95 = deepcopy( proc_lines.loc[idx_lineId_95] )


In [10]:
line_95.loc[:, "et"] = line_95.timestamp.apply(lambda x: pd.Timestamp(x, unit='ms') )

In [11]:
line_95.head()

Unnamed: 0,timestamp,lineId,directionId,distanceFromPoint,pointId,et
637,1631606899996,95,7104,339,1124,2021-09-14 08:08:19.996
638,1631606899996,95,4318,0,7104,2021-09-14 08:08:19.996
639,1631606899996,95,4318,0,1128,2021-09-14 08:08:19.996
640,1631606899996,95,7104,86,4365,2021-09-14 08:08:19.996
641,1631606899996,95,7104,66,4362,2021-09-14 08:08:19.996


In [12]:
# Figure out what the bus is doing. Why are there more then 2 directionIds ?
line_95.groupby(by="directionId").count()

Unnamed: 0_level_0,timestamp,lineId,distanceFromPoint,pointId,et
directionId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1233,54,54,54,54,54
2278,92,92,92,92,92
2392,177,177,177,177,177
3558,2,2,2,2,2
4318,1986,1986,1986,1986,1986
6432,118,118,118,118,118
7104,1734,1734,1734,1734,1734


In [13]:
line_95.directionId.unique() 

array([7104, 4318, 3558, 2392, 6432, 1233, 2278])

In [14]:
idxs = line_95.loc[:, "directionId"] == 4318
line_95_direc_4318 = line_95.loc[idxs]
# line_95_direc_4318.to_csv("line_95_direction-4318.csv")

In [15]:
# line_95_direc_4318 = line_95_direc_4318.reset_index(drop=True)
line_95_direc_4318.reset_index(drop=True, inplace=True)
line_95_direc_4318

Unnamed: 0,timestamp,lineId,directionId,distanceFromPoint,pointId,et
0,1631606899996,95,4318,0,7104,2021-09-14 08:08:19.996
1,1631606899996,95,4318,0,1128,2021-09-14 08:08:19.996
2,1631606899996,95,4318,112,1233,2021-09-14 08:08:19.996
3,1631606899996,95,4318,34,1233,2021-09-14 08:08:19.996
4,1631606899996,95,4318,0,7104,2021-09-14 08:08:19.996
...,...,...,...,...,...,...
1981,1631615154340,95,4318,217,4313,2021-09-14 10:25:54.340
1982,1631615154340,95,4318,102,7066,2021-09-14 10:25:54.340
1983,1631615154340,95,4318,0,7104,2021-09-14 10:25:54.340
1984,1631615154340,95,4318,274,4305,2021-09-14 10:25:54.340


In [146]:
# Create a csv file with -- marker to denote rows with same timestamp
matching_list = [""]
for i, row in line_95_direc_4318.iterrows():
    if i == 0: 
        continue
    if row["et"] == line_95_direc_4318.loc[i-1, "et"]:
        print(row["et"])
        matching_list.append("--")
    else:
        matching_list.append("")
    
line_95_direc_4318["matching_list"] = matching_list
# line_95_direc_4318.to_csv("line_95_direction-4318.csv", index=False)

2021-09-14 08:08:19.996000
2021-09-14 08:08:19.996000
2021-09-14 08:08:19.996000
2021-09-14 08:08:19.996000
2021-09-14 08:09:23.238000
2021-09-14 08:09:23.238000
2021-09-14 08:09:23.238000
2021-09-14 08:09:23.238000
2021-09-14 08:09:23.238000
2021-09-14 08:09:23.238000
2021-09-14 08:09:23.238000
2021-09-14 08:09:54.879000
2021-09-14 08:09:54.879000
2021-09-14 08:09:54.879000
2021-09-14 08:09:54.879000
2021-09-14 08:09:54.879000
2021-09-14 08:09:54.879000
2021-09-14 08:09:54.879000
2021-09-14 08:10:27.041000
2021-09-14 08:10:27.041000
2021-09-14 08:10:27.041000
2021-09-14 08:10:27.041000
2021-09-14 08:10:27.041000
2021-09-14 08:10:27.041000
2021-09-14 08:10:27.041000
2021-09-14 08:10:59.550000
2021-09-14 08:10:59.550000
2021-09-14 08:10:59.550000
2021-09-14 08:10:59.550000
2021-09-14 08:10:59.550000
2021-09-14 08:10:59.550000
2021-09-14 08:10:59.550000
2021-09-14 08:10:59.550000
2021-09-14 08:10:59.550000
2021-09-14 08:11:30.668000
2021-09-14 08:11:30.668000
2021-09-14 08:11:30.668000
2

2021-09-14 09:45:28.226000
2021-09-14 09:45:28.226000
2021-09-14 09:45:59.665000
2021-09-14 09:45:59.665000
2021-09-14 09:45:59.665000
2021-09-14 09:45:59.665000
2021-09-14 09:45:59.665000
2021-09-14 09:45:59.665000
2021-09-14 09:45:59.665000
2021-09-14 09:45:59.665000
2021-09-14 09:46:31.510000
2021-09-14 09:46:31.510000
2021-09-14 09:46:31.510000
2021-09-14 09:46:31.510000
2021-09-14 09:46:31.510000
2021-09-14 09:46:31.510000
2021-09-14 09:46:31.510000
2021-09-14 09:46:31.510000
2021-09-14 09:47:02.744000
2021-09-14 09:47:02.744000
2021-09-14 09:47:02.744000
2021-09-14 09:47:02.744000
2021-09-14 09:47:02.744000
2021-09-14 09:47:02.744000
2021-09-14 09:47:02.744000
2021-09-14 09:47:02.744000
2021-09-14 09:47:35.408000
2021-09-14 09:47:35.408000
2021-09-14 09:47:35.408000
2021-09-14 09:47:35.408000
2021-09-14 09:47:35.408000
2021-09-14 09:47:35.408000
2021-09-14 09:47:35.408000
2021-09-14 09:47:35.408000
2021-09-14 09:47:35.408000
2021-09-14 09:47:35.408000
2021-09-14 09:48:06.436000
2

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  line_95_direc_4318["matching_list"] = matching_list


## Working with shape file

In [3]:
import shapefile
import numpy as np
import matplotlib.pyplot as plt
import os

In [4]:
stops_sf = shapefile.Reader("data/2109_STIB_MIVB_Network_shapefiles/ACTU_STOPS")
stop_obj = stops_sf.records()[0]
stop_obj.__dict__

{'_Record__field_positions': {'Code_Ligne': 0,
  'Variante': 1,
  'succession': 2,
  'stop_id': 3,
  'descr_fr': 4,
  'descr_nl': 5,
  'alpha_fr': 6,
  'alpha_nl': 7,
  'coord_x': 8,
  'coord_y': 9,
  'mode': 10,
  'numero_lig': 11,
  'terminus': 12},
 '_Record__oid': 0}

#### Separate points based on their Varient (direction)

In [5]:
# Separate stops so that when you group by lineID (succession) only stops of single direction are returned
stops_v1 = [stop for stop in stops_sf.records() if stop.Variante == 1]
stops_v2 = [stop for stop in stops_sf.records() if stop.Variante == 2]

#### Order stops by succession

In [6]:
# Separate out stops related to a single line e.g. All stops of 95 in front of key '095b' 
ordered_dict_v1 = {}  # "095b": [stop_obj, stop_obj, stop_obj]
for stop in stops_v1:
    if stop.Code_Ligne not in ordered_dict_v1:
        ordered_dict_v1[stop.Code_Ligne] = [s for s in stops_v1 if s.numero_lig == stop.numero_lig ]

ordered_dict_v2 = {}  # "095b": [stop_obj, stop_obj, stop_obj]
for stop in stops_v2:
    if stop.Code_Ligne not in ordered_dict_v2:
        ordered_dict_v2[stop.Code_Ligne] = [s for s in stops_v2 if s.numero_lig == stop.numero_lig ]
    
ordered_dict_v1['095b'][0:5]

[Record #2332: ['095b', 1, 1, '2278', 'GRAND-PLACE', 'GROTE MARKT', 'Grand-Place', 'Grote Markt', 148819.0, 170485.0, 'B', 95, 'WIENER'],
 Record #2333: ['095b', 1, 2, '2268', 'BIBLIOTHEQUE', 'BIBLIOTHEEK', 'Bibliothèque', 'Bibliotheek', 148993.0, 170336.0, 'B', 95, 'WIENER'],
 Record #2334: ['095b', 1, 3, '1703', 'GRAND SABLON', 'GROTE ZAVEL', 'Grand Sablon', 'Grote Zavel', 148959.0, 170147.0, 'B', 95, 'WIENER'],
 Record #2335: ['095b', 1, 4, '1128', 'PETIT SABLON', 'KLEINE ZAVEL', 'Petit Sablon', 'Kleine Zavel', 149093.0, 169943.0, 'B', 95, 'WIENER'],
 Record #2336: ['095b', 1, 5, '6354B', 'ROYALE', 'KONING', 'Royale', 'Koning', 149281.3, 170126.2, 'B', 95, 'WIENER']]

In [7]:
# Sort the stops according to succession
for Code_Ligne in ordered_dict_v1.keys():
    ordered_dict_v1[Code_Ligne].sort(key = lambda x: x.succession)
    
for Code_Ligne in ordered_dict_v2.keys():
    ordered_dict_v2[Code_Ligne].sort(key = lambda x: x.succession)

# View sorted stop
for key in ordered_dict_v2:
    print(f'{key} -> { " -> ".join( [s.alpha_fr for s in ordered_dict_v1[key]] ) } ')
    break

012b -> Brussels Airport -> Bourget -> Da Vinci -> Genève -> Meiser -> Schuman -> Luxembourg -> Trône -> Trône 


In [22]:
ordered_dict_v2['095b']

[Record #2357: ['095b', 2, 1, '4351', 'WIENER', 'WIENER', 'Wiener', 'Wiener', 153554.0, 165360.0, 'B', 95, 'GRAND-PLACE'],
 Record #2358: ['095b', 2, 2, '4348', 'MIRAVAL', 'MIRAVAL', 'Miraval', 'Miraval', 153191.0, 165531.0, 'B', 95, 'GRAND-PLACE'],
 Record #2359: ['095b', 2, 3, '4355', 'LES 3 TILLEULS', 'DRIE LINDEN', 'Les Trois Tilleuls', 'Drie Linden', 153110.0, 165811.0, 'B', 95, 'GRAND-PLACE'],
 Record #2360: ['095b', 2, 4, '1455', 'CALYPSO 2000', 'CALYPSO 2000', 'Calypso 2000', 'Calypso 2000', 152837.0, 165853.0, 'B', 95, 'GRAND-PLACE'],
 Record #2361: ['095b', 2, 5, '4357', 'VANDER ELST', 'VANDER ELST', 'Vander Elst', 'Vander Elst', 152680.0, 166105.0, 'B', 95, 'GRAND-PLACE'],
 Record #2362: ['095b', 2, 6, '4358', 'KEYM', 'KEYM', 'Keym', 'Keym', 152426.0, 166621.0, 'B', 95, 'GRAND-PLACE'],
 Record #2363: ['095b', 2, 7, '4359', 'ARCADES', 'ARCADEN', 'Arcades', 'Arcaden', 151974.0, 166672.0, 'B', 95, 'GRAND-PLACE'],
 Record #2364: ['095b', 2, 8, '4360', 'RELAIS', 'PLEISTERPLAATS',

In [21]:
ordered_dict_v1['095b']

[Record #2332: ['095b', 1, 1, '2278', 'GRAND-PLACE', 'GROTE MARKT', 'Grand-Place', 'Grote Markt', 148819.0, 170485.0, 'B', 95, 'WIENER'],
 Record #2333: ['095b', 1, 2, '2268', 'BIBLIOTHEQUE', 'BIBLIOTHEEK', 'Bibliothèque', 'Bibliotheek', 148993.0, 170336.0, 'B', 95, 'WIENER'],
 Record #2334: ['095b', 1, 3, '1703', 'GRAND SABLON', 'GROTE ZAVEL', 'Grand Sablon', 'Grote Zavel', 148959.0, 170147.0, 'B', 95, 'WIENER'],
 Record #2335: ['095b', 1, 4, '1128', 'PETIT SABLON', 'KLEINE ZAVEL', 'Petit Sablon', 'Kleine Zavel', 149093.0, 169943.0, 'B', 95, 'WIENER'],
 Record #2336: ['095b', 1, 5, '6354B', 'ROYALE', 'KONING', 'Royale', 'Koning', 149281.3, 170126.2, 'B', 95, 'WIENER'],
 Record #2337: ['095b', 1, 6, '6433', 'TRONE', 'TROON', 'Trône', 'Troon', 149885.0, 169988.0, 'B', 95, 'WIENER'],
 Record #2338: ['095b', 1, 7, '1729', 'SCIENCE', 'WETENSCHAP', 'Science', 'Wetenschap', 150043.6, 169936.7, 'B', 95, 'WIENER'],
 Record #2339: ['095b', 1, 8, '1233', 'LUXEMBOURG', 'LUXEMBURG', 'Luxembourg', 

In [20]:
# TODO
for key in ordered_dict_v1:
    if key == '095b':
        print(f'{key} -> { " -> ".join( [s.alpha_fr for s in ordered_dict_v1[key]] ) } ')
    

095b -> Grand-Place -> Bibliothèque -> Grand Sablon -> Petit Sablon -> Royale -> Trône -> Science -> Luxembourg -> Idalie -> Blyckaerts -> Germoir -> Rodin -> Delporte -> Etterbeek Gare -> Thys -> Cimetière d'Ixelles -> Relais -> Arcades -> Keym -> Vander Elst -> Calypso 2000 -> Les Trois Tilleuls -> Cerisaie -> Fauconnerie -> Wiener 


## Identidying busses

#### Trying to sperate out vehicles with similar timestamp make list of lists

In [131]:
# [ [ busses with same timestamp ], [busses with same timestamp], ... ]

In [130]:
for i, row in line_95_direc_4318.iterrows():
    print( type(row).__dict__ )
    break

{'__module__': 'pandas.core.series', '__annotations__': {'_name': 'Hashable', '_metadata': 'list[str]', '_mgr': 'SingleManager', 'div': 'Callable[[Series, Any], Series]', 'rdiv': 'Callable[[Series, Any], Series]', '_index': 'Index | None', 'index': 'Index'}, '__doc__': "\n    One-dimensional ndarray with axis labels (including time series).\n\n    Labels need not be unique but must be a hashable type. The object\n    supports both integer- and label-based indexing and provides a host of\n    methods for performing operations involving the index. Statistical\n    methods from ndarray have been overridden to automatically exclude\n    missing data (currently represented as NaN).\n\n    Operations between Series (+, -, /, *, **) align values based on their\n    associated index values-- they need not be the same length. The result\n    index will be the sorted union of the two indexes.\n\n    Parameters\n    ----------\n    data : array-like, Iterable, dict, or scalar value\n        Conta

In [16]:
line_95_direc_4318.loc[:, "bus_id"] = "-"
unique_times = line_95_direc_4318.timestamp.unique()
a_timestamp = unique_times[0]
idx = line_95_direc_4318.loc[:, "timestamp"] == a_timestamp
line_95_direc_4318[idx]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = value


Unnamed: 0,timestamp,lineId,directionId,distanceFromPoint,pointId,et,bus_id
0,1631606899996,95,4318,0,7104,2021-09-14 08:08:19.996,-
1,1631606899996,95,4318,0,1128,2021-09-14 08:08:19.996,-
2,1631606899996,95,4318,112,1233,2021-09-14 08:08:19.996,-
3,1631606899996,95,4318,34,1233,2021-09-14 08:08:19.996,-
4,1631606899996,95,4318,0,7104,2021-09-14 08:08:19.996,-


In [156]:
same_time95_lst_of_lst = [deepcopy(line_95_direc_4318[line_95_direc_4318.loc[:, "timestamp"] == a_timestamp]) 
                          for ts in unique_times]

#### A function that returns distance between 2 busses

In [25]:
ordered_list_of_stops_95 = [ stop.stop_id for stop in ordered_dict_v1["095b"] ]
# [ ('2278', 0), (2268, '1') ]
lst = list( zip(ordered_list_of_stops_95, range(len(ordered_list_of_stops_95)) ) )
stop_id_to_int = { key:val for key, val in lst }
stop_id_to_int

{'2278': 0,
 '2268': 1,
 '1703': 2,
 '1128': 3,
 '6354B': 4,
 '6433': 5,
 '1729': 6,
 '1233': 7,
 '1906': 8,
 '1909': 9,
 '4303': 10,
 '4304': 11,
 '4305': 12,
 '4306': 13,
 '4307': 14,
 '3558': 15,
 '4308': 16,
 '4309': 17,
 '4310': 18,
 '4311': 19,
 '1453': 20,
 '4313': 21,
 '4314': 22,
 '4315': 23,
 '4318': 24}

In [None]:
def compare_bus_tuples( tup1, tup2 ):
    # tupe1 convention: (distanceFromPoint, pointId)

In [None]:
def get_dist_tuple():
    

In [217]:
# def distance_btw_busses(bus_df, bus_df):
#     """
#     bus_df is a row in flattened csv
#     """
ordered_list_of_stops_95 = [ stop.stop_id for stop in ordered_dict_v1["095b"] ]
# [ ('2278', 0), (2268, '1') ]
lst = list( zip(ordered_list_of_stops_95, range(len(ordered_list_of_stops_95)) ) )
stop_id_to_int = { key:val for key, val in lst }



In [1]:
a = line_95_direc_4318.iloc[0, :]
a

NameError: name 'line_95_direc_4318' is not defined

In [202]:
b = line_95_direc_4318.iloc[2, :]
b

timestamp                         1631606899996
lineId                                       95
directionId                                4318
distanceFromPoint                           112
pointId                                    1233
et                   2021-09-14 08:08:19.996000
matching_list                                 -
bus_id                                        -
Name: 2, dtype: object

In [163]:
bus_id = 1
for i, same_times_lst in enumerate(same_time95_lst_of_lst):
    for j, row in same_times_lst.iterrows():
        if i == 0:
            row.bus_id = bus_id
            bus_id += 1
        else:
            # Comparing bus with busses in previos timestamp
            # Make sure order this innner list in descending succession wise and then match to closet 
            for prev_row in same_time95_lst_of_lst[i-1]:
                pass
    break

-1
for is working
-1
for is working
-1
for is working
-1
for is working
-1
for is working
-1
for is working
-1
for is working
else is working
timestamp                         1631606899996
lineId                                       95
directionId                                4318
distanceFromPoint                             0
pointId                                    7104
et                   2021-09-14 08:08:19.996000
matching_list                                 -
Name: 0, dtype: object
-1
for is working
-1
for is working
-1
for is working
-1
for is working
-1
for is working
-1
for is working
-1
for is working
else is working
timestamp                         1631606899996
lineId                                       95
directionId                                4318
distanceFromPoint                             0
pointId                                    1128
et                   2021-09-14 08:08:19.996000
matching_list                                 -
Name: 1, dtype: objec