In [3]:
import json
import numpy as np
import pandas as pd
import pickle
from copy import deepcopy
from datetime import timedelta
from pprint import pprint
from utils import extract_integer
from os import getcwd, listdir
from os.path import join, abspath, pardir
from functools import reduce

from pprint import pprint
import shapefile
import matplotlib.pyplot as plt

import plotly.express as px
import plotly.graph_objects as go

from typing import List, Set, Dict, Tuple, Optional, Union

## Utility Functions

In [4]:
def get_stop_dist_to_zero(line_str_code: str, varient: str, dist_dict: dict, stop_id_to_int):
    """
    Compute the cumulative distance from the first stop of a line to each stop on it.

    Distance to zero means distance in meters from any stop to first stop (zero),
    accumulated segment by segment along the line.

    :param str line_str_code: Line (vehicle & route code) for which to extract this information,
        e.g. ``"095b"``.
    :param str varient: One of the 2 directions on the line's route, e.g. ``'1'``.
    :param dict dist_dict: Nested container indexed as
        ``dist_dict[line_str_code][varient][i]['distance']``; entry ``i`` (0-based) is the
        amount added when going from stop ``i + 1`` to stop ``i + 2``.
    :param dict stop_id_to_int: Mapping whose values are the 1-based positions of the stops
        along the line. It is expected to be ordered by position (1, 2, ..., n).
    :returns: ``{position: cumulative distance}``; always contains the base case ``{1: 0}``.
    :rtype: dict
    """
    # The first stop is the zero / reference point (base case).
    stop_dist_to_zero = {1: 0}

    n_stops = len(stop_id_to_int)
    if n_stops < 2:
        # With zero or one stop there is no segment to accumulate, so the
        # distance table must not be touched at all.
        return stop_dist_to_zero

    segments = dist_dict[line_str_code][varient]
    running_total = 0
    for istop_id in stop_id_to_int.values():
        # The last stop has no onward segment: n stops are joined by n - 1 segments.
        if istop_id >= n_stops:
            break

        running_total += segments[istop_id - 1]['distance']
        # Position 1 is the base case, so this segment ends at position istop_id + 1.
        stop_dist_to_zero[istop_id + 1] = running_total

    return stop_dist_to_zero

def _group_stops_by_line(stops):
    """
    Group the stop records of one varient per line code, sorted by ``succession``.

    :param list stops: Stop records of a single varient, in file order.
    :returns: ``{Code_Ligne: [stop records sorted by succession]}``
    :rtype: dict
    """
    # Single pass: bucket the records by line number, preserving file order.
    stops_by_number = {}
    for stop in stops:
        stops_by_number.setdefault(stop.numero_lig, []).append(stop)

    ordered = {}  # "095b": [stop_obj, stop_obj, stop_obj]
    for stop in stops:
        if stop.Code_Ligne not in ordered:
            # A line code collects every stop that shares its line number.
            # sorted() returns a new list, so each code owns its own list.
            ordered[stop.Code_Ligne] = sorted(
                stops_by_number[stop.numero_lig], key=lambda s: s.succession
            )
    return ordered


def get_ordered_stops(stops_file_path: str):
    """
    Read the stops shapefile and return the stops of every line, ordered along the route.

    Stops are first separated by varient so that grouping by line only ever
    returns stops of a single direction.

    :param str stops_file_path: path to shape file
    :returns:
        -ordered_dict_v1 (:py:class:`dict`) - Ordered stops of varient 1
        -ordered_dict_v2 (:py:class:`dict`) - Ordered stops of varient 2
    """
    # Read the records once and release the file handles as soon as they are in memory.
    with shapefile.Reader(stops_file_path) as stops_sf:
        stop_records = list(stops_sf.records())

    # Records whose Variante is neither 1 nor 2 are ignored.
    stops_v1 = [stop for stop in stop_records if stop.Variante == 1]
    stops_v2 = [stop for stop in stop_records if stop.Variante == 2]

    return _group_stops_by_line(stops_v1), _group_stops_by_line(stops_v2)

In [1]:
# AR example: fit a lag-1 autoregressive model on a toy series and forecast one step ahead.
from statsmodels.tsa.ar_model import AutoReg
from random import random, seed

# Seed the generator so the contrived data, and therefore the printed forecast,
# are identical on every run. NOTE: re-run this cell to refresh the stored output.
seed(42)
# contrived dataset: the trend 1..99 plus uniform noise in [0, 1)
data = [x + random() for x in range(1, 100)]
# fit model
model = AutoReg(data, lags=1)
model_fit = model.fit()
# make prediction: start == end == len(data), i.e. the first step after the sample
yhat = model_fit.predict(len(data), len(data))
print(yhat)

[100.07772216]


In [5]:
# Every location below is resolved against the parent of the current working directory.
parent_dir = abspath(join(getcwd(), pardir))

# Directories and files that sit directly under the parent directory.
trajectories_dir = join(parent_dir, "data_trajectories")
plot_dir = join(parent_dir, "plots_plotly")
dist_file = join(parent_dir, "distances.json")

# Shapefile layers, referenced by base name (no file extension).
shape_files_dir = join(parent_dir, "data/2109_STIB_MIVB_Network_shapefiles")
line_shape_file, stops_shape_file = (
    join(shape_files_dir, layer_name) for layer_name in ("ACTU_LINES", "ACTU_STOPS")
)

## Setup

In [6]:
# Line, direction and varient under study.
line_varient = 1
line_str_code = '001m'
direction_id = 8161

ordered_stops_v1, ordered_stops_v2 = get_ordered_stops(stops_file_path=stops_shape_file)

# Varient 1 selects the first mapping; any other value falls back to the second one.
stops_of_selected_varient = ordered_stops_v1 if line_varient == 1 else ordered_stops_v2
ordered_list_of_stops_in_lineId = [
    extract_integer(stop.stop_id) for stop in stops_of_selected_varient[line_str_code]
]

# Pair each stop id with its 1-based position along the line, then index by stop id.
lst = [
    (stop_id, position)
    for position, stop_id in enumerate(ordered_list_of_stops_in_lineId, start=1)
]
stop_id_to_int = dict(lst)
# Left disabled: dist_dict is not defined in the cells shown above.
# stop_dist_to_zero = get_stop_dist_to_zero(line_str_code, str(line_varient), dist_dict, stop_id_to_int)

In [7]:
# Trajectory file name: <line code>_<direction id>_<varient>.csv inside trajectories_dir.
traj_csv_path = f"{join(trajectories_dir, line_str_code)}_{direction_id}_{line_varient}.csv"
df_traj = pd.read_csv(traj_csv_path)

# Drop rows whose bus_id is the '-' placeholder. The explicit copy makes the
# filtered frame independent, so the column assignments below do not act on a
# slice of the frame that was read (no SettingWithCopyWarning).
df_traj = df_traj[df_traj["bus_id"] != '-'].copy()
df_traj["bus_id"] = df_traj["bus_id"].astype(int)

# Plain column assignment replaces the column together with its dtype, whereas
# `.loc[:, "et"] = ...` may write into the existing object column on recent
# pandas and leave it non-datetime. Values are still parsed one by one, as
# before, so each timestamp is interpreted independently of the others.
df_traj["et"] = df_traj["et"].apply(pd.to_datetime)