# Notebook Overview

1. **Importing Libraries and Configurations**:
    - Import necessary libraries such as `polars`, `datetime`, and `json`.
    - Configure `polars` settings for better performance and display.

2. **Data Loading and Preprocessing**:
    - Load route mappings to map route IDs to their respective names.
    - Read and preprocess the main dataset from a Parquet file, replacing route IDs with their mapped names.
    - Load and preprocess additional data from JSON files, including stops and patterns data.

3. **Data Merging and Transformation**:
    - Merge stops data with the main dataset to get stop names for `nextStopID` and `lastStopID`.
    - Filter the dataset to include only relevant records and add new columns such as `stopChanged` and `timeDiff`.

4. **Computing Permutations**:
    - Define a function to compute permutations for each route, calculating time differences between stops.
    - Save the resulting dataset to a Parquet file for further analysis.

5. **Data Analysis**:
    - Extract and display specific columns from the processed dataset for analysis.
    - Filter and analyze data for specific routes and stops.

6. **Alternative Approaches**:
    - Document an alternative method to compute time differences between stops using the `shift` method (commented out).

This notebook provides a comprehensive workflow for processing and analyzing transportation data, enabling detailed insights into bus routes and stop timings.

In [2]:
import polars as pl
from datetime import datetime
import json
import datetime

# Turn on polars' global string cache before any categorical data is loaded.
pl.enable_string_cache()

# Widen the table preview: show up to 100 columns and 25 rows.
pl.Config.set_tbl_cols(100)
pl.Config.set_tbl_rows(25)

polars.config.Config

In [3]:
%config InteractiveShell.ast_node_interactivity = 'last_expr_or_assign'

In [4]:
# Route names keyed by the numeric routeID stored in the raw entries.
# Entries stay in their original order because the mapping is iterated later.
route_mapping = {
    # Routes 2L / 2R / 3
    3: "2L", 4: "2R", 33: "3",
    # Routes 10 - 12
    17: "10", 18: "11", 23: "12",
    # Routes 16 - 19
    12: "16", 13: "17", 14: "18", 30: "19",
    # Route 21 and its tripper
    29: "21", 38: "21 Tripper",
    # 777 maps to itself
    777: "777",
}

{3: '2L',
 4: '2R',
 33: '3',
 17: '10',
 18: '11',
 23: '12',
 12: '16',
 13: '17',
 14: '18',
 30: '19',
 29: '21',
 38: '21 Tripper',
 777: '777'}

In [5]:
# Read the September 2024 entries and swap each numeric routeID for its route
# name; replace_strict is called without a default, so every id must be mapped.
df = pl.read_parquet("./data/2024-09-entries.parquet").with_columns(
    pl.col("routeID").replace_strict(route_mapping)
)

routeID,patternID,equipmentID,tripID,lat,lng,load,capacity,eLoad,blockID,nextStopID,nextStopETA,nextPatternStopID,h,lastStopID,lastPatternStopID,scheduleNumber,inService,onSchedule,trainID,receiveTime,aID,captureTime,direction,seq,lastStopExtID,nextStopExtID,nextStopPctProg,atStop,__index_level_0__
str,i64,str,i64,f64,f64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,str,bool,f64,i64,datetime[ns],str,datetime[ns],cat,i64,i64,i64,f64,bool,i64
"""777""",33,"""1001""",,41.93536,-88.77044,0,0,4,0,0,-1,0,197,491,11402,"""NIS""",false,,0,2024-08-31 23:59:49,"""35467605078ef4d""",2024-09-01 00:00:01,,-1,-1,-1,,false,13
"""777""",9998,"""1002""",,41.93484,-88.72824,0,0,9,0,0,-1,0,78,0,0,"""NIS""",false,,0,2024-08-31 15:14:57,"""3546760508796c6""",2024-09-01 00:00:01,,-1,-1,-1,,false,14
"""777""",0,"""1003""",,41.93277,-88.76676,0,0,0,0,0,-1,0,0,0,0,"""NIS""",false,,0,2024-08-30 04:47:18,"""3546760504518c0""",2024-09-01 00:00:01,,-1,-1,-1,,false,15
"""777""",9998,"""1004""",,41.93481,-88.7283,0,0,15,0,0,-1,0,0,0,0,"""NIS""",false,,0,2024-08-31 09:43:05,"""35467605056be89""",2024-09-01 00:00:01,,-1,-1,-1,,false,16
"""777""",9998,"""1101""",,41.93486,-88.7282,0,0,0,125,0,-1,0,351,0,0,"""NIS""",false,,0,2024-08-31 05:17:48,"""3546760504edeed""",2024-09-01 00:00:01,,-1,-1,-1,,false,17
"""17""",13,"""1102""",10058,41.93558,-88.76723,0,0,237,0,806,1139,10612,2,805,10611,"""8:00:00-17""",true,-658.0,11606,2024-08-31 11:36:01,"""35467605059a8e2""",2024-09-01 00:00:01,"""Outbound""",2,805,806,100.0,true,18
"""777""",9998,"""1103""",,41.93481,-88.72816,0,0,0,0,0,-1,0,47,0,0,"""NIS""",false,,0,2024-08-31 02:19:58,"""35467605044c796""",2024-09-01 00:00:01,,-1,-1,-1,,false,19
"""777""",0,"""1401""",,41.93539,-88.77016,0,0,0,0,0,-1,0,0,0,0,"""NIS""",false,,0,2024-08-01 19:06:17,"""3546760504ac22e""",2024-09-01 00:00:01,,-1,-1,-1,,false,20
"""777""",0,"""1701""",,41.9348,-88.72831,0,0,0,0,0,-1,0,0,0,0,"""NIS""",false,,0,2024-08-10 04:51:32,"""3546760504a404a""",2024-09-01 00:00:01,,-1,-1,-1,,false,21
"""777""",9998,"""1702""",,41.93459,-88.72733,0,0,0,0,0,-1,0,7,0,0,"""NIS""",false,,0,2024-08-30 19:58:30,"""3546760505ac39c""",2024-09-01 00:00:01,,-1,-1,-1,,false,22


In [6]:
# Load stops data
# The context manager closes the handle as soon as parsing is done; JSON is
# UTF-8 by specification, so the encoding is stated explicitly.
with open("./data/stops.json", "r", encoding="utf-8") as stops_file:
    stopsData = json.load(stops_file)

stops = pl.DataFrame(stopsData['get_stops'])

# Map pattern id to its route name
pattern_mapping = {
    3: "2L",
    4: "2R",
    37: "3",
    17: "10",
    18: "11",
    23: "12",
    12: "16",
    13: "17",
    14: "18",
    33: "19",
    46: "21",
    45: "21 Tripper",
}

# Load patterns data
with open("./data/patterns.json", "r", encoding="utf-8") as patterns_file:
    patternsData = json.load(patterns_file)

patterns = pl.DataFrame(patternsData['get_patterns'])
# Pattern ids that are not in pattern_mapping become the literal string "None".
patterns = patterns.with_columns(pl.col("id").replace_strict(pattern_mapping, default="None"))

id,name,extID,type,length,color,encLine,decLine,routes,routeNames,stations,stopIDs
str,str,str,i64,f64,str,str,list[null],list[i64],list[str],list[null],list[i64]
"""2L""","""Route 2L (Full Service)""","""2""",2,5.156097,"""#097138""","""eq}~Fxix|OM?a@@}A??fE?nEAbE?h@…",[],[3],"[""Route 2L""]",[],"[433, 465, … 820]"
"""2R""","""Route 2R Full Service""","""1""",1,5.083211,"""#FFD600""","""qk}~Fn{x|Ow@C{BhAoBp@Em@G}G@_I…",[],[4],"[""Route 2R""]",[],"[431, 451, … 432]"
"""3""","""Route 3 Full Service""","""3""",2,2.793036,"""#58F964""","""kq}~Fxix|Oa@??@sA?@?C?A}C???aC…",[],[33],"[""Route 3""]",[],"[431, 493, … 431]"
"""10""","""Route 10 Full Service""","""10""",3,5.564219,"""#782BC9""","""{q}~Fxix|OuBACfF@jH@xClFkBj@Sj…",[],[17],"[""Route 10""]",[],"[477, 432, … 431]"
"""11""","""Route 11 Full Service""","""11""",2,12.793737,"""#A07D5C""","""mq}~F~ix|O}A?QECKEaPAoACmQGGeR…",[],[18],"[""Route 11""]",[],"[492, 431, … 491]"
"""12""","""Route 12 Full Service""","""12""",2,39.210159,"""#3C8DBC""","""{~|~Fzex|O?dCrIA@qE@}Bx@u}@@mF…",[],[23],"[""Route 12""]",[],"[809, 453, … 575]"
"""16""","""Route 16 Full Service""","""16""",3,4.9,"""#A64598""","""y~|~Ftcx|OAfErI@@}H?W~@cbAFmAD…",[],[12],"[""Route 16""]",[],"[809, 536, … 811]"
"""17""","""Route 17 Full Service""","""17""",2,11.542563,"""#3E5BA6""","""ap}~Fjkx|OHOAYQGu@B_B?K}]?_M`B…",[],[13],"[""Route 17""]",[],"[805, 806, … 805]"
"""18""","""Route 18 Full Service""","""18""",4,14.597877,"""#FFAA00""","""}i`_G|no|OoA}Aa@]gAKCB[~BUp@c@…",[],[14],"[""Route 18""]",[],"[676, 835, … 685]"
"""19""","""Route 19 Full Service""","""19""",4,21.596067,"""#34ADE0""","""cm}~Fj_y|Ol@CRUFW?_@??ESOQ??WI…",[],[30],"[""Route 19""]",[],"[819, 528, … 819]"


In [7]:
# Build the (stop id, stop name) lookup once and reuse it for both joins.
stop_names = stops.select(["id", "name"]).unique()

# Left join stops to get stop names for nextStopID and lastStopID.
# The lookup columns are renamed before joining so the join adds the final
# column name directly and no rename of the joined frame is needed.
df = df.join(
    stop_names.rename({"id": "nextStopID", "name": "nextStopName"}),
    on="nextStopID",
    how="left",
)
df = df.join(
    stop_names.rename({"id": "lastStopID", "name": "lastStopName"}),
    on="lastStopID",
    how="left",
)

routeID,patternID,equipmentID,tripID,lat,lng,load,capacity,eLoad,blockID,nextStopID,nextStopETA,nextPatternStopID,h,lastStopID,lastPatternStopID,scheduleNumber,inService,onSchedule,trainID,receiveTime,aID,captureTime,direction,seq,lastStopExtID,nextStopExtID,nextStopPctProg,atStop,__index_level_0__,nextStopName,lastStopName
str,i64,str,i64,f64,f64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,str,bool,f64,i64,datetime[ns],str,datetime[ns],cat,i64,i64,i64,f64,bool,i64,str,str
"""777""",33,"""1001""",,41.93536,-88.77044,0,0,4,0,0,-1,0,197,491,11402,"""NIS""",false,,0,2024-08-31 23:59:49,"""35467605078ef4d""",2024-09-01 00:00:01,,-1,-1,-1,,false,13,,"""Normal Rd. at Northern Lane"""
"""777""",9998,"""1002""",,41.93484,-88.72824,0,0,9,0,0,-1,0,78,0,0,"""NIS""",false,,0,2024-08-31 15:14:57,"""3546760508796c6""",2024-09-01 00:00:01,,-1,-1,-1,,false,14,,
"""777""",0,"""1003""",,41.93277,-88.76676,0,0,0,0,0,-1,0,0,0,0,"""NIS""",false,,0,2024-08-30 04:47:18,"""3546760504518c0""",2024-09-01 00:00:01,,-1,-1,-1,,false,15,,
"""777""",9998,"""1004""",,41.93481,-88.7283,0,0,15,0,0,-1,0,0,0,0,"""NIS""",false,,0,2024-08-31 09:43:05,"""35467605056be89""",2024-09-01 00:00:01,,-1,-1,-1,,false,16,,
"""777""",9998,"""1101""",,41.93486,-88.7282,0,0,0,125,0,-1,0,351,0,0,"""NIS""",false,,0,2024-08-31 05:17:48,"""3546760504edeed""",2024-09-01 00:00:01,,-1,-1,-1,,false,17,,
"""17""",13,"""1102""",10058,41.93558,-88.76723,0,0,237,0,806,1139,10612,2,805,10611,"""8:00:00-17""",true,-658.0,11606,2024-08-31 11:36:01,"""35467605059a8e2""",2024-09-01 00:00:01,"""Outbound""",2,805,806,100.0,true,18,"""Lucinda Ave at Wirtz Dr Stop #…","""Holmes Student Center Stop #73…"
"""777""",9998,"""1103""",,41.93481,-88.72816,0,0,0,0,0,-1,0,47,0,0,"""NIS""",false,,0,2024-08-31 02:19:58,"""35467605044c796""",2024-09-01 00:00:01,,-1,-1,-1,,false,19,,
"""777""",0,"""1401""",,41.93539,-88.77016,0,0,0,0,0,-1,0,0,0,0,"""NIS""",false,,0,2024-08-01 19:06:17,"""3546760504ac22e""",2024-09-01 00:00:01,,-1,-1,-1,,false,20,,
"""777""",0,"""1701""",,41.9348,-88.72831,0,0,0,0,0,-1,0,0,0,0,"""NIS""",false,,0,2024-08-10 04:51:32,"""3546760504a404a""",2024-09-01 00:00:01,,-1,-1,-1,,false,21,,
"""777""",9998,"""1702""",,41.93459,-88.72733,0,0,0,0,0,-1,0,7,0,0,"""NIS""",false,,0,2024-08-30 19:58:30,"""3546760505ac39c""",2024-09-01 00:00:01,,-1,-1,-1,,false,22,,


In [8]:
# nextStopID of the preceding record; "preceding" is resolved by the
# per-vehicle window applied to each expression below.
prev_next_stop = pl.col("nextStopID").shift(1)

# True when this record's lastStopID equals the previous record's nextStopID
# for the same equipmentID (records ordered by receiveTime).
stop_changed = (pl.col("lastStopID") == prev_next_stop).over(
    "equipmentID", order_by="receiveTime"
)

# The same comparison, evaluated again after the stopChanged filter has
# removed rows, so "previous record" now means the previous kept record.
next_to_last = (prev_next_stop == pl.col("lastStopID")).over(
    "equipmentID", order_by="receiveTime"
)

# diff(-1) is current minus following, so negating it gives the time from this
# record to the same vehicle's following record.
time_to_following = -pl.col("receiveTime").diff(-1).over(
    "equipmentID", order_by="receiveTime"
)

# Keep only in-service records, then flag stop changes.
df = df.filter(pl.col("inService")).with_columns(stop_changed.alias("stopChanged"))

# Keep the flagged rows and compute nextToLast on what remains.
df = df.filter(pl.col("stopChanged")).with_columns(next_to_last.alias("nextToLast"))

# timeDiff is computed before the nextToLast filter removes rows.
df = df.with_columns(time_to_following.alias("timeDiff")).filter(pl.col("nextToLast"))


routeID,patternID,equipmentID,tripID,lat,lng,load,capacity,eLoad,blockID,nextStopID,nextStopETA,nextPatternStopID,h,lastStopID,lastPatternStopID,scheduleNumber,inService,onSchedule,trainID,receiveTime,aID,captureTime,direction,seq,lastStopExtID,nextStopExtID,nextStopPctProg,atStop,__index_level_0__,nextStopName,lastStopName,stopChanged,nextToLast,timeDiff
str,i64,str,i64,f64,f64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,str,bool,f64,i64,datetime[ns],str,datetime[ns],cat,i64,i64,i64,f64,bool,i64,str,str,bool,bool,duration[ns]
"""19""",33,"""603""",10252,41.94231,-88.77307,0,0,8,0,616,1143,11390,360,709,11389,"""33-0831_1""",true,-4.0,11681,2024-09-01 00:03:09,"""35467605059b849""",2024-09-01 00:03:17,"""Outbound""",4,709,616,0.0,true,357,"""Annie Glidden Rd. at Varsity D…","""Annie Glidden Rd. & Loren Dr. …",true,true,20s
"""19""",33,"""603""",10252,41.94478,-88.77306,0,0,8,0,482,1144,11391,0,616,11390,"""33-0831_1""",true,-4.0,11681,2024-09-01 00:03:29,"""35467605059b849""",2024-09-01 00:03:32,"""Outbound""",5,616,482,26.68739,false,384,"""Blackhawk Rd. at Kimberly Dr.""","""Annie Glidden Rd. at Varsity D…",true,true,50s
"""19""",33,"""603""",10252,41.94629,-88.76997,0,0,8,0,483,1144,11392,2,482,11391,"""33-0831_1""",true,-4.0,11681,2024-09-01 00:04:19,"""35467605059b849""",2024-09-01 00:04:33,"""Outbound""",6,482,483,0.0,true,492,"""Blackhawk Rd. at Edgebrook Dr.""","""Blackhawk Rd. at Kimberly Dr.""",true,true,1m 10s
"""21""",46,"""72309""",,41.99158,-88.68532,0,0,0,0,764,1145,11448,9,763,11447,"""19:00:00-21""",true,-2.0,11696,2024-09-01 00:04:33,"""354676050c83bea""",2024-09-01 00:04:48,"""Outbound""",4,763,764,0.0,true,537,"""Lucas St. at Opportunity House""","""Maple St. at Opportunity House…",true,true,1m 10s
"""2L""",3,"""Old_1501""",329,41.93639,-88.7736,0,0,33,0,465,1145,10482,271,433,10481,"""19:00:00-2""",true,-2.0,11414,2024-09-01 00:04:41,"""354676050595540""",2024-09-01 00:04:48,"""Outbound""",4,433,465,0.0,true,538,"""Stadium Dr. at New Hall""","""Lincoln Hall""",true,true,40s
"""16""",12,"""72307""",10046,41.93175,-88.75342,0,0,47,0,624,1146,10598,112,623,10597,"""12-0831_1""",true,-1.0,11594,2024-09-01 00:04:48,"""354676050c83bf0""",2024-09-01 00:05:03,"""Outbound""",4,623,624,0.0,true,563,"""Locust St. at 6th St.""","""Locust St. at 2nd St.""",true,true,2m 20s
"""2L""",3,"""Old_1501""",329,41.93746,-88.7754,0,0,33,0,466,1146,10483,0,465,10482,"""19:00:00-2""",true,-2.0,11414,2024-09-01 00:05:21,"""354676050595540""",2024-09-01 00:05:33,"""Outbound""",5,465,466,0.0,true,619,"""Grant North""","""Stadium Dr. at New Hall""",true,true,1m
"""21""",46,"""72309""",,41.99374,-88.68572,0,0,0,0,765,1147,11449,100,764,11448,"""19:00:00-21""",true,-2.0,11696,2024-09-01 00:05:43,"""354676050c83bea""",2024-09-01 00:05:49,"""Outbound""",5,764,765,0.0,true,645,"""State St. at Sycamore Post Off…","""Lucas St. at Opportunity House""",true,true,1m 40s
"""10""",17,"""904""",10159,41.93984,-88.77303,0,0,101,0,529,1146,10691,360,528,10690,"""17-0831_1""",true,-4.0,11166,2024-09-01 00:05:45,"""35467605084242c""",2024-09-01 00:05:49,"""Outbound""",4,528,529,0.0,true,633,"""Varsity Blvd. at Pappas""","""Annie Glidden Rd at Crane Dr""",true,true,50s
"""19""",33,"""603""",10252,41.94709,-88.76576,0,0,8,0,824,1145,11396,181,486,11395,"""33-0831_1""",true,-3.0,11681,2024-09-01 00:05:59,"""35467605059b849""",2024-09-01 00:06:04,"""Outbound""",10,486,824,100.0,false,654,"""Normal Rd at NIU School of Nur…","""Ridge Rd. at Normal Rd.""",true,true,2m 10s


In [10]:
def compute_permutations(max_stops_ahead=5):
    """Build stop-to-stop travel-time samples for every active route.

    For each route name in ``route_mapping`` (except the inactive "777"),
    the route's ordered ``stopIDs`` are read from ``patterns`` and every stop
    is paired with each of the following ``max_stops_ahead`` stops, wrapping
    around at the end of the list. For each pair, every record in ``df``
    whose ``lastStopID`` is the first stop is as-of joined (forward, per
    ``equipmentID``) with the same vehicle's first record at or after that
    time whose ``lastStopID`` is the second stop. The gap between the two
    ``receiveTime`` values is stored as ``eta`` and ``eta_seconds``; records
    without a match keep nulls in the right-hand columns.

    Args:
        max_stops_ahead: Number of downstream stops paired with each stop.
            Defaults to 5.

    Returns:
        One DataFrame with the rows of all pairs stacked vertically, or
        ``None`` when no pair was produced.
    """
    frames = []
    for route in route_mapping.values():
        # Skip any inactive routes
        if route == "777":
            continue

        # join_asof expects both inputs to be ordered by the "on" key, so sort
        # once per route; the per-stop filters below preserve that order.
        subset = df.filter(pl.col("routeID") == route).sort(
            "receiveTime", maintain_order=True
        )
        stop_ids = list(patterns.filter(pl.col("id") == route)["stopIDs"][0])
        n_stops = len(stop_ids)

        print(f"[!] Computing permutations for route: {route} with stop ids len: {n_stops}")

        for i, stop1 in enumerate(stop_ids):
            df1 = subset.filter(pl.col("lastStopID") == stop1)

            for j in range(1, max_stops_ahead + 1):
                # Wrap around so stops near the end pair with the first stops.
                stop2 = stop_ids[(i + j) % n_stops]

                # Copy the fields we want from the later stop under names
                # that will not collide with the left frame's columns.
                df2 = subset.filter(pl.col("lastStopID") == stop2).with_columns(
                    pl.col("receiveTime").alias("receiveTime_right"),
                    pl.col("lastStopID").alias("nextStopID_actual"),
                    pl.col("lastStopName").alias("nextStopName_actual"),
                )

                joined_df = df1.join_asof(
                    df2, on="receiveTime", by="equipmentID", strategy="forward"
                )

                joined_df = joined_df.with_columns(
                    (pl.col("receiveTime_right") - pl.col("receiveTime")).alias("eta")
                )

                # DataFrame.drop returns a new frame, so the result has to be
                # assigned for the redundant right-hand columns to go away.
                joined_df = joined_df.drop(
                    [
                        "scheduleNumber_right",
                        "nextStopName_right",
                        "nextStopID_right",
                        "aID_right",
                        "trainID_right",
                        "onSchedule_right",
                    ],
                    strict=True,
                )

                # Add Time Of Day and Day of Week columns
                joined_df = joined_df.with_columns(
                    pl.col("receiveTime").dt.hour().alias("hour_of_day"),
                    pl.col("receiveTime").dt.minute().alias("minute_of_hour"),
                    pl.col("receiveTime").dt.weekday().alias("day_of_week"),
                    pl.col("eta").dt.total_seconds().alias("eta_seconds"),
                )

                frames.append(joined_df)

    if not frames:
        return None

    # Stack all pair frames in one call instead of growing a frame per pair.
    return pl.concat(frames, how="vertical")

# Build the combined stop-pair frame and persist it for later analysis.
output_path = './data/mega_df.parquet'
mega_df = compute_permutations()
mega_df.write_parquet(output_path)

print("[X] Succesfully wrote mega_df to disk")

[!] Computing permutations for route: 2L with stop ids len: 22
[!] Computing permutations for route: 2R with stop ids len: 23
[!] Computing permutations for route: 3 with stop ids len: 20
[!] Computing permutations for route: 10 with stop ids len: 33
[!] Computing permutations for route: 11 with stop ids len: 64
[!] Computing permutations for route: 12 with stop ids len: 15
[!] Computing permutations for route: 16 with stop ids len: 16
[!] Computing permutations for route: 17 with stop ids len: 43
[!] Computing permutations for route: 18 with stop ids len: 36
[!] Computing permutations for route: 19 with stop ids len: 58
[!] Computing permutations for route: 21 with stop ids len: 29
[!] Computing permutations for route: 21 Tripper with stop ids len: 10
[X] Succesfully wrote mega_df to disk


In [12]:
# Preview only the columns that describe each stop pair and its timing.
mega_df.select(
    [
        'routeID',
        'equipmentID',
        'lastStopID',
        'nextStopID_actual',
        'lastStopName',
        'nextStopName_actual',
        'receiveTime',
        'receiveTime_right',
        'eta',
        'day_of_week',
        'hour_of_day',
        'minute_of_hour',
    ]
)

routeID,equipmentID,lastStopID,nextStopID_actual,lastStopName,nextStopName_actual,receiveTime,receiveTime_right,eta,day_of_week,hour_of_day,minute_of_hour
str,str,i64,i64,str,str,datetime[ns],datetime[ns],duration[ns],i8,i8,i8
"""2L""","""Old_1501""",433,465,"""Lincoln Hall""","""Stadium Dr. at New Hall""",2024-09-01 00:04:41,2024-09-01 00:05:21,40s,7,0,4
"""2L""","""1103""",433,465,"""Lincoln Hall""","""Stadium Dr. at New Hall""",2024-09-01 00:34:35,2024-09-01 00:35:16,41s,7,0,34
"""2L""","""1103""",433,465,"""Lincoln Hall""","""Stadium Dr. at New Hall""",2024-09-01 01:03:34,2024-09-01 01:04:25,51s,7,1,3
"""2L""","""1103""",433,465,"""Lincoln Hall""","""Stadium Dr. at New Hall""",2024-09-01 01:33:46,2024-09-01 01:34:37,51s,7,1,33
"""2L""","""1103""",433,465,"""Lincoln Hall""","""Stadium Dr. at New Hall""",2024-09-01 02:04:05,2024-09-01 02:04:35,30s,7,2,4
"""2L""","""1103""",433,465,"""Lincoln Hall""","""Stadium Dr. at New Hall""",2024-09-01 02:33:37,2024-09-01 02:34:37,1m,7,2,33
"""2L""","""1103""",433,465,"""Lincoln Hall""","""Stadium Dr. at New Hall""",2024-09-01 03:02:47,2024-09-01 03:03:48,1m 1s,7,3,2
"""2L""","""1103""",433,465,"""Lincoln Hall""","""Stadium Dr. at New Hall""",2024-09-01 03:34:16,2024-09-01 03:35:16,1m,7,3,34
"""2L""","""1103""",433,465,"""Lincoln Hall""","""Stadium Dr. at New Hall""",2024-09-01 04:04:30,2024-09-01 04:05:40,1m 10s,7,4,4
"""2L""","""1103""",433,465,"""Lincoln Hall""","""Stadium Dr. at New Hall""",2024-09-01 04:33:34,2024-09-01 04:34:14,40s,7,4,33


In [2]:
# Sanity check: per-column non-null counts over the rows with a negative ETA.
negative_eta_rows = mega_df.filter(pl.col('eta_seconds') < 0)
negative_eta_rows.count()

NameError: name 'mega_df' is not defined

In [None]:
# Ordered stop ids of the "2L" pattern (first matching row).
twoLStopId = patterns.filter(pl.col("id") == "2L").get_column("stopIDs")[0]


433
465
466
467
468
436
437
438
439
440
441


### Another way to compute n stops ahead, with some caveats

In [None]:
# # Approach #2 - Using the 'shift' method to calculate the time difference between two stops (Not recommended)
# # Calculate the time difference between Lincoln Hall and Stevenson South
# shiftVal = -3
# startingStop = 465
# subset = df.filter(pl.col("routeID") == "2L")

# # Ensure the data is sorted by 'equipmentID' and 'receiveTime'
# df = subset.sort(["equipmentID", "receiveTime"])

# # Group by 'equipmentID' and shift the 'receiveTime' column by 3 to get the time three stops ahead
# df = df.with_columns([
#     pl.col("receiveTime").shift(shiftVal).over("equipmentID").alias("receiveTime_3_stops_ahead"),
#     pl.col("nextStopID").shift(shiftVal).over("equipmentID").alias("nextStopID_3_stops_ahead"),
#     pl.col("lastStopName").shift(shiftVal).over("equipmentID").alias("lastStopName_3_stops_ahead")
# ])

# # Join with stops to get the name of the stop 3 stops ahead
# df = df.join(stops.rename({"id": "nextStopID_3_stops_ahead"}).select(["nextStopID_3_stops_ahead", "name"]).unique(), on="nextStopID_3_stops_ahead", how="left").rename({"name": "nextStopName_3_ahead"})

# # Filter the rows where 'nextStopID' equals startingStop to calculate the time difference
# df_filtered = df.filter(pl.col("nextStopID") == startingStop)

# df_filtered = df_filtered.with_columns([
#     pl.col("receiveTime").dt.convert_time_zone("America/Chicago").alias("receiveTime"),
#     pl.col("receiveTime_3_stops_ahead").dt.convert_time_zone("America/Chicago").alias("receiveTime_3_stops_ahead")
# ])

# # Calculate the time difference between the current stop and three stops ahead
# df_filtered = df_filtered.with_columns([
#     (pl.col("receiveTime_3_stops_ahead") - pl.col("receiveTime")).alias("timeDiff_3_stops_ahead")
# ])

# # Display the result
# df_filtered['routeID', 'equipmentID', 'lastStopName', 'nextStopName_3_ahead', 'receiveTime', 'receiveTime_3_stops_ahead', 'timeDiff_3_stops_ahead']