In [348]:
import pandas as pd
import json

# reading the first json from the realtime data folder
parent_folder = 'washington_GTFS_data 2024.1.29-2024.2.11/washington_GTFS_data - Copy/output/'
trip_updates = 'RAIL_RT_TRIP_UPDATES/'

file_path = parent_folder + trip_updates + '2024_01_02_00_00_00.json'
try:
	with open(file_path, 'r') as file:
		tripJson = json.load(file)
except FileNotFoundError:
	print(f"File not found: {file_path}")

# extracting the links data from static data
links = pd.read_csv('static/links.csv')
stopTimes = pd.read_csv('static/stop_times.csv')
uniqueLinks = pd.read_csv('static/unique_links.csv')


In [349]:
def get_trip_data(tripJson: dict) -> pd.DataFrame:
    data = []
    for entity in tripJson['entity']:
        trip_update = entity.get('trip_update', {})
        stop_time_updates = trip_update.get('stop_time_update', [])
        for stop_time_update in stop_time_updates:
            data.append({
                'trip_id': trip_update['trip']['trip_id'],
                'stop_id': stop_time_update['stop_id'],
                'stop_sequence': stop_time_update['stop_sequence'],
                'arrival_time': stop_time_update.get('arrival', {}).get('time', None),
                'arrival_uncertainty': stop_time_update.get('arrival', {}).get('uncertainty', None),
                'departure_time': stop_time_update.get('departure', {}).get('time', None),
                'departure_uncertainty': stop_time_update.get('departure', {}).get('uncertainty', None),
            })

    currentStopTimes = pd.DataFrame(data)
    currentStopTimes['arrival_time'] = pd.to_datetime(currentStopTimes['arrival_time'], unit='s', errors='coerce')
    currentStopTimes['departure_time'] = pd.to_datetime(currentStopTimes['departure_time'], unit='s', errors='coerce')
    return currentStopTimes

def calculateLinkParams(currentStopTimes: pd.DataFrame, lengthdf: pd.DataFrame) -> pd.DataFrame:
    currentStopTimes = currentStopTimes.sort_values(by=['trip_id', 'stop_sequence'])
    next_stop_times = currentStopTimes.shift(-1)

    mask = currentStopTimes['trip_id'] == next_stop_times['trip_id']
    link_data = currentStopTimes[mask].copy()
    link_data['end_stop'] = next_stop_times['stop_id']
    link_data['end_sequence'] = next_stop_times['stop_sequence']
    link_data['end_time'] = next_stop_times['arrival_time']

    link_data['start_time'] = link_data['departure_time'].fillna(link_data['arrival_time'])
    link_data['real_time_taken [mins]'] = (link_data['end_time'] - link_data['start_time']).dt.total_seconds() / 60

    link_data = link_data.merge(lengthdf, left_on=['stop_id', 'end_stop'], right_on=['start_stop', 'end_stop'], how='left')
    link_data['real_speed [km/h]'] = link_data['length'] / (link_data['real_time_taken [mins]'] / 60)
    
    return link_data[['trip_id', 'stop_id', 'end_stop', 'stop_sequence', 'end_sequence', 'real_time_taken [mins]', 'start_time', 'end_time', 'length', 'real_speed [km/h]']].rename(columns={'stop_id': 'start_stop', 'stop_sequence': 'start_sequence'})

def calculateLinkKpis(currentLinks: pd.DataFrame, uniqueLinks: pd.DataFrame, stopTimes: pd.DataFrame) -> pd.DataFrame:
    for index, row in uniqueLinks.iterrows():
        link = currentLinks[(currentLinks['start_stop'] == row['start_stop']) & (currentLinks['end_stop'] == row['end_stop'])]

        mean_speed = link['real_speed [km/h]'].mean()
        covariance_speed = link['real_speed [km/h]'].std() / mean_speed if mean_speed else 0
        buffer_speed = link['real_speed [km/h]'].quantile(0.95) - link['real_speed [km/h]'].quantile(0.5)

        mean_time = link['real_time_taken [mins]'].mean()
        covariance_time = link['real_time_taken [mins]'].std() / mean_time if mean_time else 0
        buffer_time = link['real_time_taken [mins]'].quantile(0.95) - link['real_time_taken [mins]'].quantile(0.5)

        uniqueLinks.loc[index, 'mean_speed [km/h]'] = mean_speed
        uniqueLinks.loc[index, 'covariance_speed [%]'] = covariance_speed
        uniqueLinks.loc[index, 'buffer_speed [km/h]'] = buffer_speed
        uniqueLinks.loc[index, 'mean_time [mins]'] = mean_time
        uniqueLinks.loc[index, 'covariance_time [%]'] = covariance_time
        uniqueLinks.loc[index, 'buffer_time [mins]'] = buffer_time

        trips = stopTimes[stopTimes['stop_id'] == row['start_stop']].drop_duplicates(subset=['trip_id']).sort_values(by='departure_time')
        trips['departure_time'] = pd.to_datetime(trips['departure_time'], format='%H:%M:%S', errors='coerce')
        headways = trips['departure_time'].diff().dt.total_seconds().dropna() / 60

        if not headways.empty:
            mean_headway = headways.mean()
            covariance_headway = headways.std() / mean_headway if mean_headway else 0
            buffer_headway = headways.quantile(0.95) - headways.quantile(0.5)
            uniqueLinks.loc[index, 'mean_headway [mins]'] = mean_headway
            uniqueLinks.loc[index, 'covariance_headway [%]'] = covariance_headway
            uniqueLinks.loc[index, 'buffer_headway [mins]'] = buffer_headway
        else:
            uniqueLinks.loc[index, 'mean_headway [mins]'] = None
            uniqueLinks.loc[index, 'covariance_headway [%]'] = None
            uniqueLinks.loc[index, 'buffer_headway [mins]'] = None

    return uniqueLinks


In [350]:

currentStopTimes = get_trip_data(tripJson)
currentStopTimes

Unnamed: 0,trip_id,stop_id,stop_sequence,arrival_time,arrival_uncertainty,departure_time,departure_uncertainty
0,4982232_19743,PF_A15_C,1,NaT,,2024-02-01,0.0
1,4982232_19743,PF_A14_C,2,2024-02-01 00:03:12,0.0,NaT,
2,4982232_19743,PF_A13_C,3,2024-02-01 00:06:26,0.0,NaT,
3,4982232_19743,PF_A12_C,4,2024-02-01 00:08:52,0.0,NaT,
4,4982232_19743,PF_A11_C,5,2024-02-01 00:11:29,0.0,NaT,
...,...,...,...,...,...,...,...
3467,4982449_19743,PF_B07_C,23,2024-01-31 23:46:43,0.0,NaT,
3468,4982449_19743,PF_B08_C,24,2024-01-31 23:49:32,0.0,NaT,
3469,4982449_19743,PF_B09_C,25,2024-01-31 23:53:05,0.0,NaT,
3470,4982449_19743,PF_B10_C,26,2024-01-31 23:55:58,0.0,NaT,


In [351]:
uniqueLinks

Unnamed: 0,start_stop,end_stop,start_sequence,end_sequence,length,mean_speed [km/h],covariance_speed [%],buffer_speed [km/h],mean_time [mins],covariance_time [%],buffer_time [mins],mean_headway [mins],covariance_headway [%],buffer_headway [mins]
0,PF_A15_C,PF_A14_C,1,2,2.6710,40.065000,1.774532e-16,0.0,4.0,0.0,0.0,2.171030,0.594617,3.00
1,PF_A14_C,PF_A13_C,2,3,2.1048,42.096000,1.688916e-16,0.0,3.0,0.0,0.0,2.684783,0.605075,4.00
2,PF_A13_C,PF_A12_C,3,4,1.0988,32.964000,0.000000e+00,0.0,2.0,0.0,0.0,2.654428,0.656903,4.00
3,PF_A12_C,PF_A11_C,4,5,1.4536,29.072000,0.000000e+00,0.0,3.0,0.0,0.0,2.328897,0.579458,2.00
4,PF_A11_C,PF_A10_C,5,6,2.1612,32.418000,2.193121e-16,0.0,4.0,0.0,0.0,2.317490,0.577380,2.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
197,PF_N11_C,PF_N10_C,2,3,2.8733,34.479600,0.000000e+00,0.0,5.0,0.0,0.0,2.990544,0.902491,7.00
198,PF_N10_C,PF_N09_C,3,4,2.0799,31.198628,4.425913e-05,0.0,4.0,0.0,0.0,2.778022,0.795745,5.00
199,PF_N09_C,PF_N08_C,4,5,1.7224,34.447983,5.344577e-06,0.0,3.0,0.0,0.0,2.941452,0.909301,6.00
200,PF_N08_C,PF_N07_C,5,6,1.3477,26.954000,5.260984e-16,0.0,3.0,0.0,0.0,2.906977,0.817076,5.55


In [356]:
lengthdf = uniqueLinks[['start_stop','end_stop', 'length']]
lengthdf

Unnamed: 0,start_stop,end_stop,length
0,PF_A15_C,PF_A14_C,2.6710
1,PF_A14_C,PF_A13_C,2.1048
2,PF_A13_C,PF_A12_C,1.0988
3,PF_A12_C,PF_A11_C,1.4536
4,PF_A11_C,PF_A10_C,2.1612
...,...,...,...
197,PF_N11_C,PF_N10_C,2.8733
198,PF_N10_C,PF_N09_C,2.0799
199,PF_N09_C,PF_N08_C,1.7224
200,PF_N08_C,PF_N07_C,1.3477


In [353]:
currentLinks = calculateLinkParams(currentStopTimes, lengthdf)
currentLinks

Unnamed: 0,trip_id,start_stop,end_stop,start_sequence,end_sequence,real_time_taken [mins],start_time,end_time,length,real_speed [km/h]
0,4982165_19743,PF_A15_C,PF_A14_C,1,2.0,4.550000,2024-01-31 22:47:51,2024-01-31 22:52:24,2.6710,35.221978
1,4982165_19743,PF_A14_C,PF_A13_C,2,3.0,3.550000,2024-01-31 22:52:24,2024-01-31 22:55:57,2.1048,35.574085
2,4982165_19743,PF_A13_C,PF_A12_C,3,4.0,2.450000,2024-01-31 22:55:57,2024-01-31 22:58:24,1.0988,26.909388
3,4982165_19743,PF_A12_C,PF_A11_C,4,5.0,2.550000,2024-01-31 22:58:24,2024-01-31 23:00:57,1.4536,34.202353
4,4982165_19743,PF_A11_C,PF_A10_C,5,6.0,3.700000,2024-01-31 23:00:57,2024-01-31 23:04:39,2.1612,35.046486
...,...,...,...,...,...,...,...,...,...,...
3330,NR377,PF_D03_C,PF_D04_C,23,24.0,1.500000,2024-02-01 00:44:12,2024-02-01 00:45:42,0.3631,14.524000
3331,NR377,PF_D04_C,PF_D05_C,24,25.0,1.650000,2024-02-01 00:45:42,2024-02-01 00:47:21,0.5018,18.247273
3332,NR377,PF_D05_C,PF_D06_C,25,26.0,1.683333,2024-02-01 00:47:21,2024-02-01 00:49:02,0.5489,19.564752
3333,NR377,PF_D06_C,PF_D07_C,26,27.0,1.750000,2024-02-01 00:49:02,2024-02-01 00:50:47,0.6398,21.936000


In [354]:
# calculating the current unique links
currentUniqueLinks = currentLinks.drop_duplicates(subset=['start_stop', 'end_stop'])[['start_stop', 'end_stop', 'start_sequence', 'end_sequence']]
currentUniqueLinks.reset_index(drop=True, inplace=True)
currentUniqueLinks

Unnamed: 0,start_stop,end_stop,start_sequence,end_sequence
0,PF_A15_C,PF_A14_C,1,2.0
1,PF_A14_C,PF_A13_C,2,3.0
2,PF_A13_C,PF_A12_C,3,4.0
3,PF_A12_C,PF_A11_C,4,5.0
4,PF_A11_C,PF_A10_C,5,6.0
...,...,...,...,...
197,PF_D10_C,PF_D09_C,4,5.0
198,PF_D09_C,PF_D08_C,5,6.0
199,PF_K05_C,PF_K06_2,23,24.0
200,PF_K06_2,PF_K07_C,24,25.0


In [355]:
kpis = calculateLinkKpis(currentLinks, currentUniqueLinks, stopTimes)
kpis

Unnamed: 0,start_stop,end_stop,start_sequence,end_sequence,mean_speed [km/h],covariance_speed [%],buffer_speed [km/h],mean_time [mins],covariance_time [%],buffer_time [mins],mean_headway [mins],covariance_headway [%],buffer_headway [mins]
0,PF_A15_C,PF_A14_C,1,2.0,47.663518,0.553266,18.945980,3.957895,0.429820,1.673333,0.699202,1.722194,3.0
1,PF_A14_C,PF_A13_C,2,3.0,38.297121,0.053641,2.270418,3.306140,0.051140,0.253333,0.695092,2.034402,4.0
2,PF_A13_C,PF_A12_C,3,4.0,25.617347,0.174783,0.395511,2.816667,0.525070,0.843333,0.693440,2.087864,4.0
3,PF_A12_C,PF_A11_C,4,5.0,34.317370,0.138164,5.175448,2.572807,0.093682,0.101667,0.691978,1.817738,4.0
4,PF_A11_C,PF_A10_C,5,6.0,34.881789,0.173907,2.306700,3.993860,0.433855,1.183333,0.690141,1.816619,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
197,PF_D10_C,PF_D09_C,4,5.0,27.483472,0.123677,0.736360,2.175000,0.136564,0.640833,1.212219,1.730645,7.0
198,PF_D09_C,PF_D08_C,5,6.0,28.872150,0.167020,1.787677,4.433333,0.198288,1.967500,1.209003,1.498307,5.0
199,PF_K05_C,PF_K06_2,23,24.0,38.100171,0.025086,0.000000,3.345000,0.026786,0.155833,0.607875,1.666652,2.0
200,PF_K06_2,PF_K07_C,24,25.0,42.422700,0.077129,5.690850,3.430000,0.064536,0.000000,2.329218,1.508214,10.0
