In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

In [None]:
# Load the two input tables. Both CSVs live in the 'Allegheny_sample_xd_part1'
# folder under the current working directory.
cwd = os.getcwd()
data_dir = os.path.join(cwd, 'Allegheny_sample_xd_part1')

# main sample file
df = pd.read_csv(os.path.join(data_dir, 'Allegheny_sample_xd_part1.csv'))

# XD identification file (joined to the sample in the merge cell)
filepath = os.path.join(data_dir, 'XD_Identification.csv')
df_xd = pd.read_csv(filepath)


In [None]:
# Preview the first rows of the XD identification table read from XD_Identification.csv.
df_xd.head()

In [None]:
# Join the sample records with the XD identification table. Inner join: only
# rows whose `xd_id` has a matching `xd` entry in df_xd are kept.
df_merge = pd.merge(
    df,
    df_xd,
    how='inner',
    left_on='xd_id',
    right_on='xd',
)

In [None]:
# Preview the first rows of the merged frame to check the join result.
df_merge.head()

In [None]:
def add_time_cols(df, datetime_col):
    """Return a copy of ``df`` with calendar and time-of-day columns added.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is not modified.
    datetime_col : str
        Name of the column holding the timestamps. Values that are not yet
        datetimes (e.g. strings straight from ``pd.read_csv``) are parsed with
        ``pd.to_datetime`` for the derivation only; the column itself is
        returned unchanged.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with ``month``, ``day``, ``time``, ``hour`` and
        ``minute`` columns derived from ``datetime_col``.
    """
    # The `.dt` accessor raises AttributeError on object/string columns, so
    # parse first. For a column that is already datetime this is a no-op.
    stamps = pd.to_datetime(df[datetime_col])
    return df.assign(
        month=stamps.dt.month,
        day=stamps.dt.day,
        time=stamps.dt.time,
        hour=stamps.dt.hour,
        minute=stamps.dt.minute,
    )

In [None]:
# get variation across x hour period 
def intraday_variation(df, frc, hour_start, hour_end):
    """Plot the mean travel-time profile of one FRC over part of a single day.

    Rows are restricted to the given ``frc``, to day-of-month 1 (used as a
    single representative day) and to hours ``hour_start`` through
    ``hour_end - 1`` inclusive. Travel times are then averaged per timestamp
    and drawn as a line on a new figure.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``frc``, ``day``, ``hour``, ``measurement_tstamp`` and
        ``travel_time_seconds`` columns.
    frc
        Value compared for equality against ``df['frc']``.
    hour_start, hour_end : int
        Hour window; ``hour_end`` itself is excluded.

    Returns
    -------
    None
        The figure is created as a side effect.
    """
    in_window = (
        (df['frc'] == frc)
        & (df['day'] == 1)  # day = 1 for a single representative day
        & df['hour'].between(hour_start, hour_end - 1)
    )
    # Average only the travel-time column: a whole-frame `.mean()` raises
    # TypeError on pandas >= 2.0 when the frame has non-numeric columns
    # (e.g. the `time` objects added by add_time_cols).
    df_grouped = (
        df.loc[in_window]
        .groupby('measurement_tstamp')['travel_time_seconds']
        .mean()
        .reset_index()
    )
    # make plot
    fig, ax = plt.subplots()
    ax.plot(df_grouped['measurement_tstamp'], df_grouped['travel_time_seconds'])
    ax.set_title('FRC =' + str(frc))
    ax.set_xlabel('time')
    ax.set_ylabel('travel time (s)')
    ax.tick_params(axis='x', rotation=90)

In [None]:
# Parse `measurement_tstamp` into datetimes, then add the month/day/time/hour/minute
# columns derived from it.
df_merge['measurement_tstamp'] = pd.to_datetime(df_merge['measurement_tstamp']) 
df_merge = add_time_cols(df_merge, 'measurement_tstamp')
# Plot the intraday profile for hours 7 and 8 (07:00 up to 09:00) for each FRC in the loop.
# Only FRC 1 is plotted for now; the commented-out iterable would cover every FRC value
# present in the data.
# NOTE: `frc` stays bound to its last value (1) after the loop; a later cell reads it.
for frc in range(1,2): # df_merge['frc'].unique().tolist():
    print(frc)
    intraday_variation(df_merge, frc, 7, 9)

In [None]:
def day2day_variation(df, frc, hour, minute):
    """Return the max/min ratio of daily mean travel time at one clock time.

    Rows are restricted to the given ``frc``, ``hour`` and ``minute``; the
    travel times are averaged per ``day`` and the largest daily mean is
    divided by the smallest.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``frc``, ``hour``, ``minute``, ``day`` and
        ``travel_time_seconds`` columns. It is not modified.
    frc
        Value compared for equality against ``df['frc']``.
    hour, minute : int
        Clock time to select.

    Returns
    -------
    float
        ``max / min`` of the per-day mean travel time; NaN when no row matches.
    """
    selected = (df['frc'] == frc) & (df['hour'] == hour) & (df['minute'] == minute)
    # Only the travel time feeds the ratio. The previous version also averaged
    # `measurement_tstamp` and sorted the result; both were unused, and the
    # timestamp mean fails when that column is not a datetime.
    daily_mean_tt = df.loc[selected].groupby('day')['travel_time_seconds'].mean()
    return daily_mean_tt.max() / daily_mean_tt.min()

In [None]:
# Analysis window: every hour from start_time up to (not including) end_time,
# sampled every 5 minutes.
start_time = 7
end_time = 9
# One (frc, hour, minute) key per evaluated slot. range(start_time, end_time)
# covers every hour in the window; the previous [start_time, end_time-1] list
# gave the same hours for 7-9 but would skip the middle hours of a longer window.
frc_hr_min = [
    (frc, hr, minute)
    for frc in range(1, 2)
    for hr in range(start_time, end_time)
    for minute in range(0, 60, 5)
]
# Reliability ratio for each key, in the same order as frc_hr_min. Built from
# the key list so the two lists cannot drift apart.
rel_ratio = [day2day_variation(df_merge, frc, hr, minute) for frc, hr, minute in frc_hr_min]

In [None]:
# TODO: compute reliability as the 95th-percentile travel time using all 10 days.
# Not implemented yet: the cells below use a max/min ratio of daily means.

# test for 7:30
hour = 7
minute = 30
# Filter on the variables above, not on repeated literals, so changing
# `hour` / `minute` actually changes the selection.
df_specific_time = df_merge[(df_merge['hour'] == hour) & (df_merge['minute'] == minute)]
df_specific_time.head()


In [None]:
# Day-to-day variation at the selected clock time for a single FRC.
# `frc` is set explicitly here: the cell previously relied on whatever value
# the loop variable of an earlier plotting cell happened to leave behind (1).
frc = 1
df_specific_time = df_specific_time[df_specific_time['frc'] == frc]
# Per-day means, ordered from the fastest to the slowest day.
df_day = (
    df_specific_time
    .groupby('day')[['measurement_tstamp', 'travel_time_seconds']]
    .mean()
    .sort_values(by='travel_time_seconds', ascending=True)
)
fig, ax = plt.subplots()
ax.scatter(x=df_day.index, y=df_day['travel_time_seconds'])
ax.set_title(f'FRC = {frc} at {hour:02d}:{minute:02d}')
ax.set_xlabel('day of month')
ax.set_ylabel('mean travel time (s)')
# Reliability ratio: slowest daily mean divided by fastest daily mean.
minTT = df_day['travel_time_seconds'].min()
maxTT = df_day['travel_time_seconds'].max()
reliability_ratio = maxTT / minTT
print(reliability_ratio)


In [None]:
# NOTE(review): `df_test` is not defined in the cells above, so this cell raises
# NameError on a fresh "Restart & Run All". Define it before this point
# (presumably a subset of df_merge for one segment; confirm).
# `between` is inclusive on both ends, so rows with hour 9 are included here.
condition = (df_test.day == 1) & (df_test.hour.between(7, 9))
# .copy() gives an independent frame, so adding a column below does not write
# into a slice of df_test (avoids pandas' SettingWithCopyWarning).
df_79 = df_test[condition].copy()
# 5-row rolling mean; the first 4 rows are NaN and are not drawn.
df_79['smoothed_tt'] = df_79['travel_time_seconds'].rolling(5).mean()
fig, ax = plt.subplots()
ax.plot(df_79.measurement_tstamp, df_79.smoothed_tt)
ax.set_xlabel('time')
ax.set_ylabel('smoothed travel time (s)')

In [None]:
# Plot the raw (unsmoothed) travel times of the same df_79 subset against the timestamp.
fig, ax = plt.subplots()
ax.plot(df_79.measurement_tstamp, df_79.travel_time_seconds)

In [None]:
# NOTE(review): this repeats the timestamp parsing / time-column step already applied
# to df_merge in an earlier cell. It also rebinds `df`, the name that originally held
# the raw CSV and is an input to the merge cell, so re-running the merge after this
# cell would start from the already-merged frame. Consider a new name; confirm intent.
df_merge['measurement_tstamp'] = pd.to_datetime(df_merge['measurement_tstamp']) 
df = add_time_cols(df_merge, 'measurement_tstamp')

In [None]:
# choose xd_id == 133749395
# choose a single time, e.g. 08:00:00
# plot x = day, y = travel_time 