In [6]:
# Journey-duration CSVs, one per period. Every file follows the same
# "<root>\<period>\durationsdf_<period>.csv" layout, so the paths are built
# from a single template.
# NOTE(review): this root (TCD_Masters_Dissertation\final_code\metrics) differs
# from the TCD_Dissertation\data\metrics root used by the other path
# definitions in this notebook -- confirm which location is current.
_duration_csv_template = (
    r"C:\Users\athen\Documents\GitHub\TCD_Masters_Dissertation\final_code\metrics"
    r"\{0}\durationsdf_{0}.csv"
)

precovid_duration = _duration_csv_template.format("precovid")
lockdown_duration = _duration_csv_template.format("lockdown")
recovery_duration = _duration_csv_template.format("recovery")
postcovid_duration = _duration_csv_template.format("postcovid")



# HEADWAYS

In [7]:
import pandas as pd

# Headway CSVs, one per period. All four share the
# "<root>\<period>\headwaydf_<period>.csv" layout, so they are derived from
# one template rather than spelled out individually.
_headway_csv_template = (
    r"C:\Users\athen\Documents\GitHub\TCD_Dissertation\data\metrics"
    r"\{0}\headwaydf_{0}.csv"
)

precovid_headways = _headway_csv_template.format("precovid")
lockdown_headways = _headway_csv_template.format("lockdown")
recovery_headways = _headway_csv_template.format("recovery")
postcovid_headways = _headway_csv_template.format("postcovid")

# Load and label
def load_with_label(path, period_label):
    """Read a CSV and tag every row with the period it belongs to.

    Parameters
    ----------
    path : str or file-like
        Anything accepted by ``pd.read_csv``.
    period_label : str
        Value written into the ``"Period"`` column of every row.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV with a ``"Period"`` column set to ``period_label``
        (appended as the last column, or overwritten in place if the file
        already has a column of that name).
    """
    return pd.read_csv(path).assign(Period=period_label)

# Load all datasets.
# Header whitespace is stripped on each frame *before* concatenation:
# pd.concat aligns columns by their exact names, so a padded header such as
# "Stop " in a single file would otherwise land in its own mostly-NaN column,
# and stripping afterwards would leave two columns both called "Stop"
# (making df_all['Stop'] a DataFrame and breaking the .str call below).
dfs = [
    load_with_label(csv_path, label).rename(columns=str.strip)
    for csv_path, label in (
        (precovid_headways, "Pre-COVID"),
        (lockdown_headways, "Lockdown"),
        (recovery_headways, "Recovery"),
        (postcovid_headways, "Post-COVID"),
    )
]

# Combine into one frame with a fresh 0..n-1 index
df_all = pd.concat(dfs, ignore_index=True)

# Clean: upper-case the values in the Stop column
df_all['Stop'] = df_all['Stop'].str.upper()

# Analysis function
def show_extremes(df, period):
    """Print the three highest and three lowest average headways for a period.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns "Period", "Line", "Direction", "Stop" and
        "AvgHeadwayMinutes".
    period : str
        Rows whose "Period" equals this value are reported.

    Returns
    -------
    None
        Output goes to stdout. When no row matches ``period`` a
        "No data available." line is printed instead of the tables.
    """
    subset = df[df["Period"] == period]
    print(f"\n==== {period} Headways ====")

    if subset.empty:
        print("No data available.")
        return

    shown_columns = ["Line", "Direction", "Stop", "AvgHeadwayMinutes"]

    # Each entry: (heading to print, whether to sort ascending).
    # Descending order yields the highest headways, ascending the lowest.
    reports = (
        ("\nTop 3 Stops with Highest Avg Headways:", False),
        ("\nTop 3 Stops with Lowest Avg Headways:", True),
    )
    for heading, ascending in reports:
        extremes = subset.sort_values("AvgHeadwayMinutes", ascending=ascending).head(3)
        print(heading)
        print(extremes[shown_columns].to_string(index=False))

# Report every period present in the combined frame, in first-appearance order
for period in df_all["Period"].drop_duplicates().tolist():
    show_extremes(df_all, period)



==== Pre-COVID Headways ====

Top 3 Stops with Highest Avg Headways:
 Line Direction Stop  AvgHeadwayMinutes
Green  Outbound  CHE          27.541667
  Red   Inbound  SDK          23.000000
Green   Inbound  CAB          19.807333

Top 3 Stops with Lowest Avg Headways:
Line Direction Stop  AvgHeadwayMinutes
 Red  Outbound  SMI           8.058869
 Red  Outbound  FOU           8.102937
 Red  Outbound  BEL           8.121203

==== Lockdown Headways ====

Top 3 Stops with Highest Avg Headways:
 Line Direction Stop  AvgHeadwayMinutes
Green  Outbound  CHE          20.262222
  Red  Outbound  HOS          19.085632
  Red   Inbound  SDK          18.914368

Top 3 Stops with Lowest Avg Headways:
Line Direction Stop  AvgHeadwayMinutes
 Red  Outbound  JER           7.896346
 Red  Outbound  SMI           7.898741
 Red  Outbound  FOU           7.899041

==== Recovery Headways ====

Top 3 Stops with Highest Avg Headways:
 Line Direction Stop  AvgHeadwayMinutes
  Red  Outbound  FOR          23.779630
Gr

# TRAVEL TIME VOLATILITY

In [8]:
import pandas as pd

# Read the per-period volatility CSVs, one DataFrame each.
# (The lockdown path is written with a lowercase drive letter in the original
# and is kept exactly as-is here.)
precovid_volatility = pd.read_csv(
    r"C:\Users\athen\Documents\GitHub\TCD_Dissertation\data\metrics\precovid\volatilitydf_precovid.csv"
)
lockdown_volatility = pd.read_csv(
    r"c:\Users\athen\Documents\GitHub\TCD_Dissertation\data\metrics\lockdown\volatilitydf_lockdown.csv"
)
recovery_volatility = pd.read_csv(
    r"C:\Users\athen\Documents\GitHub\TCD_Dissertation\data\metrics\recovery\volatilitydf_recovery.csv"
)
postcovid_volatility = pd.read_csv(
    r"C:\Users\athen\Documents\GitHub\TCD_Dissertation\data\metrics\postcovid\volatilitydf_postcovid.csv"
)
# Define your analysis function
def analyse_ttv(df, label):
    """Summarise the 'AvgVolatilityMinutes' column of one period's frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an 'AvgVolatilityMinutes' column.
    label : str
        Period name; returned unchanged under the 'period' key.

    Returns
    -------
    dict
        'period'              -- ``label``
        'percent_under_1_min' -- percentage of rows whose value is < 1,
                                 rounded to 2 decimals
        'max_ttv'             -- largest value in the column, rounded to
                                 2 decimals
        Both numbers are built-in ``float`` objects so the dict prints as
        plain numbers (NumPy scalars would show up as ``np.float64(...)``).
        For a frame with no rows both numbers are NaN.
    """
    volatility = df['AvgVolatilityMinutes']
    total = len(volatility)

    # Nothing to summarise: return NaN explicitly instead of dividing by zero.
    if total == 0:
        return {
            'period': label,
            'percent_under_1_min': float('nan'),
            'max_ttv': float('nan')
        }

    under_1_min = (volatility < 1).sum()
    # Round first (same arithmetic as before), then convert to a plain float.
    percent_under_1 = float(round((under_1_min / total) * 100, 2))
    max_ttv = float(round(volatility.max(), 2))
    return {
        'period': label,
        'percent_under_1_min': percent_under_1,
        'max_ttv': max_ttv
    }

# Summarise each period's volatility frame, in the order listed, and print the dict
for df, name in (
    (precovid_volatility, 'Pre-COVID'),
    (lockdown_volatility, 'Lockdown'),
    (recovery_volatility, 'Recovery'),
    (postcovid_volatility, 'Post-COVID'),
):
    results = analyse_ttv(df, name)
    print(results)


{'period': 'Pre-COVID', 'percent_under_1_min': np.float64(77.66), 'max_ttv': np.float64(4.28)}
{'period': 'Lockdown', 'percent_under_1_min': np.float64(74.56), 'max_ttv': np.float64(4.94)}
{'period': 'Recovery', 'percent_under_1_min': np.float64(73.84), 'max_ttv': np.float64(4.27)}
{'period': 'Post-COVID', 'percent_under_1_min': np.float64(72.63), 'max_ttv': np.float64(4.24)}


# JOURNEY DURATION

In [9]:
import pandas as pd

# Map each period label to its journey-duration CSV. The files share the
# "<root>\<folder>\durationsdf_<folder>.csv" layout, so only the folder name
# varies between entries.
_durations_csv_template = (
    r"C:\Users\athen\Documents\GitHub\TCD_Dissertation\data\metrics"
    r"\{0}\durationsdf_{0}.csv"
)
period_files = {
    label: _durations_csv_template.format(folder)
    for label, folder in (
        ("Pre-COVID", "precovid"),
        ("Lockdown", "lockdown"),
        ("Recovery", "recovery"),
        ("Post-COVID", "postcovid"),
    )
}

# Function to map hours to time-of-day periods
def assign_period(hour):
    """Return the time-of-day band name for an hour value.

    Bands:
        0 <= hour < 5    -> "Night"
        5 <= hour < 11   -> "Morning"
        11 <= hour < 17  -> "Midday"
        17 <= hour < 21  -> "Evening"
        21 <= hour <= 23 -> "Night"

    Any value that falls in none of these ranges (negative, above 23, NaN)
    yields "Unknown".
    """
    # Half-open [start, stop) bands; they do not overlap, so order is irrelevant.
    half_open_bands = (
        (0, 5, "Night"),
        (5, 11, "Morning"),
        (11, 17, "Midday"),
        (17, 21, "Evening"),
    )
    for start, stop, band in half_open_bands:
        if start <= hour < stop:
            return band

    # The late-night band is closed at 23 (inclusive upper bound).
    if 21 <= hour <= 23:
        return "Night"

    return "Unknown"

# Per-period summary frames; concatenated once the loop has finished
time_of_day_summaries = []

# Columns that identify one row of the summary
group_keys = ['Period', 'TimeOfDay', 'Line', 'Direction']

# Output column -> (source column, aggregation) for the grouped statistics
duration_stats = {
    'avg_duration': ('TravelTimeMinutes', 'mean'),
    'std_duration': ('TravelTimeMinutes', 'std'),
    'trip_count': ('TravelTimeMinutes', 'count'),
}

# Process one period file at a time
for period, path in period_files.items():
    df = pd.read_csv(path)

    # Derive the hour from StartTime only when the file has no hour_of_day column
    if 'hour_of_day' not in df.columns:
        start_times = pd.to_datetime(df['StartTime'])
        df['hour_of_day'] = start_times.dt.hour

    # Tag every row with its time-of-day band and the period being processed
    df['TimeOfDay'] = df['hour_of_day'].apply(assign_period)
    df['Period'] = period

    # Aggregate TravelTimeMinutes per (period, time of day, line, direction)
    grouped = df.groupby(group_keys)
    summary = grouped.agg(**duration_stats).reset_index()

    time_of_day_summaries.append(summary)

# Stack the per-period summaries into a single frame
tod_df = pd.concat(time_of_day_summaries, ignore_index=True)

# Show the first ten rows
print(tod_df.head(10))


      Period TimeOfDay   Line    Direction  avg_duration  std_duration  \
0  Pre-COVID   Evening  Green   Inbound to     48.323664      7.697966   
1  Pre-COVID   Evening  Green  Outbound to     47.217329      6.145860   
2  Pre-COVID   Evening    Red   Inbound to     48.723784      3.320792   
3  Pre-COVID   Evening    Red  Outbound to     49.483297      3.570066   
4  Pre-COVID    Midday  Green   Inbound to     47.190991      6.336064   
5  Pre-COVID    Midday  Green  Outbound to     47.525709      5.629320   
6  Pre-COVID    Midday    Red   Inbound to     48.948927      3.961750   
7  Pre-COVID    Midday    Red  Outbound to     49.155598      3.896574   
8  Pre-COVID   Morning  Green   Inbound to     49.686740      6.949606   
9  Pre-COVID   Morning  Green  Outbound to     49.735998      7.786544   

   trip_count  
0        1603  
1        1560  
2         747  
3        1385  
4        2394  
5        2339  
6        1647  
7        2748  
8        1830  
9        1401  
