In [6]:
precovid_duration = r"C:\Users\athen\Documents\GitHub\TCD_Masters_Dissertation\final_code\metrics\precovid\durationsdf_precovid.csv"
lockdown_duration = r"C:\Users\athen\Documents\GitHub\TCD_Masters_Dissertation\final_code\metrics\lockdown\durationsdf_lockdown.csv"
recovery_duration = r"C:\Users\athen\Documents\GitHub\TCD_Masters_Dissertation\final_code\metrics\recovery\durationsdf_recovery.csv"
postcovid_duration = r"C:\Users\athen\Documents\GitHub\TCD_Masters_Dissertation\final_code\metrics\postcovid\durationsdf_postcovid.csv"



# HEADWAYS

In [18]:
import pandas as pd

# File paths
precovid_headways = r"C:\Users\athen\Documents\GitHub\TCD_Dissertation\data\metrics\precovid\headwaydf_precovid.csv"
lockdown_headways = r"C:\Users\athen\Documents\GitHub\TCD_Dissertation\data\metrics\lockdown\headwaydf_lockdown.csv"
recovery_headways = r"C:\Users\athen\Documents\GitHub\TCD_Dissertation\data\metrics\recovery\headwaydf_recovery.csv"
postcovid_headways = r"C:\Users\athen\Documents\GitHub\TCD_Dissertation\data\metrics\postcovid\headwaydf_postcovid.csv"

# Load and label
def load_with_label(path, period_label):
    df = pd.read_csv(path)
    df["Period"] = period_label
    return df

# Load all datasets
dfs = [
    load_with_label(precovid_headways, "Pre-COVID"),
    load_with_label(lockdown_headways, "Lockdown"),
    load_with_label(recovery_headways, "Recovery"),
    load_with_label(postcovid_headways, "Post-COVID")
]

# Combine
df_all = pd.concat(dfs, ignore_index=True)

# Clean
df_all.columns = df_all.columns.str.strip()
df_all['Stop'] = df_all['Stop'].str.upper()

# Analysis function
def show_extremes(df, period):
    subset = df[df["Period"] == period]
    print(f"\n==== {period} Headways ====")

    if subset.empty:
        print("No data available.")
        return

    top = subset.sort_values("AvgHeadwayMinutes", ascending=False).head(3)
    low = subset.sort_values("AvgHeadwayMinutes").head(3)

    print("\nTop 3 Stops with Highest Avg Headways:")
    print(top[["Line", "Direction", "Stop", "AvgHeadwayMinutes"]].to_string(index=False))

    print("\nTop 3 Stops with Lowest Avg Headways:")
    print(low[["Line", "Direction", "Stop", "AvgHeadwayMinutes"]].to_string(index=False))

# Run for each period
for period in df_all["Period"].unique():
    show_extremes(df_all, period)



==== Pre-COVID Headways ====

Top 3 Stops with Highest Avg Headways:
 Line Direction Stop  AvgHeadwayMinutes
Green  Outbound  CHE          27.541667
  Red   Inbound  SDK          23.000000
Green   Inbound  CAB          19.807333

Top 3 Stops with Lowest Avg Headways:
Line Direction Stop  AvgHeadwayMinutes
 Red  Outbound  SMI           8.058869
 Red  Outbound  FOU           8.102937
 Red  Outbound  BEL           8.121203

==== Lockdown Headways ====

Top 3 Stops with Highest Avg Headways:
 Line Direction Stop  AvgHeadwayMinutes
Green  Outbound  CHE          20.262222
  Red  Outbound  HOS          19.085632
  Red   Inbound  SDK          18.914368

Top 3 Stops with Lowest Avg Headways:
Line Direction Stop  AvgHeadwayMinutes
 Red  Outbound  JER           7.896346
 Red  Outbound  SMI           7.898741
 Red  Outbound  FOU           7.899041

==== Recovery Headways ====

Top 3 Stops with Highest Avg Headways:
 Line Direction Stop  AvgHeadwayMinutes
  Red  Outbound  FOR          23.779630
Gr

### Tables

In [24]:
df_all.groupby(['Period', 'Line']).apply(lambda g: (g['AvgHeadwayMinutes'] > 10).mean() * 100).round(1)


  df_all.groupby(['Period', 'Line']).apply(lambda g: (g['AvgHeadwayMinutes'] > 10).mean() * 100).round(1)


Period      Line 
Lockdown    Green    62.1
            Red      71.8
Post-COVID  Green    85.5
            Red      96.4
Pre-COVID   Green    63.7
            Red      75.2
Recovery    Green    79.8
            Red      81.8
dtype: float64

In [25]:
df_all.groupby(['Period', 'Line', 'PeakPeriod', 'Direction'])['AvgHeadwayMinutes'].mean().round(2)


Period      Line   PeakPeriod  Direction
Lockdown    Green  Evening     Inbound      11.59
                               Outbound     11.56
                   Morning     Inbound      11.89
                               Outbound     13.13
            Red    Evening     Inbound      12.55
                               Outbound     10.76
                   Morning     Inbound      12.82
                               Outbound     11.16
Post-COVID  Green  Evening     Inbound      13.03
                               Outbound     12.61
                   Morning     Inbound      12.91
                               Outbound     14.25
            Red    Evening     Inbound      15.40
                               Outbound     13.10
                   Morning     Inbound      15.16
                               Outbound     13.47
Pre-COVID   Green  Evening     Inbound      12.15
                               Outbound     12.32
                   Morning     Inbound      12.18
         

# TRAVEL TIME VOLATILITY

In [8]:
import pandas as pd

# Load the CSV files as DataFrames
precovid_volatility = pd.read_csv(r"C:\Users\athen\Documents\GitHub\TCD_Dissertation\data\metrics\precovid\volatilitydf_precovid.csv")
lockdown_volatility = pd.read_csv(r"c:\Users\athen\Documents\GitHub\TCD_Dissertation\data\metrics\lockdown\volatilitydf_lockdown.csv")
recovery_volatility = pd.read_csv(r"C:\Users\athen\Documents\GitHub\TCD_Dissertation\data\metrics\recovery\volatilitydf_recovery.csv")
postcovid_volatility = pd.read_csv(r"C:\Users\athen\Documents\GitHub\TCD_Dissertation\data\metrics\postcovid\volatilitydf_postcovid.csv")
# Define your analysis function
def analyse_ttv(df, label):
    under_1_min = (df['AvgVolatilityMinutes'] < 1).sum()
    total = df.shape[0]
    percent_under_1 = round((under_1_min / total) * 100, 2)
    max_ttv = df['AvgVolatilityMinutes'].max()
    return {
        'period': label,
        'percent_under_1_min': percent_under_1,
        'max_ttv': round(max_ttv, 2)
    }

# Run analysis and print results
for df, name in zip(
    [precovid_volatility, lockdown_volatility, recovery_volatility, postcovid_volatility],
    ['Pre-COVID', 'Lockdown', 'Recovery', 'Post-COVID']
):
    results = analyse_ttv(df, name)
    print(results)


{'period': 'Pre-COVID', 'percent_under_1_min': np.float64(77.66), 'max_ttv': np.float64(4.28)}
{'period': 'Lockdown', 'percent_under_1_min': np.float64(74.56), 'max_ttv': np.float64(4.94)}
{'period': 'Recovery', 'percent_under_1_min': np.float64(73.84), 'max_ttv': np.float64(4.27)}
{'period': 'Post-COVID', 'percent_under_1_min': np.float64(72.63), 'max_ttv': np.float64(4.24)}


# JOURNEY DURATION

In [16]:
import pandas as pd

# File paths for each pandemic-defined period
period_files = {
    "Pre-COVID": r"C:\Users\athen\Documents\GitHub\TCD_Dissertation\data\metrics\precovid\durationsdf_precovid.csv",
    "Lockdown": r"C:\Users\athen\Documents\GitHub\TCD_Dissertation\data\metrics\lockdown\durationsdf_lockdown.csv",
    "Recovery": r"C:\Users\athen\Documents\GitHub\TCD_Dissertation\data\metrics\recovery\durationsdf_recovery.csv",
    "Post-COVID": r"C:\Users\athen\Documents\GitHub\TCD_Dissertation\data\metrics\postcovid\durationsdf_postcovid.csv"}

# Function to map hours to time-of-day periods
def assign_period(hour):
    if 5 <= hour < 11:
        return "Morning"
    elif 11 <= hour < 17:
        return "Midday"
    elif 17 <= hour < 21:
        return "Evening"
    elif 21 <= hour <= 23 or 0 <= hour < 5:
        return "Night"
    else:
        return "Unknown"

# List to hold summary data
time_of_day_summaries = []

# Loop through each period's file
for period, path in period_files.items():
    df = pd.read_csv(path)
    
    # Extract hour if not already present
    if 'hour_of_day' not in df.columns:
        df['hour_of_day'] = pd.to_datetime(df['StartTime']).dt.hour
    
    # Assign time of day
    df['TimeOfDay'] = df['hour_of_day'].apply(assign_period)
    df['Period'] = period

    # Group by time of day, line, and direction
    summary = (
        df.groupby(['Period', 'TimeOfDay', 'Line', 'Direction'])
        .agg(
            avg_duration=('TravelTimeMinutes', 'mean'),
            std_duration=('TravelTimeMinutes', 'std'),
            trip_count=('TravelTimeMinutes', 'count')
        )
        .reset_index()
    )
    
    time_of_day_summaries.append(summary)

# Combine into one DataFrame
tod_df = pd.concat(time_of_day_summaries, ignore_index=True)


### Tables

In [31]:
tod_df.groupby(['Period', 'Line'])['avg_duration'].mean().round(1)


Period      Line 
Lockdown    Green    49.8
            Red      48.2
Post-COVID  Green    50.8
            Red      49.2
Pre-COVID   Green    50.2
            Red      48.5
Recovery    Green    50.2
            Red      48.9
Name: avg_duration, dtype: float64

In [32]:
tod_df[tod_df['TimeOfDay'].isin(['Morning', 'MidDay', 'Evening', 'Night'])].groupby(['Period', 'Line', 'TimeOfDay'])['avg_duration'].mean().round(1)


Period      Line   TimeOfDay
Lockdown    Green  Evening      47.4
                   Morning      49.6
                   Night        55.2
            Red    Evening      48.5
                   Morning      48.1
                   Night        47.3
Post-COVID  Green  Evening      48.5
                   Morning      50.4
                   Night        56.3
            Red    Evening      49.7
                   Morning      49.0
                   Night        48.3
Pre-COVID   Green  Evening      47.8
                   Morning      49.7
                   Night        55.8
            Red    Evening      49.1
                   Morning      48.4
                   Night        47.4
Recovery    Green  Evening      48.3
                   Morning      49.9
                   Night        55.0
            Red    Evening      49.2
                   Morning      48.8
                   Night        47.8
Name: avg_duration, dtype: float64

# COMBINED VOLATILTIY

In [6]:

from PIL import Image
import matplotlib.pyplot as plt
import numpy as np

# Define your 4 phase image files ( Line TTV)
phase_files = [
    ("Pre-COVID", r"C:\Users\athen\Documents\GitHub\TCD_Dissertation\figures\travel_time_volatility\volatility_allperiods_red_combined_lockdown.png"),
    ("Lockdown", r"C:\Users\athen\Documents\GitHub\TCD_Dissertation\figures\travel_time_volatility\volatility_allperiods_red_combined_postcovid.png"),
    ("Recovery", r"C:\Users\athen\Documents\GitHub\TCD_Dissertation\figures\travel_time_volatility\volatility_allperiods_red_combined_precovid.png"),
    ("Post-COVID", r"C:\Users\athen\Documents\GitHub\TCD_Dissertation\figures\travel_time_volatility\volatility_allperiods_red_combined_recovery.png"),
]


images = []
for phase, file in phase_files:
    img = Image.open(file)
    fig, ax = plt.subplots(figsize=(img.width / 100, img.height / 100))
    ax.imshow(img)
    ax.set_title(f"Red Line Travel Time Volatility — {phase}", fontsize=16, weight="bold")
    ax.axis("off")
    fig.tight_layout()
    fig.canvas.draw()
    
    # Updated line:
    image_np = np.asarray(fig.canvas.buffer_rgba())
    images.append(Image.fromarray(image_np))
    plt.close(fig)

# Stitch images vertically
total_height = sum(img.height for img in images)
max_width = max(img.width for img in images)

final_img = Image.new("RGBA", (max_width, total_height), color=(255, 255, 255, 255))

y_offset = 0
for img in images:
    final_img.paste(img, (0, y_offset))
    y_offset += img.height

final_img.save(r"C:\Users\athen\Documents\GitHub\TCD_Dissertation\figures\travel_time_volatility\volatility_red_all_periods_combined.png")
print("Saved")



Saved


In [3]:
import pandas as pd

period_files = {
    "Pre-COVID": r"C:\Users\athen\Documents\GitHub\TCD_Dissertation\data\metrics\precovid\durationsdf_precovid.csv",
    "Lockdown": r"C:\Users\athen\Documents\GitHub\TCD_Dissertation\data\metrics\lockdown\durationsdf_lockdown.csv",
    "Recovery": r"C:\Users\athen\Documents\GitHub\TCD_Dissertation\data\metrics\recovery\durationsdf_recovery.csv",
    "Post-COVID": r"C:\Users\athen\Documents\GitHub\TCD_Dissertation\data\metrics\postcovid\durationsdf_postcovid.csv"
}

total_journeys = 0
per_phase_counts = {}

for phase, filepath in period_files.items():
    df = pd.read_csv(filepath)
    
    if 'TramJourneyID' not in df.columns:
        raise ValueError(f"'TramJourneyID' column not found in {filepath}")
    
    journey_count = df['TramJourneyID'].nunique()
    per_phase_counts[phase] = journey_count
    total_journeys += journey_count

# Print the results
print("Reconstructed Journey Counts by Phase:")
for phase, count in per_phase_counts.items():
    print(f"{phase}: {count:,}")

print(f"\nTotal Valid Reconstructed Journeys: {total_journeys:,}")


Reconstructed Journey Counts by Phase:
Pre-COVID: 22,574
Lockdown: 164,611
Recovery: 76,762
Post-COVID: 88,680

Total Valid Reconstructed Journeys: 352,627
