In [5]:
def safe_read_parquet(file_path):
    """
    Safely read a parquet file, trying several engines to handle compatibility issues.

    The engines are attempted in the order ``pyarrow``, ``fastparquet`` and finally
    pandas' own ``auto`` selection; the first successful read is returned.

    Parameters
    ----------
    file_path : str or path-like
        Location of the parquet file to read.

    Returns
    -------
    pandas.DataFrame
        The file contents, or an empty DataFrame when no engine could read the file.
        In that case a warning listing each engine's error is printed, so a failed
        read is not mistaken for a file that simply contains no rows.
    """
    # 'auto' is the engine pd.read_parquet uses when none is given, so it stands in
    # for the engine-less last-resort attempt.
    engines = ('pyarrow', 'fastparquet', 'auto')
    failures = []

    for engine in engines:
        try:
            return pd.read_parquet(file_path, engine=engine)
        except Exception as exc:
            # Best-effort read: remember why this engine failed and try the next one.
            failures.append(f"{engine}: {type(exc).__name__}: {exc}")

    print(f"Warning: Could not read parquet file {file_path}: " + "; ".join(failures))
    return pd.DataFrame()  # Return empty DataFrame

### First test: visualizing a "time" view

Given a date, this view should show the overall delay impact across the network on that day.

---
This approach was chosen because multiple incidents can occur at once, so a network-wide view for a specific date the user wants to analyze can be a helpful resource.

In [21]:
# Import required libraries
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import os
from datetime import datetime, timedelta
import sys
import json
import glob
import folium

# Add the parent directory to the path to import data modules
sys.path.append(os.path.join(os.getcwd(), '..'))
from data.reference import reference_files

In [22]:
def load_station_names():
    """Build a STANOX -> station name lookup from the DFT categories reference file.

    Returns a dict keyed by the stringified ``stanox`` of every record that carries
    both a ``stanox`` and a ``station_name`` field. If reading or parsing the file
    fails for any reason, a warning is printed and an empty dict is returned.
    """
    try:
        reference_path = reference_files["all dft categories"]
        with open(reference_path, 'r') as handle:
            records = json.load(handle)

        # Records missing either field are ignored; a repeated stanox keeps its last name.
        return {
            str(record['stanox']): record['station_name']
            for record in records
            if 'stanox' in record and 'station_name' in record
        }
    except Exception as err:
        print(f"Warning: Could not load station reference data: {err}")
        return {}

In [23]:
# Load all .parquet files from processed_data into a pandas DataFrame, trying both pyarrow and fastparquet engines
#
# Files are matched as <data_dir>/<directory>/<file>.parquet; the directory name is copied
# onto every row as STANOX and the file name (without extension) as DAY.

data_dir = '../processed_data' if not os.path.isdir('processed_data') else 'processed_data'
# glob returns matches in arbitrary order; sorting keeps the row order of all_data reproducible between runs
all_parquet_files = sorted(glob.glob(os.path.join(data_dir, '*', '*.parquet')))

list_df = []
skipped_files = []
for file in all_parquet_files:
    try:
        df = pd.read_parquet(file)
    except Exception as default_engine_error:
        try:
            df = pd.read_parquet(file, engine='fastparquet')
        except Exception as fastparquet_error:
            # Best-effort load: report why the file is unreadable instead of dropping it silently
            print(f"Skipping {file}: default engine failed ({default_engine_error}); "
                  f"fastparquet failed ({fastparquet_error})")
            skipped_files.append(file)
            continue
    stanox = os.path.basename(os.path.dirname(file))
    day = os.path.splitext(os.path.basename(file))[0]
    df['STANOX'] = stanox
    df['DAY'] = day
    list_df.append(df)

if list_df:
    all_data = pd.concat(list_df, ignore_index=True)
    print(f"Loaded {len(all_data)} rows from {len(list_df)} files. Skipped {len(skipped_files)} files.")
else:
    # Nothing readable was found; downstream cells receive an empty frame with no columns
    all_data = pd.DataFrame()
    print("No data loaded.")

Loaded 10450237 rows from 2599 files. Skipped 0 files.


In [30]:
def create_time_view_html(date_str):
    """
    Create an HTML map showing affected stations for a given date, with markers sized by incident count and colored by total PFPI minutes.

    Reads the module-level ``all_data`` frame and the station reference file
    ``reference_files["all dft categories"]``, then saves the map as
    ``time_view_<date_str with '-' replaced by '_'>.html`` in the current working directory.

    Parameters
    ----------
    date_str : str
        Date text to look for inside ``INCIDENT_START_DATETIME``, e.g. ``'28-APR-2024'``.
        It is matched as a literal substring (not a regular expression), so it must be
        written the same way as in the data and include the full day number
        (``'8-APR-2024'`` would also match ``'18-APR-2024'`` and ``'28-APR-2024'``).

    Returns
    -------
    None
        Progress and problems are reported with ``print``; the function returns early
        without writing a file when required columns are missing, when no rows match
        the date, or when the coordinates cannot be loaded.
    """
    # --- Color grading function to match incident_view_heatmap_html legend ---
    def get_color(delay):
        try:
            d = float(delay)
        except Exception:
            d = 0
        # NaN fails every comparison below and would otherwise land in the "Critical" bucket
        if pd.isna(d) or d == 0:
            return "blue"
        if d <= 5:
            return '#32CD32'     # Minor (1-5 min) - Lime Green
        elif d <= 15:
            return '#FFD700'     # Moderate (6-15 min) - Gold
        elif d <= 30:
            return '#FF8C00'     # Significant (16-30 min) - Dark Orange
        elif d <= 60:
            return '#FF0000'     # Major (31-60 min) - Red
        elif d <= 120:
            return '#8B0000'     # Severe (61-120 min) - Dark Red
        else:
            return '#8A2BE2'     # Critical (120+ min) - Blue Violet

    # An empty all_data (nothing was loaded) has no columns at all; report it instead of raising KeyError
    required_columns = ('INCIDENT_START_DATETIME', 'STANOX', 'INCIDENT_NUMBER', 'PFPI_MINUTES')
    missing_columns = [col for col in required_columns if col not in all_data.columns]
    if missing_columns:
        print(f"Cannot build time view for {date_str}: all_data is missing columns {missing_columns}")
        return

    # Filter data for the specified date.
    # regex=False: the date is a literal substring, so characters such as '.' or '(' are not
    # interpreted as regular-expression syntax.
    date_mask = all_data['INCIDENT_START_DATETIME'].str.contains(date_str, na=False, regex=False)
    filtered_data = all_data[date_mask]

    if filtered_data.empty:
        print(f"No data found for date {date_str}")
        return

    # Get unique affected STANOX codes
    affected_stanox = filtered_data['STANOX'].unique()

    # Count incidents per STANOX
    incident_counts = filtered_data.groupby('STANOX')['INCIDENT_NUMBER'].nunique()

    # Sum PFPI_MINUTES per STANOX
    total_pfpi = filtered_data.groupby('STANOX')['PFPI_MINUTES'].sum()

    # Load station coordinates
    try:
        with open(reference_files["all dft categories"], 'r') as f:
            stations_data = json.load(f)
        stanox_to_coords = {}
        for station in stations_data:
            if 'stanox' in station and 'latitude' in station and 'longitude' in station:
                # Skip records whose coordinates are null, non-numeric or NaN: they cannot be
                # plotted and would otherwise abort the whole map while markers are being added.
                try:
                    coords = [float(station['latitude']), float(station['longitude'])]
                except (TypeError, ValueError):
                    continue
                if pd.isna(coords[0]) or pd.isna(coords[1]):
                    continue
                stanox_to_coords[str(station['stanox'])] = coords
    except Exception as e:
        print(f"Error loading coordinates: {e}")
        return

    # Create Folium map centered on UK
    m = folium.Map(location=[54.5, -2.5], zoom_start=6)

    # Add markers for each affected station, sized by incident count and colored by total PFPI
    for stanox in affected_stanox:
        stanox_str = str(stanox)
        coords = stanox_to_coords.get(stanox_str)
        if coords is None:
            print(f"Coordinates not found for STANOX: {stanox_str}")
            continue

        lat, lon = coords
        count = incident_counts.get(stanox, 0)
        count = int(count) if pd.notna(count) else 0
        total_delay = total_pfpi.get(stanox, 0)
        total_delay = float(total_delay) if pd.notna(total_delay) else 0.0
        color = get_color(total_delay)
        radius = int(5 + count * 2)  # Scale radius with incident count
        folium.CircleMarker(
            location=[lat, lon],
            radius=radius,
            color=color,
            fill=True,
            fill_color=color,
            fill_opacity=0.7,
            popup=f"STANOX: {stanox_str}<br>Incidents: {count}<br>Total Delay: {total_delay:.1f} min"
        ).add_to(m)

    # Add legend
    legend_html = '''
    <div style="position: fixed; bottom: 50px; left: 50px; width: 180px; height: 180px; background-color: white; border:2px solid grey; z-index:9999; font-size:14px; padding: 10px;">
    <p><b>Delay Legend (Total PFPI Minutes)</b></p>
    <p><span style="color:blue;">●</span> 0 min</p>
    <p><span style="color:#32CD32;">●</span> 1-5 min</p>
    <p><span style="color:#FFD700;">●</span> 6-15 min</p>
    <p><span style="color:#FF8C00;">●</span> 16-30 min</p>
    <p><span style="color:#FF0000;">●</span> 31-60 min</p>
    <p><span style="color:#8B0000;">●</span> 61-120 min</p>
    <p><span style="color:#8A2BE2;">●</span> 120+ min</p>
    </div>
    '''
    m.get_root().html.add_child(folium.Element(legend_html))

    # Save the map to HTML file
    output_file = f"time_view_{date_str.replace('-', '_')}.html"
    m.save(output_file)
    print(f"Map saved to {output_file}")

In [31]:
# Build the network time view for 28 April 2024; the date text is matched against
# INCIDENT_START_DATETIME and the map is saved as time_view_28_APR_2024.html
create_time_view_html('28-APR-2024')

Map saved to time_view_28_APR_2024.html


In [32]:
# Build the network time view for 31 January 2024; the date text is matched against
# INCIDENT_START_DATETIME and the map is saved as time_view_31_JAN_2024.html
create_time_view_html('31-JAN-2024')

Map saved to time_view_31_JAN_2024.html
