## First test of the single-incident view function (the second output of the analysis)
### The function for this will be defined at the start and may be added to the utils afterwards

Given the inputs INCIDENT_CODE and DATE, the aim is to produce:

- A table of the stations affected

In [2]:
import json
import pickle
import sys
import os
import pandas as pd
import copy
from datetime import datetime
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
from datetime import datetime, timedelta
import numpy as np
import glob

# Put the parent of the current working directory on sys.path so that the
# `outputs` package imported below can be resolved.
# NOTE(review): this depends on the directory the kernel was started in, and it
# runs before the os.chdir() call in the following cell - confirm the launch
# directory is the one that sits next to the `outputs` package.
parent_dir = os.path.dirname(os.getcwd())
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

# Reload outputs.utils before importing names from it, so that edits made to
# the module after it was first imported in this kernel are picked up.
import importlib
import outputs.utils
importlib.reload(outputs.utils)
from outputs.utils import incident_view, incident_view_html

In [3]:
# Navigate to the RDM_analysis directory where processed_data is located.
# The location can be overridden through the RDM_ANALYSIS_DIR environment
# variable; the original hard-coded path remains the default.
rdm_analysis_dir = os.environ.get(
    'RDM_ANALYSIS_DIR',
    r'c:\Users\39342\University of Glasgow\Ji-Eun Byun - MZ-JB\RDM_analysis',
)
os.chdir(rdm_analysis_dir)
current_dir = os.getcwd()
print(f"New working directory: {current_dir}")

# Bind pkl_files before the existence check: a later cell filters this list and
# would otherwise fail with NameError when processed_data is missing.
pkl_files = []

# Now check for processed_data
if os.path.exists('processed_data'):
    print("processed_data folder found!")
    pkl_files = glob.glob('processed_data/*.pkl')
    print(f"Found {len(pkl_files)} pickle files")

    if pkl_files:
        print("First 5 files:", pkl_files[:5])
        # Filter out summary files and test loading one data file
        data_files = [f for f in pkl_files if 'summary' not in f]
        if data_files:
            # NOTE: pd.read_pickle unpickles the file; only load pickles that
            # come from a trusted source.
            test_df = pd.read_pickle(data_files[0])
            print(f"Test file shape: {test_df.shape}")
            print("Sample incident codes:", test_df['INCIDENT_NUMBER'].dropna().unique()[:5])
        else:
            print("No data files found (only summary files)")
else:
    # Show what is in the working directory to help locate the data folder.
    print("Still no processed_data folder found")
    print("Current directory contents:", os.listdir('.'))

New working directory: c:\Users\39342\University of Glasgow\Ji-Eun Byun - MZ-JB\RDM_analysis
processed_data folder found!
Found 127 pickle files
First 5 files: ['processed_data\\category_a_stations_summary.pkl', 'processed_data\\category_a_station_12931_FR.pkl', 'processed_data\\category_a_station_12931_MO.pkl', 'processed_data\\category_a_station_12931_SA.pkl', 'processed_data\\category_a_station_12931_SU.pkl']
Test file shape: (5117, 21)
Sample incident codes: [705009.  59161. 571294. 133193. 703015.]


In [4]:
# Drop every pickle whose path contains 'summary', then inspect the first
# remaining file.
data_files = list(filter(lambda path: 'summary' not in path, pkl_files))
print(f"Data files (excluding summary): {len(data_files)}")

if len(data_files) > 0:
    first_data_file = data_files[0]
    test_df = pd.read_pickle(first_data_file)
    print(f"Test file shape: {test_df.shape}")
    # (label, column, number of unique non-null values to show)
    for label, column, sample_size in (
        ("Sample incident codes:", 'INCIDENT_NUMBER', 5),
        ("Sample incident start dates:", 'INCIDENT_START_DATETIME', 3),
    ):
        print(label, test_df[column].dropna().unique()[:sample_size])

Data files (excluding summary): 126
Test file shape: (5117, 21)
Sample incident codes: [705009.  59161. 571294. 133193. 703015.]
Sample incident start dates: ['09-AUG-2024 18:41' '06-DEC-2024 05:48' '21-JUN-2024 04:27']


In [5]:
# Quick look at the schema: column names first, then the frame's dimensions.
column_names = list(test_df.columns)
print("Columns:", column_names)
print(f"\nShape: {test_df.shape}")

Columns: ['TRAIN_SERVICE_CODE', 'PLANNED_ORIGIN_LOCATION_CODE', 'PLANNED_ORIGIN_GBTT_DATETIME', 'PLANNED_DEST_LOCATION_CODE', 'PLANNED_DEST_GBTT_DATETIME', 'PLANNED_CALLS', 'ACTUAL_CALLS', 'PFPI_MINUTES', 'INCIDENT_REASON', 'INCIDENT_NUMBER', 'EVENT_TYPE', 'SECTION_CODE', 'DELAY_DAY', 'EVENT_DATETIME', 'INCIDENT_START_DATETIME', 'ENGLISH_DAY_TYPE', 'STATION_ROLE', 'DFT_CATEGORY', 'PLATFORM_COUNT', 'DATASET_TYPE', 'WEEKDAY']

Shape: (5117, 21)


In [6]:
# Exercise incident_view with the temporal parameters.
#
# Example 1: incident 705009, which started on 09-AUG-2024, examined over a
# 30-minute window beginning at 19:00 on the same day.
incident_code = 705009
incident_date = '09-AUG-2024'    # start date of the incident (used to locate it)
analysis_date = '09-AUG-2024'    # day to analyse (same day or a later one)
analysis_hhmm = '1900'           # window start in HHMM form, i.e. 19:00
period_minutes = 30              # window length, i.e. 19:00 to 19:30

# Echo the request before running it.
request_summary = [
    f"\nAnalyzing incident {incident_code}:",
    f"- Incident started: {incident_date}",
    f"- Analysis date: {analysis_date} at {analysis_hhmm[:2]}:{analysis_hhmm[2:]}",
    f"- Analysis duration: {period_minutes} minutes",
]
print("\n".join(request_summary))

view_args = (incident_code, incident_date, analysis_date, analysis_hhmm, period_minutes)
result, incident_start, analysis_period = incident_view(*view_args)

# Report the three values returned by incident_view.
print(f"\nIncident originally started at: {incident_start}")
print(f"Analysis period: {analysis_period}")
print(f"\nResult shape: {result.shape}")
print("\nDetailed Results:")
print(result)


Analyzing incident 705009:
- Incident started: 09-AUG-2024
- Analysis date: 09-AUG-2024 at 19:00
- Analysis duration: 30 minutes
Analyzing incident 705009 (started 09-AUG-2024)
Analysis period: 09-Aug-2024 19:00 to 09-Aug-2024 19:30 (30 min)
Incident Details:
  Section Code: 53221 (Hitchin)
  Incident Reason: I1
  Started: 09-AUG-2024 18:41

Incident originally started at: 09-AUG-2024 18:41
Analysis period: 09-Aug-2024 19:00 to 09-Aug-2024 19:30 (30 min)

Result shape: (6, 7)

Detailed Results:
  STATION_CODE  PLANNED_CALLS  ACTUAL_CALLS  DELAYED_TRAINS_OUT  \
0        12931              8             8                   0   
1        16416              9             8                   1   
2        17132             26            27                   0   
3        54311              8            11                   2   
4        63630              3             3                   0   
5        87245             13            11                   2   

  DELAY_MINUTES_OUT  DELAYED_

### Concrete Example:

Incident starts: 09:00

Period: 60 minutes (09:00-10:00)

Train A: Originally scheduled 09:30, delayed 80 minutes → arrives 10:50

This train is counted in DELAYED_TRAINS_OUT (was supposed to be in 09:00-10:00 window)

Train B: Originally scheduled 08:30, delayed 45 minutes → arrives 09:15

This train is counted in DELAYED_TRAINS_IN (shifted into 09:00-10:00 window)

## Testing the HTML function

In [8]:
# Exercise incident_view_html: one analysis window split into equal intervals.

# Test parameters - MODIFY THESE AS NEEDED
incident_code = 705009
incident_date = '09-AUG-2024'
analysis_date = '09-AUG-2024'
analysis_hhmm = '1900'
period_minutes = 60      # whole window: 60 minutes (19:00-20:00)
interval_minutes = 10    # the window is cut into 10-minute intervals

# Derive the variable name and the output file name from the parameters above.
safe_date = '_'.join(analysis_date.split('-'))
variable_name = f"html_inc{incident_code}_{safe_date}_{analysis_hhmm}_p{period_minutes}m_i{interval_minutes}m"
file_name = f'incident_{incident_code}_{safe_date}_{analysis_hhmm}_period{period_minutes}min_interval{interval_minutes}min.html'

print(f"üìù Creating variable: '{variable_name}'")
print(f"üìÅ Output file: '{file_name}'")

# Build the HTML view; file_name is passed as the last positional argument.
html_args = (
    incident_code,
    incident_date,
    analysis_date,
    analysis_hhmm,
    period_minutes,
    interval_minutes,
    file_name,
)
html_result = incident_view_html(*html_args)

# Expose the result in the notebook namespace under the generated name.
globals().update({variable_name: html_result})

print(f"‚úÖ HTML map created and stored in variable: {variable_name}")
print(f"üí° Access your result with: {variable_name}")


üìù Creating variable: 'html_inc705009_09_AUG_2024_1900_p60m_i10m'
üìÅ Output file: 'incident_705009_09_AUG_2024_1900_period60min_interval10min.html'
Creating dynamic HTML map for incident 705009
Analysis period: 09-Aug-2024 19:00 to 09-Aug-2024 20:00
Interval size: 10 minutes
Total intervals: 6
Found delay data for 4 stations
Found incident location: Hitchin (51.95296135, -0.262518185)
 DYNAMIC DELAY MAP CREATED! 
File: incident_705009_09_AUG_2024_1900_period60min_interval10min.html
Time steps: 6 (10-minute intervals)
Stations mapped: 4
 Features: Play/Pause controls, Color-coded delays, Interval-specific timeline
 Open the HTML file in your browser to explore the dynamic timeline!
‚úÖ HTML map created and stored in variable: html_inc705009_09_AUG_2024_1900_p60m_i10m
üí° Access your result with: html_inc705009_09_AUG_2024_1900_p60m_i10m
Found delay data for 4 stations
Found incident location: Hitchin (51.95296135, -0.262518185)
 DYNAMIC DELAY MAP CREATED! 
File: incident_705009_09_

In [9]:
# Second incident_view_html run: same incident, analysed on the NEXT DAY with a
# longer window and coarser intervals.

# Test parameters - MODIFY THESE AS NEEDED
incident_code = 705009           # unique identifier
incident_date = '09-AUG-2024'    # unique identifier
analysis_date = '10-AUG-2024'
analysis_hhmm = '0800'
period_minutes = 360     # whole window: 360 minutes
interval_minutes = 30    # the window is cut into 30-minute intervals

# Derive the variable name and the output file name from the parameters above.
safe_date = '_'.join(analysis_date.split('-'))
variable_name = f"html_inc{incident_code}_{safe_date}_{analysis_hhmm}_p{period_minutes}m_i{interval_minutes}m"
file_name = f'incident_{incident_code}_{safe_date}_{analysis_hhmm}_period{period_minutes}min_interval{interval_minutes}min.html'

print(f"üìù Creating variable: '{variable_name}'")
print(f"üìÅ Output file: '{file_name}'")

# Build the HTML view; file_name is passed as the last positional argument.
html_args = (
    incident_code,
    incident_date,
    analysis_date,
    analysis_hhmm,
    period_minutes,
    interval_minutes,
    file_name,
)
html_result = incident_view_html(*html_args)

# Expose the result in the notebook namespace under the generated name.
globals().update({variable_name: html_result})

print(f"‚úÖ HTML map created and stored in variable: {variable_name}")
print(f"üí° Access your result with: {variable_name}")


üìù Creating variable: 'html_inc705009_10_AUG_2024_0800_p360m_i30m'
üìÅ Output file: 'incident_705009_10_AUG_2024_0800_period360min_interval30min.html'
Creating dynamic HTML map for incident 705009
Analysis period: 10-Aug-2024 08:00 to 10-Aug-2024 14:00
Interval size: 30 minutes
Total intervals: 12
Found delay data for 4 stations
Found incident location: Hitchin (51.95296135, -0.262518185)
 DYNAMIC DELAY MAP CREATED! 
File: incident_705009_10_AUG_2024_0800_period360min_interval30min.html
Time steps: 12 (30-minute intervals)
Stations mapped: 4
 Features: Play/Pause controls, Color-coded delays, Interval-specific timeline
 Open the HTML file in your browser to explore the dynamic timeline!
‚úÖ HTML map created and stored in variable: html_inc705009_10_AUG_2024_0800_p360m_i30m
üí° Access your result with: html_inc705009_10_AUG_2024_0800_p360m_i30m
Found delay data for 4 stations
Found incident location: Hitchin (51.95296135, -0.262518185)
 DYNAMIC DELAY MAP CREATED! 
File: incident_705