In [None]:
# Transit Analytics Dashboard

Enhanced analysis of real-time transit delays and performance metrics.


In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta

# Notebook-wide plotting defaults.
# The style reset runs first: it rewrites rcParams, so overrides must follow it.
plt.style.use('default')
plt.rcParams.update({'figure.figsize': (12, 6)})  # (width, height) in inches
sns.set_palette('husl')


In [None]:
# Load Parquet data
base = Path('/home/jovyan/warehouse/aggregates/route_delay_daily')
SAMPLE_SEED = 42  # fixed seed so the demo fallback is identical on every Restart & Run All


def make_sample_delays(seed=SAMPLE_SEED):
    """Build a small synthetic delay table used when no Parquet files are found.

    Produces one row per (date, route) for 7 consecutive days starting
    2025-01-01 and routes A-D (28 rows).

    Parameters
    ----------
    seed : int, optional
        Seed for the random generator; the same seed always yields the same table.

    Returns
    -------
    pandas.DataFrame
        Columns ``event_date`` (``datetime.date``), ``route_id`` (str) and
        ``avg_delay_seconds`` (float, never negative).
    """
    rng = np.random.default_rng(seed)
    dates = pd.date_range('2025-01-01', periods=7)
    routes = ['A', 'B', 'C', 'D']
    records = [
        {
            'event_date': day.date(),
            'route_id': route,
            # avg 30s delay, 15s std; clamped at 0 because a delay cannot be negative
            'avg_delay_seconds': max(0.0, float(rng.normal(30, 15))),
        }
        for day in dates
        for route in routes
    ]
    return pd.DataFrame(records)


try:
    # Sort the paths so the concatenation order does not depend on the filesystem.
    parquet_files = sorted(base.rglob('*.parquet'))
    if parquet_files:
        df = pd.concat([pd.read_parquet(p) for p in parquet_files], ignore_index=True)
        # Keep plain date objects (no time component), matching the sample-data branch.
        df['event_date'] = pd.to_datetime(df['event_date']).dt.date
        df = df.sort_values(['event_date', 'route_id'])
        print(f"Loaded {len(df)} records from {len(parquet_files)} files")
    else:
        # Create sample data for demo
        df = make_sample_delays()
        print("Using sample data for demonstration")

    print(f"Data shape: {df.shape}")
except Exception as e:
    # Deliberate best-effort: report the problem and leave an empty frame so that
    # `df` is always defined for the cells that follow.
    print(f"Error loading data: {e}")
    df = pd.DataFrame()  # Empty fallback

# Jupyter only renders a bare expression when it is the last top-level statement
# of the cell, so the preview lives here rather than inside the try block.
df.head()
