# Accra Traffic Prediction & ETA Engine

A comprehensive traffic prediction system with a robust engine for:
- Real-time traffic speed prediction
- Multi-route ETA calculation and comparison
- Route optimization
- Traffic pattern analysis
- Prediction caching for repeated queries

## 1. Import Required Libraries

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import xgboost as xgb
import ipywidgets as widgets
from IPython.display import display, HTML
import warnings
warnings.filterwarnings('ignore')

## 2. Traffic Prediction Engine Core

The engine manages data loading, model training, predictions, and route optimization.

In [None]:
class TrafficPredictionEngine:
    """Per-road XGBoost speed models with ETA and route-comparison helpers.

    Typical use is ``load_data()`` -> ``train_models()`` -> ``predict_speed()``
    / ``predict_route_eta()`` / ``compare_routes()``.

    Attributes:
        models: Trained regressor per road name.
        feature_columns: Ordered feature names the models were trained on.
        roads: Road names found in the loaded data.
        df_full: Merged traffic/weather/event frame set by ``load_data``.
        prediction_cache: Memoized ``predict_speed`` results keyed by inputs.
        residuals: Hold-out residuals (actual - predicted) per road.
        cache_timestamps: Reserved; not used by any method in this class.
        road_distances: Hard-coded road lengths; not read by any method in
            this class (presumably kilometres -- TODO confirm).
        model_performance: Raw MAE / RMSE / R2 per road.
    """

    # Hours (24h clock) flagged as rush hour for both training and serving.
    _RUSH_HOURS = (7, 8, 9, 17, 18, 19)
    # Two-sided normal z-scores for the common confidence levels.
    _Z_SCORES = {0.90: 1.645, 0.95: 1.96, 0.99: 2.576}
    # Lag speeds assumed when the caller supplies none (most recent first).
    _DEFAULT_LAG_SPEEDS = (45.0, 44.0, 43.0)

    def __init__(self):
        self.models = {}
        self.feature_columns = []
        self.roads = []
        self.df_full = None
        self.prediction_cache = {}
        self.residuals = {}
        self.cache_timestamps = {}
        self.road_distances = {
            'Circle Rd': 8.5,
            'Spintex Rd': 12.3,
            'Independence Ave': 6.7
        }
        self.model_performance = {}

    def load_data(self, traffic_path='data/traffic_data.csv',
                  weather_path='data/weather_data.csv',
                  events_path='data/events_data.csv'):
        """Read the three CSVs, left-join them on ``timestamp`` and store the result.

        The merged frame must contain ``road`` and ``event_type`` columns;
        rows without a matching event get ``event_type == 'none'``.

        Args:
            traffic_path: CSV of traffic observations.
            weather_path: CSV of weather observations keyed by ``timestamp``.
            events_path: CSV of events keyed by ``timestamp``.

        Returns:
            The merged DataFrame (also stored on ``self.df_full``).
        """
        print("Loading data...")

        traffic_data = pd.read_csv(traffic_path)
        weather_data = pd.read_csv(weather_path)
        events_data = pd.read_csv(events_path)

        df = pd.merge(traffic_data, weather_data, on='timestamp', how='left')
        df = pd.merge(df, events_data, on='timestamp', how='left')
        df['event_type'] = df['event_type'].fillna('none')

        self.df_full = df
        self.roads = df['road'].unique().tolist()

        print(f"Loaded {len(df)} records for {len(self.roads)} roads")
        return df

    def engineer_features(self, df):
        """Derive calendar, weather, event and lag features for training.

        Requires ``timestamp``, ``road``, ``avg_speed``, ``rain`` and
        ``event_type`` columns. Rows containing any NaN (including the first
        three observations of each road, which have no complete lag history)
        are dropped.

        Args:
            df: Merged observation frame; it is not modified.

        Returns:
            A new DataFrame with the engineered columns added.
        """
        df = df.copy()

        df['timestamp'] = pd.to_datetime(df['timestamp'])
        # Lags are positional, so rows must be in time order. A stable sort
        # leaves already-ordered data exactly as it was.
        df = df.sort_values('timestamp', kind='stable')

        df['hour'] = df['timestamp'].dt.hour
        df['weekday'] = df['timestamp'].dt.weekday
        df['is_weekend'] = df['weekday'].isin([5, 6]).astype(int)
        df['is_rush_hour'] = df['hour'].isin(self._RUSH_HOURS).astype(int)

        # Cyclical encodings so 23:00 -> 00:00 and Sunday -> Monday are adjacent.
        df['hour_sin'] = np.sin(2 * np.pi * df['hour'] / 24)
        df['hour_cos'] = np.cos(2 * np.pi * df['hour'] / 24)
        df['day_sin'] = np.sin(2 * np.pi * df['weekday'] / 7)
        df['day_cos'] = np.cos(2 * np.pi * df['weekday'] / 7)

        df['is_rainy'] = (df['rain'] > 0).astype(int)
        df['rain_category'] = pd.cut(df['rain'], bins=[-0.1, 0, 2, 5, 100],
                                      labels=['none', 'light', 'moderate', 'heavy'])

        df = pd.get_dummies(df, columns=['event_type', 'rain_category'], drop_first=False)

        # Previous three speeds of the same road.
        speed_by_road = df.groupby('road', sort=False)['avg_speed']
        lag_cols = []
        for lag in range(1, 4):
            col = f'lag_{lag}'
            df[col] = speed_by_road.shift(lag)
            lag_cols.append(col)

        # Rolling statistics are built from the lags only. Including the
        # current avg_speed would leak the target into the features and would
        # not match predict_speed, which derives both values from the lags.
        df['rolling_mean_3'] = df[lag_cols].mean(axis=1)
        df['rolling_std_3'] = df[lag_cols].std(axis=1, ddof=1).fillna(0)

        return df.dropna()

    def train_models(self):
        """Fit one XGBoost regressor per road and record hold-out metrics.

        The last 20% of each road's rows (chronological, no shuffling) is
        used as the test split. Cached predictions are discarded because
        they belong to the previous models.

        Returns:
            DataFrame with one formatted row of MAE / RMSE / R² per road.

        Raises:
            RuntimeError: If ``load_data`` has not been called yet.
        """
        if self.df_full is None:
            raise RuntimeError("No data loaded; call load_data() before train_models().")

        print("\nTraining upgraded models...")

        df = self.engineer_features(self.df_full)

        exclude_cols = {'avg_speed', 'timestamp', 'road'}
        # Only numeric/bool columns are valid model inputs; any leftover text
        # column from the source CSVs would make XGBoost reject the frame.
        usable_cols = df.select_dtypes(include=['number', 'bool']).columns
        self.feature_columns = [col for col in usable_cols if col not in exclude_cols]

        # Predictions memoized for older models are no longer valid.
        self.prediction_cache.clear()

        results = []

        for road in self.roads:
            road_df = df[df['road'] == road].copy()

            X = road_df[self.feature_columns]
            y = road_df['avg_speed']

            X_train, X_test, y_train, y_test = train_test_split(
                X, y, shuffle=False, test_size=0.2
            )

            model = xgb.XGBRegressor(
                n_estimators=500,
                learning_rate=0.03,
                max_depth=6,
                min_child_weight=3,
                subsample=0.8,
                colsample_bytree=0.8,
                gamma=1,
                reg_alpha=0.1,
                reg_lambda=1.0,
                random_state=42,
                objective='reg:squarederror',
                tree_method='hist',
                device='cpu'
            )
            model.fit(X_train, y_train)

            preds = model.predict(X_test)
            residuals = y_test.values - preds

            mae = mean_absolute_error(y_test, preds)
            rmse = np.sqrt(mean_squared_error(y_test, preds))
            r2 = r2_score(y_test, preds)

            self.models[road] = model
            self.residuals[road] = residuals
            self.model_performance[road] = {'MAE': mae, 'RMSE': rmse, 'R2': r2}

            results.append({
                'Road': road,
                'MAE': f'{mae:.2f} km/h',
                'RMSE': f'{rmse:.2f} km/h',
                'R²': f'{r2:.3f}'
            })

            print(f"{road}: MAE={mae:.2f} km/h, RMSE={rmse:.2f} km/h, R²={r2:.3f}")

        return pd.DataFrame(results)

    def get_confidence_interval(self, road, confidence=0.95):
        """Return the half-width of a normal-theory interval for a road's speed error.

        Args:
            road: Road name.
            confidence: Two-sided confidence level, strictly between 0 and 1.

        Returns:
            ``(ci, std_residuals)`` where ``ci = z * std_residuals``, or
            ``(0, 0)`` when no residuals are stored for the road.

        Raises:
            ValueError: If residuals exist and ``confidence`` is outside (0, 1).
        """
        residuals = self.residuals.get(road)
        if residuals is None or len(residuals) == 0:
            return 0, 0

        if not 0 < confidence < 1:
            raise ValueError(f"confidence must be between 0 and 1, got {confidence}")

        std_residuals = np.std(residuals)

        z_score = self._Z_SCORES.get(confidence)
        if z_score is None:
            # Any other level: derive the two-sided z-score from the normal quantile.
            from statistics import NormalDist
            z_score = NormalDist().inv_cdf((1 + confidence) / 2)

        ci = z_score * std_residuals

        return ci, std_residuals

    def predict_speed(self, road, hour, weekday, rain, temp, humidity,
                     event='none', lag_speeds=None):
        """Predict the average speed on a road for the given conditions.

        Args:
            road: Road name; a model must have been trained for it.
            hour: Hour of day (the cyclic encoding uses a 24-hour period).
            weekday: Day of week, Monday=0 .. Sunday=6.
            rain: Rain amount; values <= 0 count as no rain.
            temp: Temperature feature value.
            humidity: Humidity feature value.
            event: Event type. A value with no matching ``event_type_*``
                training column leaves all event indicators at 0.
            lag_speeds: The three most recent speeds, newest first. Defaults
                to ``[45.0, 44.0, 43.0]``.

        Returns:
            Predicted speed clamped to the range 5-80 (km/h per the training logs).

        Raises:
            ValueError: If no model exists for ``road``.
        """
        if road not in self.models:
            raise ValueError(f"No model trained for road: {road}")

        if lag_speeds is None:
            lag_speeds = self._DEFAULT_LAG_SPEEDS
        lag_speeds = [float(v) for v in lag_speeds]

        # Every input that reaches the model is part of the key; leaving any
        # of them out would return a stale speed for different conditions.
        cache_key = (road, hour, weekday, rain, temp, humidity, event, tuple(lag_speeds))
        cached = self.prediction_cache.get(cache_key)
        if cached is not None:
            return cached

        is_weekend = 1 if weekday in (5, 6) else 0
        is_rush_hour = 1 if hour in self._RUSH_HOURS else 0
        is_rainy = 1 if rain > 0 else 0

        hour_sin = np.sin(2 * np.pi * hour / 24)
        hour_cos = np.cos(2 * np.pi * hour / 24)
        day_sin = np.sin(2 * np.pi * weekday / 7)
        day_cos = np.cos(2 * np.pi * weekday / 7)

        # Same thresholds as the pd.cut bins used in engineer_features.
        if rain <= 0:
            rain_cat = 'none'
        elif rain <= 2:
            rain_cat = 'light'
        elif rain <= 5:
            rain_cat = 'moderate'
        else:
            rain_cat = 'heavy'

        recent = lag_speeds[:3]
        input_dict = {
            'hour': hour,
            'weekday': weekday,
            'is_weekend': is_weekend,
            'is_rush_hour': is_rush_hour,
            'hour_sin': hour_sin,
            'hour_cos': hour_cos,
            'day_sin': day_sin,
            'day_cos': day_cos,
            'rain': rain,
            'temp': temp,
            'humidity': humidity,
            'is_rainy': is_rainy,
            'lag_1': lag_speeds[0],
            'lag_2': lag_speeds[1],
            'lag_3': lag_speeds[2],
            'rolling_mean_3': np.mean(recent),
            # Sample std (ddof=1) to match how the training feature is built.
            'rolling_std_3': np.std(recent, ddof=1)
        }

        for col in self.feature_columns:
            if col.startswith('event_type_'):
                input_dict[col] = 1 if event == col.replace('event_type_', '') else 0
            elif col.startswith('rain_category_'):
                input_dict[col] = 1 if rain_cat == col.replace('rain_category_', '') else 0
            elif col not in input_dict:
                # Training column with no serving-time source: neutral default.
                input_dict[col] = 0

        input_df = pd.DataFrame([input_dict])[self.feature_columns]

        speed = float(self.models[road].predict(input_df)[0])
        speed = max(5.0, min(80.0, speed))

        self.prediction_cache[cache_key] = speed
        return speed

    def calculate_eta(self, distance_km, speed_kmh):
        """Return travel time in minutes, or ``inf`` when the speed is not positive."""
        if speed_kmh <= 0:
            return float('inf')
        return (distance_km / speed_kmh) * 60

    def predict_route_eta(self, road, distance_km, conditions):
        """Predict speed and ETA for one road segment.

        Args:
            road: Road name.
            distance_km: Distance to travel on the road.
            conditions: Dict with optional keys ``hour``, ``weekday``,
                ``rain``, ``temp``, ``humidity``, ``event`` and
                ``lag_speeds``; missing keys fall back to 12, 0, 0, 30, 70,
                ``'none'`` and ``None`` respectively.

        Returns:
            Dict with ``road``, ``distance_km``, ``predicted_speed_kmh``,
            ``eta_minutes``, ``eta_ci`` and ``eta_formatted``. ``eta_ci`` is
            the ETA if the speed were half a confidence half-width above the
            prediction.
        """
        speed = self.predict_speed(
            road=road,
            hour=conditions.get('hour', 12),
            weekday=conditions.get('weekday', 0),
            rain=conditions.get('rain', 0),
            temp=conditions.get('temp', 30),
            humidity=conditions.get('humidity', 70),
            event=conditions.get('event', 'none'),
            lag_speeds=conditions.get('lag_speeds', None)
        )

        eta = self.calculate_eta(distance_km, speed)

        ci, std_residuals = self.get_confidence_interval(road)
        eta_ci = (distance_km / (speed + ci/2)) * 60 if speed + ci/2 > 0 else eta

        return {
            'road': road,
            'distance_km': distance_km,
            'predicted_speed_kmh': round(speed, 2),
            'eta_minutes': round(eta, 2),
            'eta_ci': round(eta_ci, 2),
            'eta_formatted': f"{int(eta)} min" if eta < 60 else f"{int(eta//60)}h {int(eta%60)}min"
        }

    def compare_routes(self, routes, conditions):
        """Predict every route under the same conditions, fastest first.

        Args:
            routes: Iterable of dicts with ``road`` and ``distance`` keys.
            conditions: Conditions dict as accepted by ``predict_route_eta``.

        Returns:
            List of ``predict_route_eta`` results sorted by ``eta_minutes``.
        """
        results = [
            self.predict_route_eta(
                road=route['road'],
                distance_km=route['distance'],
                conditions=conditions
            )
            for route in routes
        ]

        results.sort(key=lambda x: x['eta_minutes'])

        return results

    def get_traffic_summary(self):
        """Return formatted mean/min/max/std of ``avg_speed`` for each loaded road."""
        summary = []

        for road in self.roads:
            road_data = self.df_full[self.df_full['road'] == road]['avg_speed']
            summary.append({
                'Road': road,
                'Avg Speed': f"{road_data.mean():.1f} km/h",
                'Min Speed': f"{road_data.min():.1f} km/h",
                'Max Speed': f"{road_data.max():.1f} km/h",
                'Std Dev': f"{road_data.std():.1f} km/h"
            })

        return pd.DataFrame(summary)

## 3. Initialize and Train the Engine

In [None]:
# Create the engine and load the merged traffic/weather/event table from the
# default CSV paths. Later cells rely on the module-level names `engine` and `df`.
engine = TrafficPredictionEngine()

df = engine.load_data()

# Preview the first ten merged rows.
print("\nData Sample:")
display(df.head(10))

In [None]:
# Train one model per road; `results` holds the formatted per-road metrics table.
results = engine.train_models()

print("\nModel Performance:")
display(results)

## 4. Traffic Summary Statistics

In [None]:
# Descriptive statistics of the observed speeds, one row per road.
summary = engine.get_traffic_summary()
print("Traffic Summary by Road:")
display(summary)

## 5. Interactive ETA Prediction Dashboard

Use the widgets below to get real-time ETA predictions and route comparisons.

In [None]:
# Input widgets for the dashboard. Hour and weekday default to the current time.
now = datetime.now()

road_input = widgets.Dropdown(
    options=engine.roads,
    value=engine.roads[0],
    description='Road:'
)

# BoundedFloatText is used wherever a min/max is given: plain FloatText has no
# min/max traits, so the limits would never be enforced.
distance_input = widgets.BoundedFloatText(
    description='Distance (km):',
    value=5.0,
    min=0.1,
    max=50.0
)

hour_input = widgets.IntSlider(
    description='Hour:',
    value=now.hour,
    min=0,
    max=23
)

# Dropdown options are (label, value) pairs; the value must be the integer
# weekday so that `now.weekday()` is a valid initial selection and the engine
# receives Monday=0 .. Sunday=6.
weekday_input = widgets.Dropdown(
    options=[
        ('Monday', 0), ('Tuesday', 1), ('Wednesday', 2),
        ('Thursday', 3), ('Friday', 4), ('Saturday', 5), ('Sunday', 6)
    ],
    value=now.weekday(),
    description='Day:'
)

rain_input = widgets.BoundedFloatText(
    description='Rain (mm):',
    value=0.0,
    min=0.0,
    max=50.0
)

temp_input = widgets.BoundedFloatText(
    description='Temp (°C):',
    value=30.0,
    min=20.0,
    max=45.0
)

humidity_input = widgets.BoundedFloatText(
    description='Humidity (%):',
    value=70.0,
    min=30.0,
    max=100.0
)

# NOTE(review): these names are hard-coded; an event with no matching
# `event_type_*` training column has no effect on the prediction -- confirm
# they match the event types present in the events data.
event_options = ['none', 'rush_hour', 'market_day', 'accident']
event_input = widgets.Dropdown(
    options=event_options,
    value='none',
    description='Event:'
)

predict_button = widgets.Button(
    description='Predict ETA',
    button_style='success',
    tooltip='Get ETA prediction'
)

compare_button = widgets.Button(
    description='Compare All Routes',
    button_style='info',
    tooltip='Compare ETAs for all roads'
)

# Shared output area that both button handlers print into.
output = widgets.Output()

def on_predict_click(b):
    """Button handler: predict the ETA for the selected road and print a report.

    Reads the current widget values, asks the engine for a route prediction,
    and writes the result plus a traffic-condition label into ``output``.

    Args:
        b: The clicked button (unused).
    """
    with output:
        output.clear_output()

        conditions = {
            'hour': hour_input.value,
            'weekday': weekday_input.value,
            'rain': rain_input.value,
            'temp': temp_input.value,
            'humidity': humidity_input.value,
            'event': event_input.value
        }

        result = engine.predict_route_eta(
            road=road_input.value,
            distance_km=distance_input.value,
            conditions=conditions
        )

        for line in (
            "="*60,
            "ROUTE PREDICTION",
            "="*60,
            f"Road: {result['road']}",
            f"Distance: {result['distance_km']} km",
            f"Predicted Speed: {result['predicted_speed_kmh']} km/h",
            f"\nEstimated Travel Time: {result['eta_formatted']}",
            "="*60,
        ):
            print(line)

        # Speed floors in descending order; the first one reached names the
        # condition, anything below the last floor is severe congestion.
        speed = result['predicted_speed_kmh']
        speed_bands = (
            (45, "[SMOOTH] Smooth Traffic"),
            (30, "[MODERATE] Moderate Traffic"),
            (20, "[HEAVY] Heavy Traffic"),
        )
        condition = "[SEVERE] Severe Congestion"
        for floor, label in speed_bands:
            if speed >= floor:
                condition = label
                break

        print(f"\nTraffic Condition: {condition}")
        print()

def on_compare_click(b):
    """Button handler: rank every road by ETA for the entered distance.

    Uses the same distance and conditions for each road in ``engine.roads``,
    prints the routes fastest first, and reports the time gap between the
    fastest and slowest route when there is more than one.

    Args:
        b: The clicked button (unused).
    """
    with output:
        output.clear_output()

        conditions = {
            'hour': hour_input.value,
            'weekday': weekday_input.value,
            'rain': rain_input.value,
            'temp': temp_input.value,
            'humidity': humidity_input.value,
            'event': event_input.value
        }

        routes = []
        for road in engine.roads:
            routes.append({'road': road, 'distance': distance_input.value})

        results = engine.compare_routes(routes, conditions)

        print("="*70)
        print(f"ROUTE COMPARISON ({distance_input.value} km on each road)")

        # `results` arrives sorted by ETA, so rank 1 is the fastest route.
        for i, result in enumerate(results, start=1):
            print(f"\n#{i}: {result['road']}")
            print(f"   Speed: {result['predicted_speed_kmh']} km/h")
            print(f"   ETA: {result['eta_formatted']}")

            if i == 1:
                print("   [FASTEST ROUTE]")

        print("\n" + "="*70)

        if len(results) <= 1:
            # Nothing to compare against.
            print()
            return

        time_saved = results[-1]['eta_minutes'] - results[0]['eta_minutes']
        print(f"\nTaking {results[0]['road']} saves {time_saved:.1f} minutes")
        print()

# Connect the buttons to their handlers.
predict_button.on_click(on_predict_click)
compare_button.on_click(on_compare_click)

print("\n" + "="*70)
print("🚦 ACCRA TRAFFIC PREDICTION DASHBOARD")

# Lay out the form top to bottom: route inputs, conditions, action buttons,
# then the shared output area the handlers print into.
display(widgets.VBox([
    widgets.HTML("<h3>Route Settings</h3>"),
    road_input,
    distance_input,
    widgets.HTML("<h3>Time & Weather Conditions</h3>"),
    hour_input,
    weekday_input,
    rain_input,
    temp_input,
    humidity_input,
    event_input,
    widgets.HTML("<h3>Actions</h3>"),
    widgets.HBox([predict_button, compare_button]),
    output
]))

## 6. Advanced: Batch Predictions

Predict ETAs for multiple time slots to find the optimal departure time.

In [None]:
def find_best_departure_time(road, distance_km, date, start_hour=6, end_hour=22,
                             base_conditions=None):
    """
    Find the best time to depart by comparing ETAs across different hours.

    One prediction is made per whole hour from ``start_hour`` to ``end_hour``
    inclusive, using the module-level ``engine``. Only the hour changes
    between predictions; the weekday comes from ``date``.

    Args:
        road: Road name known to the engine.
        distance_km: Trip distance in kilometres.
        date: Object with ``weekday()`` and ``strftime()`` (e.g. ``datetime``).
        start_hour: First departure hour to evaluate (0-23).
        end_hour: Last departure hour to evaluate (0-23, >= ``start_hour``).
        base_conditions: Optional dict overriding the assumed conditions
            (``rain`` 0.0, ``temp`` 30.0, ``humidity`` 70.0, ``event``
            ``'none'``); ``hour`` and ``weekday`` are always set here.

    Returns:
        DataFrame with one row per hour: ``Hour``, ``Speed (km/h)``,
        ``ETA (min)`` and ``ETA``.

    Raises:
        ValueError: If the hour range is empty or outside 0-23.
    """
    # Validate first: an empty range would otherwise surface as an opaque
    # KeyError when the best row is looked up in an empty DataFrame.
    if not 0 <= start_hour <= end_hour <= 23:
        raise ValueError(
            f"Invalid hour range {start_hour}-{end_hour}: "
            "expected 0 <= start_hour <= end_hour <= 23"
        )

    shared = {
        'rain': 0.0,
        'temp': 30.0,
        'humidity': 70.0,
        'event': 'none'
    }
    if base_conditions:
        shared.update(base_conditions)

    weekday = date.weekday()
    results = []

    for hour in range(start_hour, end_hour + 1):
        conditions = {**shared, 'hour': hour, 'weekday': weekday}

        result = engine.predict_route_eta(road, distance_km, conditions)

        results.append({
            'Hour': f"{hour:02d}:00",
            'Speed (km/h)': result['predicted_speed_kmh'],
            'ETA (min)': result['eta_minutes'],
            'ETA': result['eta_formatted']
        })

    df_results = pd.DataFrame(results)
    # idxmin returns the first minimum, so ties resolve to the earliest hour.
    best_time = df_results.loc[df_results['ETA (min)'].idxmin()]

    print(f"\nBEST DEPARTURE TIME ANALYSIS")
    print(f"Road: {road}")
    print(f"Distance: {distance_km} km")
    print(f"Date: {date.strftime('%A, %B %d, %Y')}")
    print(f"\nBest Time: {best_time['Hour']}")
    print(f"   ETA: {best_time['ETA']}")
    print(f"   Speed: {best_time['Speed (km/h)']} km/h\n")

    return df_results

# Example run: scan tomorrow's default 06:00-22:00 departure hours for a
# 10 km trip on 'Circle Rd'.
# NOTE(review): the engine raises ValueError if 'Circle Rd' has no trained
# model -- confirm the road exists in the loaded data.
tomorrow = datetime.now() + timedelta(days=1)
schedule = find_best_departure_time('Circle Rd', 10.0, tomorrow)

print("\nComplete Schedule:")
display(schedule)

## 7. Engine Performance Metrics

In [None]:
# Status report: how many models, roads, features, cached predictions and
# loaded records the engine currently holds.
print("\n".join([
    "\nENGINE STATUS",
    "="*50,
    f"Models Trained: {len(engine.models)}",
    f"Roads Covered: {', '.join(engine.roads)}",
    f"Features Used: {len(engine.feature_columns)}",
    f"Predictions Cached: {len(engine.prediction_cache)}",
    f"Training Records: {len(engine.df_full)}",
    "="*50,
]))

# Feature importances are shown for the first road's model only.
if engine.roads:
    first_road = engine.roads[0]
    model = engine.models[first_road]

    importance = (
        pd.DataFrame({
            'Feature': engine.feature_columns,
            'Importance': model.feature_importances_
        })
        .sort_values('Importance', ascending=False)
        .head(10)
    )

    print(f"\nTop 10 Most Important Features ({first_road}):")
    display(importance)

## Notes

### Engine Features:
- **Road-specific models**: Separate XGBoost models trained for each road segment
- **Rich feature engineering**: Time encoding, weather conditions, lag features, rolling statistics
- **Route comparison**: Compare multiple route options simultaneously
- **Prediction caching**: Improves performance for repeated queries
- **Departure time optimization**: Find the best time to travel

### Usage Examples:
```python
# Single prediction
result = engine.predict_route_eta('Circle Rd', 5.0, conditions)

# Compare routes
routes = [{'road': 'Circle Rd', 'distance': 5}, {'road': 'Spintex Rd', 'distance': 5}]
comparison = engine.compare_routes(routes, conditions)

# Get speed only
speed = engine.predict_speed('Circle Rd', hour=8, weekday=0, rain=0, temp=30, humidity=70)
```

### Future Enhancements:
1. Real-time API integration for live traffic data
2. Multi-segment route planning (connect multiple roads)
3. Historical pattern visualization
4. Mobile app integration via REST API
5. Traffic alert notifications
6. Integration with Google Maps/Waze
7. Deep learning models (LSTM) for better temporal predictions