# Cambridge Bikes Data Unification and Model Training

This notebook will:
1. Clean and unify data from three sources:
   - Bicycle crashes data (cleaned)
   - City bike count data
   - Eco-totem automatic counter data
2. Create features for TensorFlow model training
3. Train a single two-output TensorFlow model that predicts bicycle counts and accident severity


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import tensorflow as tf
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')

# Set up plotting: reset matplotlib to its 'default' style sheet, then make
# "husl" the seaborn colour palette used by later plots.
plt.style.use('default')
sns.set_palette("husl")

print("Libraries imported successfully")


In [None]:
# Read the three input CSV files, reporting each frame's shape right after it loads
print("Loading datasets...")

# Input locations, relative to the notebook's working directory
CRASHES_CSV = 'data/processed/bicycle_crashes_cleaned.csv'
CITY_COUNT_CSV = 'data/city_bike_count.csv'
ECO_TOTEM_CSV = 'data/eco_totem.csv'

# 1. Bicycle crashes (this file is the already-cleaned version)
crashes_df = pd.read_csv(CRASHES_CSV)
print(f"Crashes data shape: {crashes_df.shape}")

# 2. City bike counts
city_count_df = pd.read_csv(CITY_COUNT_CSV)
print(f"City count data shape: {city_count_df.shape}")

# 3. Eco-totem automatic counter
eco_totem_df = pd.read_csv(ECO_TOTEM_CSV)
print(f"Eco-totem data shape: {eco_totem_df.shape}")

print("\nDatasets loaded successfully!")


In [None]:
# Show the column names and the first two rows of each dataset.
# Each entry pairs the banner to print with the frame it introduces.
dataset_previews = [
    ("=== CRASHES DATA ===", crashes_df),
    ("\n=== CITY COUNT DATA ===", city_count_df),
    ("\n=== ECO-TOTEM DATA ===", eco_totem_df),
]

for banner, frame in dataset_previews:
    print(banner)
    print(frame.columns.tolist())
    print("\nSample data:")
    print(frame.head(2))


In [None]:
# Compare how the crash data and the city count data identify locations

# Crash records are keyed by 'Intersection_ID'
crash_sites = crashes_df['Intersection_ID']
print("=== CRASHES DATA INTERSECTIONS ===")
print(f"Unique intersections: {crash_sites.nunique()}")
print("\nTop 10 intersections by crash count:")
print(crash_sites.value_counts().head(10))

# Manual counts are keyed by 'Count Location'
count_sites = city_count_df['Count Location']
print("\n=== CITY COUNT DATA LOCATIONS ===")
print(f"Unique count locations: {count_sites.nunique()}")
print("\nTop 10 count locations:")
print(count_sites.value_counts().head(10))


In [None]:
# Create intersection name mapping function
def normalize_intersection_name(name):
    """Normalize an intersection or street name to a common format.

    The name is upper-cased, leading/trailing and repeated whitespace is
    removed, and the streets of an intersection are joined with " & ".
    Two separator styles are understood:

    - "STREET1_AND_STREET2" (any number of "_AND_"-separated streets)
    - "Street1 & Street2" (with or without spaces around the "&")

    Args:
        name: Raw location name; may be missing (None/NaN).

    Returns:
        The normalized name, or None when ``name`` is missing. A name without
        a separator (a single street) is returned upper-cased and trimmed.
    """
    if pd.isna(name):
        return None

    # Upper-case and collapse every run of whitespace to a single space
    name = ' '.join(str(name).upper().split())

    if '_AND_' in name:
        streets = name.split('_AND_')
    elif '&' in name:
        streets = name.split('&')
    else:
        # Single street name
        return name

    # Drop empty pieces so a dangling separator cannot leave a stray " & "
    streets = [street.strip() for street in streets if street.strip()]
    return ' & '.join(streets) if streets else name

# Try the normalizer on sample names: the "_AND_" form, a single street,
# and the "&" form.
test_names = [
    'BEECH STREET_AND_MASSACHUSETTS AVENUE',
    'MASSACHUSETTS AVENUE',
    'Concord Ave & Garden St',
    'Broadway & Hampshire St'
]

print("Testing intersection name normalization:")
for name, normalized in zip(test_names, map(normalize_intersection_name, test_names)):
    print(f"'{name}' -> '{normalized}'")


In [None]:
# Create weather condition mapping function
def normalize_weather_condition(weather):
    """Map a free-text weather description to a standard category.

    Matching is a case-insensitive substring search against an ordered list
    of keyword groups; the first group with a hit wins. Precipitation groups
    are checked before sky-cover groups so that a compound description such
    as "Rain, Partially cloudy" is reported as RAIN rather than CLOUDY.

    Args:
        weather: Raw weather description; may be missing (None/NaN).

    Returns:
        One of 'RAIN', 'SNOW', 'CLEAR', 'CLOUDY' or 'UNKNOWN'. 'UNKNOWN' is
        returned for missing values and for text that matches no keyword.
    """
    if pd.isna(weather):
        return 'UNKNOWN'

    weather = str(weather).upper().strip()

    # Ordered (category, keywords) rules. RAIN stays ahead of SNOW so that
    # "freezing rain"-style descriptions are still classified as RAIN.
    rules = (
        ('RAIN', ('RAIN', 'DRIZZLE')),
        ('SNOW', ('SNOW', 'SLEET', 'HAIL', 'FREEZING')),
        ('CLEAR', ('CLEAR', 'SUNNY')),
        ('CLOUDY', ('CLOUDY', 'OVERCAST', 'BROKEN CLOUDS', 'SCATTERED CLOUDS',
                    'FEW CLOUDS', 'PARTIALLY CLOUDY', 'PARTLY CLOUDY', 'MOSTLY CLOUDY')),
    )

    for category, keywords in rules:
        if any(keyword in weather for keyword in keywords):
            return category

    return 'UNKNOWN'

# Try the normalizer on upper-case labels and on mixed-case free text
test_weather = [
    'CLEAR',
    'CLOUDY', 
    'RAIN',
    'SNOW',
    'Clear',
    'Partially cloudy',
    'Light rain',
    'Overcast clouds'
]

print("Testing weather condition normalization:")
for weather, normalized in zip(test_weather, map(normalize_weather_condition, test_weather)):
    print(f"'{weather}' -> '{normalized}'")


In [None]:
# Process crashes data: add normalized join keys and calendar features
print("=== PROCESSING CRASHES DATA ===")

# Bring location names and weather labels onto the shared vocabularies
crashes_df['normalized_intersection'] = crashes_df['Intersection_ID'].apply(normalize_intersection_name)
crashes_df['normalized_weather'] = crashes_df['Weather Condition 1'].apply(normalize_weather_condition)

# Parse the timestamp once and derive every time feature from it
crash_timestamps = pd.to_datetime(crashes_df['Date Time'])
crash_hour = crash_timestamps.dt.hour
crash_minute = crash_timestamps.dt.minute

crashes_df['datetime'] = crash_timestamps
crashes_df['hour'] = crash_hour
crashes_df['minute'] = crash_minute
# Index of the 15-minute slot within the day (0-95)
crashes_df['time_15min'] = (crash_hour * 4 + crash_minute // 15) % 96
# 0=Monday, 6=Sunday
crashes_df['day_of_week'] = crash_timestamps.dt.dayofweek
crashes_df['month'] = crash_timestamps.dt.month
# Create accident severity categories (as floats for model)
def _injury_rank(injury):
    """Rank one injury description: 2 = fatal/serious, 1 = minor, 0 = anything else."""
    if pd.isna(injury):
        return 0

    text = str(injury).upper()

    # Note the negated phrase before it is stripped below. The legacy wording
    # "non-incapacitating" is treated as the older name for a suspected minor
    # injury, and "incapacitating" as the older name for a suspected serious
    # injury (KABCO scale) - NOTE(review): confirm against the dataset's
    # actual 'P1 Injury' / 'P2 Injury' values.
    non_incapacitating = 'NON-INCAPACITATING' in text or 'NON INCAPACITATING' in text

    # Remove negated phrases so that "NON-FATAL ..." cannot match 'FATAL' and
    # "NON-INCAPACITATING" cannot match 'INCAPACITATING'.
    for negated in ('NON-FATAL', 'NON FATAL', 'NONFATAL',
                    'NON-INCAPACITATING', 'NON INCAPACITATING'):
        text = text.replace(negated, ' ')

    if any(keyword in text for keyword in ('FATAL', 'SERIOUS', 'INCAPACITATING')):
        return 2
    if non_incapacitating or 'MINOR' in text:
        return 1
    return 0


def get_accident_severity(row):
    """Convert a crash row's injury descriptions to a severity score.

    The worse of the 'P1 Injury' and 'P2 Injury' descriptions decides the
    score.

    Args:
        row: Mapping or Series with 'P1 Injury' and 'P2 Injury' entries;
            either may be missing (None/NaN).

    Returns:
        1.0 for a fatal or serious injury, 0.5 for a minor injury, and 0.1
        otherwise (no apparent injury, but still an accident).
    """
    worst = max(_injury_rank(row['P1 Injury']), _injury_rank(row['P2 Injury']))
    return {2: 1.0, 1: 0.5}.get(worst, 0.1)

# Score every crash row, then summarise the processed frame
crashes_df['accident_severity'] = crashes_df.apply(get_accident_severity, axis=1)

crash_site_count = crashes_df['normalized_intersection'].nunique()
crash_weather_counts = crashes_df['normalized_weather'].value_counts().to_dict()
crash_severity_counts = crashes_df['accident_severity'].value_counts().to_dict()

print(f"Processed crashes data: {crashes_df.shape}")
print(f"Unique normalized intersections: {crash_site_count}")
print(f"Weather distribution: {crash_weather_counts}")
print(f"Accident severity distribution: {crash_severity_counts}")


In [None]:
# Process city bike count data: normalize keys, add calendar features, then
# collapse the movement types into a single count per location and timestamp
print("=== PROCESSING CITY BIKE COUNT DATA ===")

# Bring location names and weather labels onto the shared vocabularies
city_count_df['normalized_intersection'] = city_count_df['Count Location'].apply(normalize_intersection_name)
city_count_df['normalized_weather'] = city_count_df['Weather'].apply(normalize_weather_condition)

# The timestamp is split across a 'Date' and a 'Time' column
count_timestamps = pd.to_datetime(city_count_df['Date'] + ' ' + city_count_df['Time'])
count_hour = count_timestamps.dt.hour
count_minute = count_timestamps.dt.minute

city_count_df['datetime'] = count_timestamps
city_count_df['hour'] = count_hour
city_count_df['minute'] = count_minute
city_count_df['time_15min'] = (count_hour * 4 + count_minute // 15) % 96
city_count_df['day_of_week'] = count_timestamps.dt.dayofweek
city_count_df['month'] = count_timestamps.dt.month

# Sum 'Count' over all rows that share these keys.
# NOTE(review): groupby drops rows whose key is missing by default, so rows
# with a missing 'Temperature' or location do not reach city_count_agg.
count_group_keys = [
    'normalized_intersection', 'datetime', 'time_15min', 'day_of_week',
    'month', 'normalized_weather', 'Temperature'
]
city_count_agg = city_count_df.groupby(count_group_keys)['Count'].sum().reset_index()

count_weather_counts = city_count_agg['normalized_weather'].value_counts().to_dict()
first_count_time = city_count_agg['datetime'].min()
last_count_time = city_count_agg['datetime'].max()

print(f"Processed city count data: {city_count_agg.shape}")
print(f"Unique normalized intersections: {city_count_agg['normalized_intersection'].nunique()}")
print(f"Weather distribution: {count_weather_counts}")
print(f"Date range: {first_count_time} to {last_count_time}")
print(f"Average daily count: {city_count_agg['Count'].mean():.1f}")


In [None]:
# Process eco-totem data: add calendar features and placeholder location/weather
print("=== PROCESSING ECO-TOTEM DATA ===")

# Parse the timestamp once and derive every time feature from it
totem_timestamps = pd.to_datetime(eco_totem_df['DateTime'])
totem_hour = totem_timestamps.dt.hour
totem_minute = totem_timestamps.dt.minute

eco_totem_df['datetime'] = totem_timestamps
eco_totem_df['hour'] = totem_hour
eco_totem_df['minute'] = totem_minute
eco_totem_df['time_15min'] = (totem_hour * 4 + totem_minute // 15) % 96
eco_totem_df['day_of_week'] = totem_timestamps.dt.dayofweek
eco_totem_df['month'] = totem_timestamps.dt.month

# Every counter reading is attributed to the single 'BROADWAY' location
eco_totem_df['normalized_intersection'] = 'BROADWAY'

# The counter file has no weather column; this placeholder is replaced later
eco_totem_df['normalized_weather'] = 'UNKNOWN'

hourly_mean_total = eco_totem_df.groupby('hour')['Total'].mean()

print(f"Processed eco-totem data: {eco_totem_df.shape}")
print(f"Date range: {eco_totem_df['datetime'].min()} to {eco_totem_df['datetime'].max()}")
print(f"Average daily total: {eco_totem_df['Total'].mean():.1f}")
print(f"Peak hour average: {hourly_mean_total.max():.1f}")


In [None]:
# Create unified dataset for model training
print("=== CREATING UNIFIED DATASET ===")

# The eco-totem file carries no weather, so it borrows weather from the city
# count data, matched by calendar date.

# One row per distinct (timestamp, weather, temperature) observation, tagged
# with its calendar date for the date-based lookup.
weather_lookup = (
    city_count_agg[['datetime', 'normalized_weather', 'Temperature']]
    .drop_duplicates()
    .assign(date=lambda observed: observed['datetime'].dt.date)
)
# For eco-totem data, find closest weather data
def get_weather_for_date(target_date, weather_lookup, max_gap_days=7):
    """Get weather for a date, falling back to the closest available date.

    Args:
        target_date: Timestamp/datetime whose calendar date is looked up.
        weather_lookup: DataFrame with a 'date' column of calendar dates plus
            'normalized_weather' and 'Temperature' columns. It is not modified.
        max_gap_days: Largest distance, in whole days, at which a lookup row
            is still accepted as a substitute for a missing date.

    Returns:
        tuple: (normalized_weather, Temperature) taken from the lookup row
        closest in date to ``target_date`` (the first such row on ties; an
        exact date match always wins). ('CLEAR', 20.0) is returned when no
        row lies within ``max_gap_days``.
    """
    target_day = pd.Timestamp(target_date.date())

    # Convert to real datetimes so the subtraction yields timedeltas; the
    # 'date' column holds plain date objects (object dtype), which do not
    # support the .dt accessor.
    lookup_days = pd.to_datetime(weather_lookup['date'])

    # Positional index so idxmin() below can be fed straight into iloc
    day_gap = (lookup_days - target_day).abs().dt.days.reset_index(drop=True)

    candidates = day_gap[day_gap <= max_gap_days]
    if not candidates.empty:
        nearest = weather_lookup.iloc[candidates.idxmin()]
        return nearest['normalized_weather'], nearest['Temperature']

    # Default to clear weather if no data found
    return 'CLEAR', 20.0

# Apply weather lookup to eco-totem data
print("Filling weather data for eco-totem...")
weather_data = []
temp_data = []

# The lookup result depends only on the calendar date, so resolve each
# distinct date once and reuse it for every reading taken on that day.
weather_by_day = {}
for reading_time in eco_totem_df['datetime']:
    day = reading_time.date()
    if day not in weather_by_day:
        weather_by_day[day] = get_weather_for_date(reading_time, weather_lookup)
    weather, temp = weather_by_day[day]
    weather_data.append(weather)
    temp_data.append(temp)

# Replace the 'UNKNOWN' placeholder and add the borrowed temperature
eco_totem_df['normalized_weather'] = weather_data
eco_totem_df['Temperature'] = temp_data

print(f"Eco-totem weather distribution: {eco_totem_df['normalized_weather'].value_counts().to_dict()}")


In [None]:
# Create unified dataset by stacking the three sources on a shared schema
print("=== COMBINING ALL DATA SOURCES ===")

# Columns every source contributes as-is
shared_keys = ['normalized_intersection', 'datetime', 'time_15min',
               'day_of_week', 'month', 'normalized_weather']

# City counts: real counts, no accident information
city_count_unified = city_count_agg[shared_keys + ['Temperature', 'Count']].assign(
    data_source='city_count',
    accident_severity=0.0,
)

# Eco-totem: its 'Total' column plays the role of 'Count'
eco_totem_unified = (
    eco_totem_df[shared_keys + ['Temperature', 'Total']]
    .rename(columns={'Total': 'Count'})
    .assign(data_source='eco_totem', accident_severity=0.0)
)

# Crashes: severity only; they carry no bike count and get a placeholder
# temperature of 20.0
crashes_unified = crashes_df[shared_keys + ['accident_severity']].assign(
    Count=0,
    Temperature=20.0,
    data_source='crashes',
)

# Stack the three frames into one
unified_df = pd.concat([city_count_unified, eco_totem_unified, crashes_unified], ignore_index=True)

source_counts = unified_df['data_source'].value_counts().to_dict()
has_count = unified_df['Count'] > 0
has_accident = unified_df['accident_severity'] > 0

print(f"Unified dataset shape: {unified_df.shape}")
print(f"Data sources: {source_counts}")
print(f"Unique intersections: {unified_df['normalized_intersection'].nunique()}")
print(f"Date range: {unified_df['datetime'].min()} to {unified_df['datetime'].max()}")
print(f"Total bike count records: {has_count.sum()}")
print(f"Total accident records: {has_accident.sum()}")


In [None]:
# Create a comprehensive time-series dataset for model training
print("=== CREATING TIME-SERIES DATASET FOR MODEL TRAINING ===")

# Build a complete 15-minute grid for the busiest intersections and attach
# whatever observations exist to it.

# Get all unique intersections
all_intersections = unified_df['normalized_intersection'].unique()
print(f"Total unique intersections: {len(all_intersections)}")

# Size of the full 2015-2024 grid, printed for reference only; the grid that
# is actually built below is restricted to recent years and key months.
start_date = pd.to_datetime('2015-01-01')
end_date = pd.to_datetime('2024-12-31')
time_intervals = pd.date_range(start=start_date, end=end_date, freq='15min')
print(f"Total time intervals: {len(time_intervals)}")

print("Creating base time series...")

# Sample intersections (focus on those with most data)
intersection_counts = unified_df['normalized_intersection'].value_counts()
top_intersections = intersection_counts.head(10).index.tolist()  # Top 10 intersections
print(f"Focusing on top intersections: {top_intersections}")

# Sample time periods (focus on recent years and key months)
recent_years = [2020, 2021, 2022, 2023, 2024]
key_months = [3, 4, 5, 6, 7, 8, 9, 10]  # Spring to Fall

# Generate one continuous grid spanning the selected years, then keep only the
# wanted years/months. Every 15-minute slot appears exactly once, so the
# first slot of a month is never duplicated at the boundary between two
# adjacent months.
full_grid = pd.date_range(
    start=pd.Timestamp(year=min(recent_years), month=1, day=1),
    end=pd.Timestamp(year=max(recent_years), month=12, day=31, hour=23, minute=45),
    freq='15min',
)
sample_grid = full_grid[full_grid.year.isin(recent_years) & full_grid.month.isin(key_months)]

# Cartesian product intersection x slot, ordered by intersection and then time
base_df = pd.MultiIndex.from_product(
    [top_intersections, sample_grid],
    names=['normalized_intersection', 'datetime'],
).to_frame(index=False)

grid_times = base_df['datetime'].dt
base_df['time_15min'] = (grid_times.hour * 4 + grid_times.minute // 15) % 96
base_df['day_of_week'] = grid_times.dayofweek
base_df['month'] = grid_times.month
print(f"Base dataset shape: {base_df.shape}")

# Merge with actual data.
# NOTE(review): a left merge repeats a grid row once per matching unified_df
# row, so slots observed by more than one source appear more than once.
model_df = base_df.merge(
    unified_df[['normalized_intersection', 'datetime', 'normalized_weather', 'Temperature', 'Count', 'accident_severity']], 
    on=['normalized_intersection', 'datetime'], 
    how='left'
)

# Fill slots that have no observation with neutral defaults
model_df['Count'] = model_df['Count'].fillna(0)
model_df['accident_severity'] = model_df['accident_severity'].fillna(0)
model_df['normalized_weather'] = model_df['normalized_weather'].fillna('CLEAR')
model_df['Temperature'] = model_df['Temperature'].fillna(20.0)

print(f"Model dataset shape: {model_df.shape}")
print(f"Records with bike counts > 0: {len(model_df[model_df['Count'] > 0])}")
print(f"Records with accidents > 0: {len(model_df[model_df['accident_severity'] > 0])}")
print(f"Weather distribution: {model_df['normalized_weather'].value_counts().to_dict()}")


In [None]:
# Prepare data for TensorFlow model training
print("=== PREPARING DATA FOR TENSORFLOW MODEL ===")

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

# Keep observations from 2020 onwards
is_recent = unified_df['datetime'] >= '2020-01-01'
recent_data = unified_df[is_recent].copy()

# Rank intersections by number of recent rows and keep the eight largest
intersection_counts = recent_data['normalized_intersection'].value_counts()
top_intersections = intersection_counts.head(8).index.tolist()  # Top 8 intersections
print(f"Top intersections for model: {top_intersections}")

in_top_intersections = recent_data['normalized_intersection'].isin(top_intersections)
model_data = recent_data[in_top_intersections].copy()

print("Creating model features...")

# Integer codes for the two categorical columns; the fitted encoders are kept
# so the same mapping can be applied at prediction time
intersection_encoder = LabelEncoder()
model_data['intersection_encoded'] = intersection_encoder.fit_transform(model_data['normalized_intersection'])

weather_encoder = LabelEncoder()
model_data['weather_encoded'] = weather_encoder.fit_transform(model_data['normalized_weather'])

# Cyclical encodings: (sin column, cos column, values, period)
cyclical_features = [
    ('hour_sin', 'hour_cos', model_data['datetime'].dt.hour, 24),
    ('day_sin', 'day_cos', model_data['day_of_week'], 7),
    ('month_sin', 'month_cos', model_data['month'], 12),
]
for sin_column, cos_column, values, period in cyclical_features:
    angle = 2 * np.pi * values / period
    model_data[sin_column] = np.sin(angle)
    model_data[cos_column] = np.cos(angle)

# Standardize temperature; the fitted scaler is kept for prediction time
temp_scaler = StandardScaler()
model_data['temperature_normalized'] = temp_scaler.fit_transform(model_data[['Temperature']])

rows_with_counts = model_data[model_data['Count'] > 0]
rows_with_accidents = model_data[model_data['accident_severity'] > 0]

print(f"Model data shape: {model_data.shape}")
print(f"Date range: {model_data['datetime'].min()} to {model_data['datetime'].max()}")
print(f"Records with bike counts > 0: {len(rows_with_counts)}")
print(f"Records with accidents > 0: {len(rows_with_accidents)}")

# Persist the feature table
model_data.to_csv('data/processed/unified_model_data.csv', index=False)
print("Saved unified model data to data/processed/unified_model_data.csv")


In [None]:
# Create and train TensorFlow model
print("=== CREATING TENSORFLOW MODEL ===")

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Model inputs, in the column order the network is trained on
feature_columns = [
    'time_15min', 'intersection_encoded', 'weather_encoded', 
    'hour_sin', 'hour_cos', 'day_sin', 'day_cos', 'month_sin', 'month_cos',
    'temperature_normalized'
]

# One feature matrix and one target vector per output head
X = model_data[feature_columns].values
y_bike_count = model_data['Count'].values
y_accident_severity = model_data['accident_severity'].values

print(f"Feature matrix shape: {X.shape}")
print(f"Bike count target shape: {y_bike_count.shape}")
print(f"Accident severity target shape: {y_accident_severity.shape}")

# 80/20 split, applied identically to the features and both targets
(
    X_train, X_test,
    y_bike_train, y_bike_test,
    y_accident_train, y_accident_test,
) = train_test_split(
    X, y_bike_count, y_accident_severity, test_size=0.2, random_state=42
)

print(f"Training set size: {X_train.shape[0]}")
print(f"Test set size: {X_test.shape[0]}")

# Fit the scaler on the training rows only, then apply it to both splits
feature_scaler = StandardScaler()
X_train_scaled = feature_scaler.fit_transform(X_train)
X_test_scaled = feature_scaler.transform(X_test)

print("Building TensorFlow model...")

keras_layers = tf.keras.layers

inputs = tf.keras.Input(shape=(X_train_scaled.shape[1],), name='features')

# Shared trunk: (units, layer name, dropout rate after the layer or None)
hidden_plan = [
    (128, 'hidden1', 0.3),
    (64, 'hidden2', 0.3),
    (32, 'hidden3', None),
]
x = inputs
for units, layer_name, dropout_rate in hidden_plan:
    x = keras_layers.Dense(units, activation='relu', name=layer_name)(x)
    if dropout_rate is not None:
        x = keras_layers.Dropout(dropout_rate)(x)

# Two heads on the shared trunk: a non-negative count and a 0-1 severity
bike_count_output = keras_layers.Dense(1, activation='relu', name='bike_count')(x)
accident_severity_output = keras_layers.Dense(1, activation='sigmoid', name='accident_severity')(x)

model = tf.keras.Model(
    inputs=inputs,
    outputs=[bike_count_output, accident_severity_output],
    name='bicycle_prediction_model'
)

# Per-head settings, keyed by output layer name
head_losses = {
    'bike_count': 'mse',
    'accident_severity': 'mse'
}
head_loss_weights = {
    'bike_count': 1.0,
    'accident_severity': 10.0  # Higher weight for accident prediction
}
head_metrics = {
    'bike_count': ['mae'],
    'accident_severity': ['mae']
}

model.compile(
    optimizer='adam',
    loss=head_losses,
    loss_weights=head_loss_weights,
    metrics=head_metrics
)

print("Model architecture:")
model.summary()


In [None]:
# Train the model
print("=== TRAINING TENSORFLOW MODEL ===")

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Stop when the validation loss stalls (restoring the best weights) and halve
# the learning rate on shorter plateaus
callbacks = [
    tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=10,
        restore_best_weights=True
    ),
    tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=5,
        min_lr=1e-6
    )
]

# Validation data is carved out of the training rows so that the test split
# never influences early stopping or the learning-rate schedule; the test
# metrics below are therefore measured on data the training loop never saw.
history = model.fit(
    X_train_scaled,
    [y_bike_train, y_accident_train],
    validation_split=0.2,
    epochs=100,
    batch_size=512,
    callbacks=callbacks,
    verbose=1
)

print("Training completed!")

# Evaluate the model on the held-out test split
print("=== MODEL EVALUATION ===")
test_loss = model.evaluate(X_test_scaled, [y_bike_test, y_accident_test], verbose=0)
print(f"Test loss: {test_loss}")

# One prediction array per output head, flattened to 1-D
predictions = model.predict(X_test_scaled)
bike_predictions = predictions[0].flatten()
accident_predictions = predictions[1].flatten()

print(f"Bike count predictions - Mean: {bike_predictions.mean():.2f}, Std: {bike_predictions.std():.2f}")
print(f"Accident severity predictions - Mean: {accident_predictions.mean():.3f}, Std: {accident_predictions.std():.3f}")

# Calculate some basic metrics
bike_mae = mean_absolute_error(y_bike_test, bike_predictions)
bike_mse = mean_squared_error(y_bike_test, bike_predictions)
bike_r2 = r2_score(y_bike_test, bike_predictions)

accident_mae = mean_absolute_error(y_accident_test, accident_predictions)
accident_mse = mean_squared_error(y_accident_test, accident_predictions)

print("\nBike Count Prediction Metrics:")
print(f"  MAE: {bike_mae:.2f}")
print(f"  MSE: {bike_mse:.2f}")
print(f"  R²: {bike_r2:.3f}")

print("\nAccident Severity Prediction Metrics:")
print(f"  MAE: {accident_mae:.3f}")
print(f"  MSE: {accident_mse:.3f}")

In [None]:
# Save the trained model and preprocessing objects
print("=== SAVING MODEL AND PREPROCESSORS ===")

import os
import pickle

# The output directory must exist before anything is written into it
os.makedirs('models', exist_ok=True)

# Save the TensorFlow model
model.save('models/bicycle_prediction_model.h5')
print("Saved TensorFlow model to models/bicycle_prediction_model.h5")

# Save the fitted encoders and scalers; the prediction helper needs the same
# objects to encode its inputs the way the training data was encoded
preprocessors = {
    'models/intersection_encoder.pkl': intersection_encoder,
    'models/weather_encoder.pkl': weather_encoder,
    'models/temp_scaler.pkl': temp_scaler,
    'models/feature_scaler.pkl': feature_scaler,
}
for pickle_path, preprocessor in preprocessors.items():
    with open(pickle_path, 'wb') as f:
        pickle.dump(preprocessor, f)

print("Saved preprocessing objects to models/ directory")

# Create a prediction function for easy use
def predict_bicycle_traffic(intersection, time_15min, weather, temperature, day_of_week, month):
    """
    Predict bicycle count and accident severity for one set of conditions.

    The inputs are encoded with the notebook-level fitted objects
    (intersection_encoder, weather_encoder, temp_scaler, feature_scaler) and
    passed through the notebook-level ``model``.

    Args:
        intersection: Intersection name (string) known to intersection_encoder
        time_15min: Time in 15-minute increments (0-95)
        weather: Weather label known to weather_encoder
            (e.g. 'CLEAR', 'CLOUDY', 'RAIN', 'SNOW')
        temperature: Temperature, in the unit of the training data's
            'Temperature' column (documented as Fahrenheit - TODO confirm)
        day_of_week: Day of week (0=Monday, 6=Sunday)
        month: Month (1-12)

    Returns:
        tuple: (bike_count_prediction, accident_severity_prediction)
    """
    # Categorical inputs -> integer codes
    site_code = intersection_encoder.transform([intersection])[0]
    weather_code = weather_encoder.transform([weather])[0]

    # Angles for the cyclical encodings; only the hour part of the slot is used
    hour = time_15min // 4
    hour_angle = 2 * np.pi * hour / 24
    day_angle = 2 * np.pi * day_of_week / 7
    month_angle = 2 * np.pi * month / 12

    # Standardize temperature with the scaler fitted on the training data
    scaled_temperature = temp_scaler.transform([[temperature]])[0][0]

    # Single-row feature matrix, in the order the model was trained on
    feature_row = [
        time_15min, site_code, weather_code,
        np.sin(hour_angle), np.cos(hour_angle),
        np.sin(day_angle), np.cos(day_angle),
        np.sin(month_angle), np.cos(month_angle),
        scaled_temperature,
    ]
    features_scaled = feature_scaler.transform(np.array([feature_row]))

    # The model returns one array per output head
    bike_head, accident_head = model.predict(features_scaled, verbose=0)
    return bike_head[0][0], accident_head[0][0]

# Smoke-test the prediction function on one hand-picked scenario
print("\n=== TESTING PREDICTION FUNCTION ===")
test_prediction = predict_bicycle_traffic(
    intersection='BROADWAY',
    time_15min=32,  # 8:00 AM
    weather='CLEAR',
    temperature=70,
    day_of_week=0,  # Monday
    month=6  # June
)

# test_prediction is (bike count, accident severity)
print("Test prediction for Broadway at 8:00 AM on Monday in June:")
print(f"  Predicted bike count: {test_prediction[0]:.1f}")
print(f"  Predicted accident severity: {test_prediction[1]:.3f}")

print("\nModel training and setup completed successfully!")
