# Traffic Volume Estimation with Machine Learning
## Advanced Analytics and Prediction System

This notebook analyzes traffic volume data, builds a predictive model, and creates an interactive web application for traffic volume estimation.

## 1. Data Loading and Exploration

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import joblib
import warnings
warnings.filterwarnings('ignore')

# Read the raw traffic records into a DataFrame
df = pd.read_csv('traffic_volume.csv')

# Overall dimensions as (rows, columns)
print(f"Dataset Shape: {df.shape}")

# A preview of the first rows and the summary statistics, each printed
# under its own heading
for heading, table in (
    ("\nFirst 5 rows:", df.head()),
    ("\nData Summary:", df.describe()),
):
    print(heading)
    display(table)

# Number of null entries in every column
null_counts = df.isnull().sum()
print("\nMissing Values:")
print(null_counts)

## 2. Data Preprocessing and Feature Engineering

In [None]:
# Parse the timestamp strings so the .dt accessor can be used below
df['date_time'] = pd.to_datetime(df['date_time'])
stamp = df['date_time']

# Calendar components taken straight from the timestamp
df['hour'] = stamp.dt.hour
df['day_of_week'] = stamp.dt.dayofweek  # Monday=0 ... Sunday=6
df['month'] = stamp.dt.month
df['year'] = stamp.dt.year

# 0/1 flag: Saturday (5) or Sunday (6)
df['is_weekend'] = (df['day_of_week'] >= 5).astype('int64')

# 0/1 flag: hour falls in 07-09 or 16-18 (both ends inclusive)
morning_peak = df['hour'].between(7, 9)
evening_peak = df['hour'].between(16, 18)
df['is_rush_hour'] = (morning_peak | evening_peak).astype('int64')

# Model inputs and the column to predict
feature_columns = [
    'holiday', 'temp', 'rain_1h', 'snow_1h', 'weather_main',
    'month', 'hour', 'day_of_week', 'is_weekend', 'is_rush_hour',
]
features = df[feature_columns]
target = df['traffic_volume']

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

## 3. Data Visualization

In [None]:
# Configure plot style.
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and
# 3.8 removed the old names, so use whichever spelling this installation
# actually ships and fall back to seaborn's own whitegrid theme otherwise.
for style_name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid'):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break
else:
    sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (12, 6)
sns.set_palette('viridis')

# 3x2 grid of exploratory plots
fig, axes = plt.subplots(3, 2, figsize=(15, 15))

# Traffic volume over time (mean of each calendar day)
df.set_index('date_time')['traffic_volume'].resample('D').mean().plot(ax=axes[0, 0])
axes[0, 0].set_title('Daily Traffic Volume')
axes[0, 0].set_ylabel('Vehicles per hour')

# Traffic by hour of day
sns.boxplot(x='hour', y='traffic_volume', data=df, ax=axes[0, 1])
axes[0, 1].set_title('Traffic Volume by Hour of Day')

# Traffic by day of week.
# The order is pinned to 0..6 so that exactly seven boxes are drawn, in
# Monday-to-Sunday order, and the hard-coded tick labels below always match
# them even if some weekday is absent from the data.
sns.boxplot(x='day_of_week', y='traffic_volume', data=df,
            order=list(range(7)), ax=axes[1, 0])
axes[1, 0].set_title('Traffic Volume by Day of Week')
axes[1, 0].set_xticklabels(['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'])

# Traffic by weather
sns.boxplot(x='weather_main', y='traffic_volume', data=df, ax=axes[1, 1])
axes[1, 1].set_title('Traffic Volume by Weather Condition')
axes[1, 1].tick_params(axis='x', rotation=45)

# Correlation heatmap between the target and the numeric weather columns
corr = df[['traffic_volume', 'temp', 'rain_1h', 'snow_1h', 'clouds_all']].corr()
sns.heatmap(corr, annot=True, cmap='coolwarm', ax=axes[2, 0])
axes[2, 0].set_title('Feature Correlation Matrix')

# Temporal patterns: mean volume for each hour of the day
hourly_traffic = df.groupby('hour')['traffic_volume'].mean()
hourly_traffic.plot(ax=axes[2, 1], marker='o')
axes[2, 1].set_title('Average Hourly Traffic Patterns')
axes[2, 1].set_ylabel('Vehicles per hour')
axes[2, 1].set_xlabel('Hour of Day')

# Save before show(): the figure is written to disk, then displayed
plt.tight_layout()
plt.savefig('traffic_analysis.png')
plt.show()

## 4. Model Building and Evaluation

In [None]:
# Preprocessing pipeline
# The engineered 0/1 flags 'is_weekend' and 'is_rush_hour' are listed with the
# numeric columns. ColumnTransformer drops every column that is not named in
# one of its transformers (remainder='drop' is the default), so leaving the
# flags out of both lists silently discarded them before they reached the
# regressor even though they are part of X_train.
numeric_features = ['temp', 'rain_1h', 'snow_1h', 'is_weekend', 'is_rush_hour']
categorical_features = ['holiday', 'weather_main', 'month', 'hour', 'day_of_week']

preprocessor = ColumnTransformer(
    transformers=[
        # Standardise the numeric columns
        ('num', StandardScaler(), numeric_features),
        # One-hot encode the categorical columns; a category not seen during
        # fit is encoded as all zeros instead of raising at predict time
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ])

# Create pipeline: preprocessing followed by a random forest regressor
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(n_estimators=200, random_state=42, n_jobs=-1))
])

# Train model
model.fit(X_train, y_train)

# Evaluate model on the held-out split
y_pred = model.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

print("\nModel Evaluation:")
print(f"RMSE: {rmse:.2f}")
print(f"R² Score: {r2:.2f}")

# Save the whole fitted pipeline (preprocessing + regressor) in one file
joblib.dump(model, 'traffic_volume_model.pkl')
print("\nModel saved as 'traffic_volume_model.pkl'")

## 5. Feature Importance Analysis

In [None]:
# Pull the two fitted steps out of the pipeline
fitted_forest = model.named_steps['regressor']
fitted_preprocessor = model.named_steps['preprocessor']

# One importance score per column fed to the forest
feature_importances = fitted_forest.feature_importances_

# Rebuild the column names in the order the preprocessor's transformer list
# declares them: numeric columns first, then the one-hot encoded columns
cat_encoder = fitted_preprocessor.named_transformers_['cat']
cat_feature_names = cat_encoder.get_feature_names_out(categorical_features)
all_feature_names = np.concatenate([numeric_features, cat_feature_names])

# Pair names with scores and keep the 15 highest-scoring rows
ranked_features = pd.DataFrame({
    'Feature': all_feature_names,
    'Importance': feature_importances,
})
importance_df = ranked_features.sort_values('Importance', ascending=False).head(15)

# Horizontal bar chart of the selected rows
importance_fig, importance_ax = plt.subplots(figsize=(12, 8))
sns.barplot(x='Importance', y='Feature', data=importance_df, ax=importance_ax)
importance_ax.set_title('Top 15 Feature Importances')
importance_ax.set_xlabel('Importance Score')
importance_ax.set_ylabel('Feature')
importance_fig.tight_layout()
importance_fig.savefig('feature_importance.png')
plt.show()

## 6. Web Application for Traffic Volume Estimation

In [None]:
%%writefile traffic_app.py
import streamlit as st
import pandas as pd
import joblib
import datetime

# Fitted model read from disk each time this script runs
model = joblib.load('traffic_volume_model.pkl')

# Choices offered in the "Weather" dropdown
# NOTE(review): presumably these (and the holiday names below) must match the
# category strings present in the training data - confirm against the dataset.
weather_options = [
    'Clear',
    'Clouds',
    'Rain',
    'Snow',
    'Mist',
    'Drizzle',
]

# Choices offered in the "Holiday" dropdown
holiday_options = [
    'None',
    'Christmas',
    'New Year',
    'Thanksgiving',
    'Independence Day',
]

def predict_traffic(input_data):
    """Return the model's traffic-volume estimate for one observation.

    Args:
        input_data: Mapping of feature name to value for a single row.

    Returns:
        The prediction truncated to an int, with negative results raised to 0.
    """
    # The model is given a DataFrame, so wrap the mapping as a one-row frame
    single_row = pd.DataFrame([input_data])

    # predict() yields one value per row; take the only one
    estimate = int(model.predict(single_row)[0])

    # Clamp below at zero
    return estimate if estimate > 0 else 0

# Page-wide CSS injected once at the top of the app
_PAGE_CSS = """
    <style>
        .stApp {
            background: linear-gradient(135deg, #1a2a6c, #b21f1f, #1a2a6c);
            background-size: 400% 400%;
            animation: gradient 15s ease infinite;
        }
        @keyframes gradient {
            0% { background-position: 0% 50%; }
            50% { background-position: 100% 50%; }
            100% { background-position: 0% 50%; }
        }
        .main-container {
            background-color: white;
            border-radius: 20px;
            padding: 2rem;
            box-shadow: 0 10px 30px rgba(0, 0, 0, 0.3);
            margin: 2rem auto;
            max-width: 700px;
        }
        .header {
            background: #1a2a6c;
            color: white;
            padding: 1.5rem;
            border-radius: 15px 15px 0 0;
            margin: -2rem -2rem 2rem -2rem;
        }
        .prediction-result {
            text-align: center;
            padding: 2rem;
            margin: 1rem 0;
        }
        .prediction-value {
            font-size: 5rem;
            font-weight: 800;
            color: #e44d26;
            margin: 1rem 0;
        }
        .stButton>button {
            background: linear-gradient(to right, #1a2a6c, #3a5ec0);
            color: white;
            font-weight: bold;
            padding: 0.75rem;
            border-radius: 12px;
            border: none;
            width: 100%;
            font-size: 1.1rem;
        }
    </style>
    """


def _build_input_data(holiday, temp, rain, snow, weather, selected_date, hour):
    """Assemble the single-row feature mapping passed to predict_traffic.

    The calendar features come from ``selected_date`` (the date entered in the
    form), so the weekday and weekend flag describe the day being asked about.
    """
    weekday = selected_date.weekday()  # Monday=0 ... Sunday=6
    return {
        'holiday': holiday,
        'temp': temp,
        'rain_1h': rain,
        'snow_1h': snow,
        'weather_main': weather,
        'month': selected_date.month,
        'hour': hour,
        'day_of_week': weekday,
        'is_weekend': 1 if weekday >= 5 else 0,
        'is_rush_hour': 1 if (7 <= hour <= 9) or (16 <= hour <= 18) else 0,
    }


def _render_prediction(prediction):
    """Show the predicted volume and a congestion message for it."""
    # Emit the whole result as ONE HTML fragment so the wrapping
    # "prediction-result" div really contains its children. It is built
    # without newlines or indentation so markdown cannot read it as code.
    st.markdown(
        '<div class="prediction-result">'
        '<h2>Predicted Traffic Volume</h2>'
        f'<div class="prediction-value">{prediction}</div>'
        '<h3>VEHICLES PER HOUR</h3>'
        '</div>',
        unsafe_allow_html=True,
    )

    # Interpretation bands (vehicles per hour)
    if prediction < 500:
        st.success("Light Traffic: Roads are clear and moving efficiently.")
    elif prediction < 1500:
        st.info("Moderate Traffic: Normal traffic conditions. Expect minor delays.")
    elif prediction < 2500:
        st.warning("Heavy Traffic: Significant congestion expected. Consider alternative routes.")
    else:
        st.error("Severe Traffic: Extreme congestion. Avoid travel if possible.")


def main():
    """Render the Streamlit page: input form, prediction and interpretation.

    The form collects weather conditions plus a date and hour. On submit the
    date is validated, the feature row is built from the *entered* date and
    hour, and the estimate from ``predict_traffic`` is displayed.
    """
    # Configure page (Streamlit requires this before other st.* output)
    st.set_page_config(
        page_title="TrafficTelligence",
        page_icon="🚦",
        layout="centered",
        initial_sidebar_state="collapsed"
    )

    # Custom CSS
    st.markdown(_PAGE_CSS, unsafe_allow_html=True)

    # Current date and time, used only as the form's default values
    now = datetime.datetime.now()

    with st.container():
        # NOTE(review): each st.markdown call is presumably rendered as its own
        # element, so this opening tag (and the header one below) may not
        # actually wrap the widgets that follow - confirm in the browser.
        st.markdown('<div class="main-container">', unsafe_allow_html=True)

        # Header
        st.markdown('<div class="header">', unsafe_allow_html=True)
        st.title('🚦 Traffic Volume Estimation')
        st.markdown('<h3 style="color:white; text-align:center;">Advanced Traffic Volume Estimation with Machine Learning</h3>',
                    unsafe_allow_html=True)
        st.markdown('</div>', unsafe_allow_html=True)

        # Input form
        with st.form("traffic_form"):
            col1, col2 = st.columns(2)

            with col1:
                holiday = st.selectbox("Holiday", holiday_options, index=0)
                # NOTE(review): confirm the training data's 'temp' column uses
                # the same unit as this label.
                temp = st.number_input("Temperature (°C)", value=22.0, min_value=-30.0, max_value=50.0)
                rain = st.number_input("Rain (mm)", value=0.0, min_value=0.0, max_value=100.0)
                snow = st.number_input("Snow (cm)", value=0.0, min_value=0.0, max_value=100.0)

            with col2:
                weather = st.selectbox("Weather", weather_options, index=1)
                year = st.number_input("Year", value=now.year, min_value=2000, max_value=2030)
                month = st.number_input("Month", value=now.month, min_value=1, max_value=12)
                day = st.number_input("Day", value=now.day, min_value=1, max_value=31)

            col3, col4 = st.columns(2)
            with col3:
                hour = st.slider("Hour", 0, 23, now.hour)
            with col4:
                # Kept in the UI, but its value is not a model feature: the
                # feature row below has no time component finer than the hour.
                st.slider("Minute", 0, 59, now.minute)

            submit_button = st.form_submit_button("Predict Traffic Volume")

        # Prediction result
        if submit_button:
            try:
                # The three number inputs allow impossible combinations
                # (e.g. 31 February); date() rejects those with ValueError.
                selected_date = datetime.date(int(year), int(month), int(day))
            except ValueError:
                st.error(
                    f"{int(year)}-{int(month):02d}-{int(day):02d} is not a valid "
                    "calendar date. Please adjust the day or month."
                )
            else:
                input_data = _build_input_data(
                    holiday, temp, rain, snow, weather, selected_date, hour
                )
                _render_prediction(predict_traffic(input_data))

        st.markdown('</div>', unsafe_allow_html=True)


if __name__ == "__main__":
    main()