# Traffic Prediction Chatbot Development Notebook

This notebook walks you through building an AI-powered traffic prediction chatbot. It contains approximately 138 lines of code and markdown.

# 🚦 AI-Powered Traffic Prediction Chatbot – EDA & Model Logic

## 1. 🎯 Objective
We aim to build an AI chatbot that predicts traffic based on past traffic data.

## 2. 📦 Import Required Libraries
We import libraries for data handling, plotting, and datetime manipulation.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import os

## 3. 📁 Load Dataset
We read the traffic data CSV file and clean column names.

In [None]:
# Read the raw traffic CSV and strip surrounding whitespace from every
# column header so later cells can refer to columns by their clean names.
df = pd.read_csv("traffic_data.csv").rename(columns=str.strip)

## 4. 🔢 Show Number of Records
We check how many data entries (rows) are present.

In [None]:
# The first entry of DataFrame.shape is the number of rows.
print("Number of rows in dataset:", df.shape[0])

## 5. 🧹 Preprocess Columns
We parse the `Datetime` column (expected format `DD-MM-YYYY HH:MM`) into real timestamps and extract the hour and the date from it.

In [None]:
# Parse timestamps written as DD-MM-YYYY HH:MM. The explicit format already
# fixes the day/month order, so no separate dayfirst flag is needed.
# errors='coerce' turns values that do not match the format into NaT.
df['Datetime'] = pd.to_datetime(df['Datetime'], format='%d-%m-%Y %H:%M', errors='coerce')

# Coercion is silent, so report how many rows ended up without a timestamp;
# those rows get missing Hour/Date values and drop out of the hour/date filters.
unparsed_rows = int(df['Datetime'].isna().sum())
if unparsed_rows:
    print(f"Warning: {unparsed_rows} rows have a missing or unparseable Datetime.")

# Derived columns used by the plots and prediction functions below.
df['Hour'] = df['Datetime'].dt.hour
df['Date'] = df['Datetime'].dt.date

## 6. 🔍 Initial Data Exploration
We look at data samples, types, stats, and missing values.

In [None]:
# First rows, to sanity-check the parsed columns.
print(df.head())
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() would add a stray "None" line.
df.info()
# Descriptive statistics of the numeric columns.
print(df.describe())
# Per-column count of missing values.
print("Missing values in each column:\n", df.isnull().sum())

## 7. 🧾 Dataset Summary
We print a statistical summary like mean, std, etc.

In [None]:
# Heading and descriptive statistics, one per line in a single print call.
print("\n📊 Summary of the dataset:", df.describe(), sep="\n")

## 8. 📆 Year Coverage in Data
We check how many unique years of traffic data we have.

In [None]:
# Number of distinct calendar years among the parsed timestamps
# (missing timestamps are not counted).
years = df['Datetime'].dt.year.nunique(dropna=True)
print(f"\n📆 Number of years covered in the data: {years}")

## 9. 📈 Traffic Distribution by Hour
We draw a boxplot showing traffic variation for each hour.

In [None]:
# One box per hour of day, showing the spread of traffic counts in that hour.
plt.figure(figsize=(12, 6))
sns.boxplot(data=df, x='Hour', y='Count').set_title("Traffic Volume by Hour")
plt.show()

## 10. 📉 Daily Traffic Trend
We plot how traffic changes day by day on average.

In [None]:
# Mean traffic count per calendar date, drawn as a line with a grid.
daily_avg = df['Count'].groupby(df['Date']).mean()
daily_avg.plot(figsize=(14, 5), title="Daily Average Traffic Volume").grid(True)
plt.show()

## 11. 📆 Monthly Pattern
We check average traffic volume per month to see seasonal trends.

In [None]:
# Month number taken from each timestamp, then the mean count per month
# (all years in the data are pooled together).
df['Month'] = df['Datetime'].dt.month
monthly_avg = df['Count'].groupby(df['Month']).mean()

# Line chart of the monthly means with a marker at every month.
plt.figure(figsize=(10, 5))
plt.plot(
    monthly_avg.index,
    monthly_avg.to_numpy(),
    color='purple',
    linestyle='-',
    marker='o',
)
plt.xlabel("Month")
plt.ylabel("Traffic Volume")
plt.title("Average Monthly Traffic Volume")
plt.grid(True)
plt.tight_layout()
plt.show()

## 12. ⚙️ Prediction Logic from EDA
Based on the rounded average traffic count, we classify the traffic level:

- `<= 100` → Light
- `101–200` → Moderate
- `> 200` → Heavy

## 13. 🤖 Core Traffic Prediction Function
This function gives a traffic status based on a 3-hour window.

In [None]:
def predict_traffic(hour_query: int, date_query=None):
    """Describe the average traffic around an hour, optionally on one date.

    Averages ``Count`` over the rows of the module-level ``df`` whose ``Hour``
    lies in the three-hour window around ``hour_query`` (the hour before, the
    hour itself and the hour after, wrapping around midnight). When
    ``date_query`` is given, only rows whose ``Date`` equals that date are
    used. The rounded average is classified as Light (<= 100),
    Moderate (<= 200) or Heavy (> 200).

    Args:
        hour_query: Hour of day, 0-23.
        date_query: Optional date in any form ``pd.to_datetime`` can parse,
            e.g. ``"2015-07-24"``. A falsy value means "all dates".

    Returns:
        A message string: the rounded average with its traffic status, or an
        explanatory message when the hour is out of range, the date cannot be
        parsed, or no matching rows exist.
    """
    # An hour outside 0-23 would build a window such as [23, 24, 1] whose
    # centre never matches any row, silently averaging the wrong hours.
    if not 0 <= hour_query <= 23:
        return "Invalid hour. Please provide an hour between 0 and 23."

    hours = [(hour_query - 1) % 24, hour_query, (hour_query + 1) % 24]
    in_window = df['Hour'].isin(hours)

    if date_query:
        try:
            date_obj = pd.to_datetime(date_query).date()
        except (ValueError, TypeError):
            # Answer with a message rather than propagating the parse error.
            return "Invalid date format. Use YYYY-MM-DD."
        in_window = in_window & (df['Date'] == date_obj)

    avg = df.loc[in_window, 'Count'].mean()
    # The mean of an empty selection is NaN: nothing matched the query.
    if pd.isna(avg):
        return "No data available for the specified hour and date."

    # int() keeps the message free of a trailing ".0" whatever numeric type
    # round() hands back.
    rounded_avg = int(round(avg))

    if rounded_avg <= 100:
        status = "Light traffic — you're good to go!"
    elif rounded_avg <= 200:
        status = "Moderate — leave a little early."
    else:
        status = "Heavy — plan ahead."

    return f"Avg traffic around {hour_query}:00 is {rounded_avg} units. {status}"

## 14. 📊 Plot 3-Hour Bin for a Given Hour
This graph shows traffic trend around a specific hour.

In [None]:
def traffic_prediction_plot(hour):
    """Plot and print the average traffic in the 3-hour window around ``hour``.

    Uses every row of the module-level ``df`` whose ``Hour`` is the hour
    before, the hour itself or the hour after (wrapping around midnight).
    Draws one bar per hour with that hour's mean ``Count`` and a dashed line
    at the rounded mean of those hourly means, then prints that value.

    Args:
        hour: Hour of day, 0-23.

    Returns:
        None. Prints a message and draws nothing when the hour is out of
        range or no counts are available for the window.
    """
    if not 0 <= hour <= 23:
        print("Invalid hour. Use a value from 0 to 23.")
        return

    hours_to_check = [(hour - 1) % 24, hour, (hour + 1) % 24]
    filtered = df[df['Hour'].isin(hours_to_check)]
    # reindex keeps the window order and yields NaN for hours without rows.
    avg_by_hour = filtered.groupby('Hour')['Count'].mean().reindex(hours_to_check)

    # With no usable counts the mean is NaN and round() would raise.
    if avg_by_hour.isna().all():
        print(f"No data around {hour}:00.")
        return

    smoothed_avg = round(avg_by_hour.mean())

    # Place bars by position rather than by hour value, so a window crossing
    # midnight (e.g. 23, 0, 1) stays adjacent and in chronological order.
    positions = list(range(len(hours_to_check)))
    hour_labels = [str(h) for h in hours_to_check]

    plt.figure(figsize=(8,5))
    plt.bar(positions, avg_by_hour.to_numpy(), color='orange', edgecolor='black')
    plt.title(f"Predicted Traffic Around {hour}:00 (3-Hour Bin)")
    plt.xlabel("Hour of Day")
    plt.ylabel("Average Traffic Volume")
    plt.axhline(smoothed_avg, color='red', linestyle='--', label=f"Smoothed Avg: {smoothed_avg}")
    plt.xticks(positions, hour_labels)
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.5)
    plt.tight_layout()
    plt.show()

    print(f"🚦 Predicted Traffic at {hour}:00 is approx {smoothed_avg} units.")

## 15. 📅 Predict for Specific Date + Hour
This graph shows traffic trend for a specific date and hour.

In [None]:
def plot_date_hour_prediction(date_str: str, hour: int):
    """Plot and print the average traffic around ``hour`` on a single date.

    Selects rows of the module-level ``df`` whose ``Date`` equals the parsed
    ``date_str`` and whose ``Hour`` is the hour before, the hour itself or
    the hour after (wrapping around midnight). All three hours are taken from
    that same calendar date. Draws one bar per hour with that hour's mean
    ``Count`` and a dashed line at the rounded mean of those hourly means,
    then prints that value.

    Args:
        date_str: Date in a form ``pd.to_datetime`` can parse,
            e.g. ``"2015-07-24"``.
        hour: Hour of day, 0-23.

    Returns:
        None. Prints a message and draws nothing when the date cannot be
        parsed, the hour is out of range, or no counts match.
    """
    try:
        date_obj = pd.to_datetime(date_str).date()
    except (ValueError, TypeError):
        print("Invalid date format. Use YYYY-MM-DD.")
        return

    if not 0 <= hour <= 23:
        print("Invalid hour. Use a value from 0 to 23.")
        return

    hours_to_check = [(hour - 1) % 24, hour, (hour + 1) % 24]
    filtered = df[(df['Date'] == date_obj) & (df['Hour'].isin(hours_to_check))]

    # reindex keeps the window order and yields NaN for hours without rows.
    avg_by_hour = filtered.groupby('Hour')['Count'].mean().reindex(hours_to_check)

    # Covers both "no rows" and "rows with only missing counts"; round()
    # would raise on the resulting NaN mean.
    if avg_by_hour.isna().all():
        print(f"No data for {date_str} around {hour}:00.")
        return

    smoothed_avg = round(avg_by_hour.mean())

    # Place bars by position rather than by hour value, so a window crossing
    # midnight (e.g. 23, 0, 1) stays adjacent and in window order.
    positions = list(range(len(hours_to_check)))
    hour_labels = [str(h) for h in hours_to_check]

    plt.figure(figsize=(8,5))
    plt.bar(positions, avg_by_hour.to_numpy(), color='green', edgecolor='black')
    plt.title(f"Traffic on {date_str} at {hour}:00 (3-Hour Window)")
    plt.xlabel("Hour of Day")
    plt.ylabel("Average Traffic")
    plt.axhline(smoothed_avg, color='red', linestyle='--', label=f"Avg: {smoothed_avg}")
    plt.xticks(positions, hour_labels)
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.5)
    plt.tight_layout()
    plt.show()

    print(f"📅 On {date_str}, around {hour}:00, predicted traffic: {smoothed_avg} units.")

## 16. ✅ Example Commands to Run

In [None]:
# predict_traffic returns its message instead of printing it, and a notebook
# only echoes the value of a cell's last expression, so print each result.
print(predict_traffic(12))  # General for 12 PM
print(predict_traffic(9, "2015-07-24"))  # Specific date + time
# The plotting helpers show their figure and print their own summary line.
traffic_prediction_plot(16)  # Visualization
plot_date_hour_prediction("2015-07-24", 9)  # Date + Hour plot