In [1]:
!pip install opencv-python numpy ultralytics transformers pillow matplotlib seaborn pandas torch reportlab

Collecting ultralytics
  Downloading ultralytics-8.3.111-py3-none-any.whl.metadata (37 kB)
Collecting reportlab
  Downloading reportlab-4.4.0-py3-none-any.whl.metadata (1.8 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch)
  Downloading nvidia_cusolver_cu12-11.6.1.9-py3-none-

In [5]:
import tensorflow as tf
print(tf.config.list_physical_devices('GPU'))


2025-04-20 06:50:20.867993: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745131821.315320      74 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745131821.448410      74 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]


In [None]:
import cv2
import numpy as np
from ultralytics import YOLO
from transformers import pipeline
from PIL import Image
import re
from collections import defaultdict
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import os
import torch
import gc
from reportlab.lib.pagesizes import letter
from reportlab.lib import colors
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, Image as ReportLabImage
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
import uuid

# Clear any lingering GPU memory
if torch.cuda.is_available():
    torch.cuda.empty_cache()
gc.collect()

# Verify input files
model_path = r"/kaggle/input/test-file/best.pt"
video_path = r"/kaggle/input/test-file/test_10s.mp4"
if not os.path.exists(model_path) or not os.path.exists(video_path):
    print("Error: Model or video file not found.")
    exit()

# Load YOLO model
try:
    model = YOLO(model_path)
    print("YOLO model loaded successfully.")
except Exception as e:
    print(f"Error loading YOLO model: {e}")
    exit()

# Video setup
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    print(f"Error: Could not open video at {video_path}")
    exit()
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
if total_frames == 0:
    print("Error: Video has no frames.")
    exit()
middle_index = total_frames // 2
fps = cap.get(cv2.CAP_PROP_FPS)
selected_frame = None
peak_frame = None
vehicle_counts = {}
track_id_to_class = {}
frame_vehicle_counts = []
frame_index = 0
max_vehicles = 0
max_frame_index = 0
emergency_alerts = []
track_positions = defaultdict(list)
average_speeds = {}
congestion_indices = []

# Process video
print("Processing video...")
while cap.isOpened():
    success, frame = cap.read()
    if not success:
        break
    frame = cv2.resize(frame, (700, 500))
    results = model.track(frame, persist=True)
    boxes = results[0].boxes.xyxy.cpu().numpy()
    confidences = results[0].boxes.conf.cpu().numpy()
    classes = results[0].boxes.cls.cpu().numpy().astype(int)
    track_ids = results[0].boxes.id.cpu().numpy() if results[0].boxes.id is not None else []

    # Unique vehicle counts and speed estimation
    current_frame_counts = defaultdict(int)
    for conf, cls, track_id, box in zip(confidences, classes, track_ids, boxes):
        if conf < 0.5:
            continue
        label = results[0].names[cls]
        if track_id not in track_id_to_class:
            track_id_to_class[track_id] = label
            vehicle_counts[label] = vehicle_counts.get(label, 0) + 1
        current_frame_counts[label] += 1
        # Track position for speed
        center_x = (box[0] + box[2]) / 2
        center_y = (box[1] + box[3]) / 2
        track_positions[track_id].append((frame_index, center_x, center_y))

    frame_vehicle_counts.append(dict(current_frame_counts))

    # Emergency vehicle alerts
    if current_frame_counts.get("Ambulance", 0) > 1:
        alert = f"High ambulance activity at {frame_index/fps:.2f}s: {current_frame_counts['Ambulance']} ambulances"
        emergency_alerts.append(alert)
        print(f"Emergency alert: {alert}")

    # Congestion index
    total_in_frame = sum(current_frame_counts.values())
    congestion_index = total_in_frame / 5.0
    congestion_indices.append(congestion_index)
    if total_in_frame > max_vehicles:
        max_vehicles = total_in_frame
        max_frame_index = frame_index
        peak_frame = results[0].plot()

    # Save middle frame
    if frame_index == middle_index:
        selected_frame = results[0].plot()

    frame_index += 1

cap.release()
cv2.destroyAllWindows()
print(f"Video processing complete: {total_frames} frames processed.")

# Calculate average speeds
print("Calculating average speeds...")
for track_id, positions in track_positions.items():
    label = track_id_to_class[track_id]
    if len(positions) < 2:
        continue
    total_speed = 0
    count = 0
    for i in range(1, len(positions)):
        frame_diff = positions[i][0] - positions[i-1][0]
        if frame_diff == 0:
            continue
        dx = positions[i][1] - positions[i-1][1]
        dy = positions[i][2] - positions[i-1][2]
        distance = np.sqrt(dx**2 + dy**2)
        time = frame_diff / fps
        speed = distance / time
        total_speed += speed
        count += 1
    if count > 0:
        avg_speed = total_speed / count
        average_speeds[label] = average_speeds.get(label, 0) + avg_speed
        average_speeds[f"{label}_count"] = average_speeds.get(f"{label}_count", 0) + 1

for label in vehicle_counts.keys():
    count_key = f"{label}_count"
    if count_key in average_speeds:
        average_speeds[label] = average_speeds[label] / average_speeds[count_key]
        del average_speeds[count_key]

# Save annotated frames
try:
    if selected_frame is not None:
        cv2.imwrite("middle_frame.jpg", selected_frame)
        print("Middle frame saved as 'middle_frame.jpg'.")
    else:
        print("Warning: Middle frame not saved.")
    if peak_frame is not None:
        cv2.imwrite("peak_frame.jpg", peak_frame)
        print("Peak frame saved as 'peak_frame.jpg'.")
    else:
        print("Warning: Peak frame not saved.")
except Exception as e:
    print(f"Error saving frames: {e}")

# Convert middle frame to PIL
if selected_frame is not None:
    selected_frame_rgb = cv2.cvtColor(selected_frame, cv2.COLOR_BGR2RGB)
    selected_frame_pil = Image.fromarray(selected_frame_rgb)
else:
    print("Warning: No middle frame selected for PIL conversion.")
    selected_frame_pil = None

# Load models for ensemble pipeline
print("Loading language models...")
try:
    flan_t5 = pipeline("text2text-generation", model="google/flan-t5-large")
    print("Flan-T5-Large loaded.")
except Exception as e:
    print(f"Error loading Flan-T5-Large: {e}")
    flan_t5 = None

try:
    bart = pipeline("summarization", model="facebook/bart-large")
    print("BART-Large loaded for creative summaries.")
except Exception as e:
    print(f"Error loading BART-Large: {e}")
    bart = None

# Calculate averages and congestion
average_counts = {}
for vtype in vehicle_counts.keys():
    total = sum(frame_counts.get(vtype, 0) for frame_counts in frame_vehicle_counts)
    average_counts[vtype] = total / len(frame_vehicle_counts) if frame_vehicle_counts else 0
max_time_sec = max_frame_index / fps if fps > 0 else 0
average_congestion = np.mean(congestion_indices) if congestion_indices else 0

# Generate traffic density heatmap
print("Generating traffic density heatmap...")
times = [i / fps for i in range(len(frame_vehicle_counts))]
total_vehicles_per_frame = [sum(frame_counts.values()) for frame_counts in frame_vehicle_counts]
plt.figure(figsize=(10, 4))
sns.heatmap([total_vehicles_per_frame], cmap="YlOrRd", xticklabels=50, cbar_kws={'label': 'Vehicle Count'})
plt.xlabel("Time (seconds)")
plt.ylabel("Density")
plt.title("Traffic Density Heatmap")
plt.xticks(ticks=np.linspace(0, len(times)-1, 5), labels=[f"{t:.1f}" for t in np.linspace(0, max(times), 5)])
try:
    plt.savefig("heatmap.png")
    plt.close()
    print("Heatmap saved as 'heatmap.png'.")
except Exception as e:
    print(f"Error saving heatmap: {e}")

# Export to CSV
print("Exporting data to CSV...")
csv_data = {
    "Frame": list(range(len(frame_vehicle_counts))),
    "Time (s)": times,
    "Total Vehicles": total_vehicles_per_frame,
    "Congestion Index": congestion_indices
}
for vtype in vehicle_counts.keys():
    csv_data[vtype] = [frame_counts.get(vtype, 0) for frame_counts in frame_vehicle_counts]
df = pd.DataFrame(csv_data)
try:
    df.to_csv("traffic_data.csv", index=False)
    print("Traffic data exported to 'traffic_data.csv'.")
except Exception as e:
    print(f"Error exporting CSV: {e}")

# Enhanced context with temporal trends
context = (
    f"- Video duration: {(total_frames / fps):.2f} seconds\n"
    f"- Frames analyzed: {total_frames}\n"
    f"- FPS: {fps:.2f}\n"
    f"- Unique vehicles detected: {', '.join([f'{v}: {c}' for v, c in vehicle_counts.items()])} (total: {sum(vehicle_counts.values())})\n"
    f"- Average vehicles per frame: {', '.join([f'{v}: {c:.2f}' for v, c in average_counts.items()])}\n"
    f"- Average speeds (pixels/s): {', '.join([f'{v}: {s:.2f}' for v, s in average_speeds.items()])}\n"
    f"- Peak traffic: {max_vehicles} vehicles at {max_time_sec:.2f} seconds\n"
    f"- Middle frame (at {(middle_index / fps):.2f} seconds): {', '.join([f'{v}: {c}' for v, c in frame_vehicle_counts[middle_index].items()]) if middle_index < len(frame_vehicle_counts) else 'no vehicles'}\n"
    f"- Traffic condition: {'Heavy' if sum(vehicle_counts.values()) > 30 else 'Moderate' if sum(vehicle_counts.values()) > 15 else 'Light'}\n"
    f"- Average congestion index: {average_congestion:.2f} (0=low, 1=moderate, >2=high)\n"
    f"- Temporal trends:\n"
    f"  - First 25% of video: {sum(sum(fc.values()) for fc in frame_vehicle_counts[:len(frame_vehicle_counts)//4])} vehicles\n"
    f"  - Middle 50% of video: {sum(sum(fc.values()) for fc in frame_vehicle_counts[len(frame_vehicle_counts)//4:3*len(frame_vehicle_counts)//4])} vehicles\n"
    f"  - Last 25% of video: {sum(sum(fc.values()) for fc in frame_vehicle_counts[3*len(frame_vehicle_counts)//4:])} vehicles\n"
    f"{'- Emergency alerts: ' + '; '.join(emergency_alerts) if emergency_alerts else ''}"
)

# Generate text report
print("Generating text report...")
report_prompt = (
    f"Vehicle Detection Report:\n"
    f"Unique vehicle counts across all frames:\n"
    + "\n".join([f"{vtype}: {count}" for vtype, count in vehicle_counts.items()]) + "\n"
    f"Additional Insights:\n"
    f"Average vehicles per frame:\n"
    + "\n".join([f"{vtype}: {avg:.2f}" for vtype, avg in average_counts.items()]) + "\n"
    f"Average speeds (pixels/s):\n"
    + "\n".join([f"{vtype}: {speed:.2f}" for vtype, speed in average_speeds.items()]) + "\n"
    f"Peak traffic at {max_time_sec:.2f} seconds with {max_vehicles} vehicles.\n"
    f"Average congestion index: {average_congestion:.2f} (0=low, 1=moderate, >2=high).\n"
    f"Temporal trends:\n"
    f"  - First 25% of video: {sum(sum(fc.values()) for fc in frame_vehicle_counts[:len(frame_vehicle_counts)//4])} vehicles\n"
    f"  - Middle 50% of video: {sum(sum(fc.values()) for fc in frame_vehicle_counts[len(frame_vehicle_counts)//4:3*len(frame_vehicle_counts)//4])} vehicles\n"
    f"  - Last 25% of video: {sum(sum(fc.values()) for fc in frame_vehicle_counts[3*len(frame_vehicle_counts)//4:])} vehicles\n"
)
if emergency_alerts:
    report_prompt += "Emergency Alerts:\n" + "\n".join(emergency_alerts) + "\n"
if frame_vehicle_counts and middle_index < len(frame_vehicle_counts):
    selected_counts = frame_vehicle_counts[middle_index]
    report_prompt += "Selected middle frame shows:\n"
    for vtype, count in selected_counts.items():
        report_prompt += f"{vtype}: {count}\n"
report_prompt += (
    "Generate a detailed summary based on this data. "
    "Start with 'The scene appears to be...' and describe the traffic conditions, vehicle types, trends, and possible implications. "
    "Conclude with a paragraph on the report's insights and their potential use for traffic planning or urban design."
)

try:
    if flan_t5:
        result = flan_t5(report_prompt, max_new_tokens=300)
        report_text = result[0]['generated_text']
        print("Text report generated with Flan-T5.")
        if bart:
            summary_prompt = f"Summarize the following report creatively, focusing on the scene and implications:\n{report_text}"
            bart_result = bart(summary_prompt, max_length=200)
            report_text = bart_result[0]['summary_text']
            print("Report enhanced with BART summarization.")
    else:
        report_text = (
            f"Vehicle Detection Report:\n"
            f"Unique vehicle counts across all frames:\n"
            + "\n".join([f"{v}: {c}" for v, c in vehicle_counts.items()]) + "\n"
            f"Additional Insights:\n"
            f"Average vehicles per frame:\n"
            + "\n".join([f"{v}: {c:.2f}" for v, c in average_counts.items()]) + "\n"
            f"Peak traffic at {max_time_sec:.2f} seconds with {max_vehicles} vehicles.\n"
            f"Selected middle frame shows:\n"
            + "\n".join([f"{v}: {c}" for v, c in frame_vehicle_counts[middle_index].items()]) + "\n"
            f"The scene appears to be a typical urban intersection with {sum(frame_vehicle_counts[middle_index].values())} vehicles. "
            f"Traffic is {'heavy' if sum(vehicle_counts.values()) > 30 else 'moderate' if sum(vehicle_counts.values()) > 15 else 'light'}. "
            f"This report provides basic counts for analysis."
        )
        print("Warning: Text report generation failed, using fallback.")
except Exception as e:
    print(f"Report error: {e}")
    report_text = (
        f"Vehicle Detection Report:\n"
        f"Unique vehicle counts across all frames:\n"
        + "\n".join([f"{v}: {c}" for v, c in vehicle_counts.items()]) + "\n"
        f"Additional Insights:\n"
        f"Average vehicles per frame:\n"
        + "\n".join([f"{v}: {c:.2f}" for v, c in average_counts.items()]) + "\n"
        f"Peak traffic at {max_time_sec:.2f} seconds with {max_vehicles} vehicles.\n"
        f"Selected middle frame shows:\n"
        + "\n".join([f"{v}: {c}" for v, c in frame_vehicle_counts[middle_index].items()]) + "\n"
        f"The scene appears to be a typical urban intersection with {sum(frame_vehicle_counts[middle_index].values())} vehicles. "
        f"Traffic is {'heavy' if sum(vehicle_counts.values()) > 30 else 'moderate' if sum(vehicle_counts.values()) > 15 else 'light'}. "
        f"This report provides basic counts for analysis."
    )
    print("Warning: Text report generation failed, using fallback.")

print("\n--- Generated Report ---\n")
print(report_text)

# Generate PDF report
print("Generating PDF report...")
try:
    pdf = SimpleDocTemplate("report.pdf", pagesize=letter)
    styles = getSampleStyleSheet()
    normal_style = ParagraphStyle(name='NormalWrap', parent=styles['Normal'], wordWrap='CJK')
    heading_style = styles['Heading1']
    subheading_style = styles['Heading2']
    elements = []

    # Title
    elements.append(Paragraph("Vehicle Detection Report", heading_style))
    elements.append(Spacer(1, 0.2 * inch))

    # Summary Report Section
    elements.append(Paragraph("Summary Report", subheading_style))
    elements.append(Spacer(1, 0.1 * inch))
    summary_lines = report_text.split('\n')
    for line in summary_lines:
        elements.append(Paragraph(line, normal_style))
    elements.append(Spacer(1, 0.2 * inch))

    # Vehicle Statistics Table
    elements.append(Paragraph("Vehicle Statistics", subheading_style))
    elements.append(Spacer(1, 0.1 * inch))
    table_data = [['Vehicle Type', 'Unique Count', 'Avg/Frame', 'Middle Frame', 'Avg Speed (px/s)']]
    for vtype in vehicle_counts.keys():
        unique_count = vehicle_counts.get(vtype, 0)
        avg_count = average_counts.get(vtype, 0)
        middle_count = frame_vehicle_counts[middle_index].get(vtype, 0) if middle_index < len(frame_vehicle_counts) else 0
        speed = average_speeds.get(vtype, 0)
        table_data.append([vtype, str(unique_count), f"{avg_count:.2f}", str(middle_count), f"{speed:.2f}"])
    table = Table(table_data)
    table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
        ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
        ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (-1, 0), 10),
        ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
        ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
        ('GRID', (0, 0), (-1, -1), 1, colors.black)
    ]))
    elements.append(table)
    elements.append(Spacer(1, 0.2 * inch))

    # Key Insights
    elements.append(Paragraph("Key Insights", subheading_style))
    elements.append(Spacer(1, 0.1 * inch))
    elements.append(Paragraph(f"Peak traffic occurred at {max_time_sec:.2f} seconds with {max_vehicles} vehicles.", normal_style))
    elements.append(Paragraph(f"Average congestion index: {average_congestion:.2f} (0=low, 1=moderate, >2=high).", normal_style))
    if emergency_alerts:
        elements.append(Paragraph("Emergency Alerts:", normal_style))
        for alert in emergency_alerts:
            elements.append(Paragraph(f"- {alert}", normal_style))
    elements.append(Spacer(1, 0.2 * inch))

    # Visualizations
    elements.append(Paragraph("Visualizations", subheading_style))
    elements.append(Spacer(1, 0.1 * inch))
    image_paths = [
        ("heatmap.png", "Traffic Density Heatmap"),
        ("middle_frame.jpg", "Middle Frame"),
        ("peak_frame.jpg", "Peak Traffic Frame")
    ]
    for path, title in image_paths:
        if os.path.exists(path):
            try:
                img = ReportLabImage(path, width=5*inch, height=3*inch)
                elements.append(Paragraph(title, normal_style))
                elements.append(img)
                elements.append(Spacer(1, 0.1 * inch))
            except Exception as e:
                print(f"Error adding image {path} to PDF: {e}")
                elements.append(Paragraph(f"{title} not available.", normal_style))
                elements.append(Spacer(1, 0.1 * inch))
        else:
            print(f"Warning: Image {path} not found.")
            elements.append(Paragraph(f"{title} not available.", normal_style))
            elements.append(Spacer(1, 0.1 * inch))

    # Note
    elements.append(Paragraph("Note: This PDF contains the complete vehicle detection report.", normal_style))

    # Build PDF
    pdf.build(elements)
    print("PDF report generated as 'report.pdf'.")
except Exception as e:
    print(f"Error generating PDF: {e}")

# Updated answer_question function
def answer_question(question, vehicle_counts, frame_vehicle_counts, fps, total_frames, average_counts, max_time_sec, max_vehicles, context, flan_t5, bart, average_speeds, congestion_indices, emergency_alerts):
    question = question.lower().strip()
    if not question:
        return "Please ask a valid question."

    # Time-range query handling
    time_range_match = re.search(r"from (\d+(?:\.\d+)?)\s*(?:to|s(?:econd)?s?\s*to)\s*(\d+(?:\.\d+)?)\s*s(?:econd)?s?", question)
    if time_range_match:
        start_time = float(time_range_match.group(1))
        end_time = float(time_range_match.group(2))
        if start_time >= end_time or end_time > total_frames / fps:
            return f"Invalid time range: {start_time}s to {end_time}s. Video duration is {total_frames/fps:.2f}s."
        start_frame = int(start_time * fps)
        end_frame = min(int(end_time * fps), total_frames - 1)
        range_counts = defaultdict(int)
        for frame_counts in frame_vehicle_counts[start_frame:end_frame + 1]:
            for vtype, count in frame_counts.items():
                range_counts[vtype] += count
        counts_str = ", ".join([f"{count} {vtype}(s)" for vtype, count in range_counts.items()])
        return f"From {start_time}s to {end_time}s: {counts_str or 'no vehicles'}."

    # Specific time query
    time_match = re.search(r"at (\d+(?:\.\d+)?) seconds?", question)
    if time_match:
        time_sec = float(time_match.group(1))
        frame_index = min(int(time_sec * fps), total_frames - 1)
        if frame_index >= total_frames:
            return f"Time {time_sec}s exceeds video duration ({total_frames/fps:.2f}s)."
        frame_counts = frame_vehicle_counts[frame_index]
        for vtype in vehicle_counts.keys():
            if vtype.lower() in question:
                count = frame_counts.get(vtype, 0)
                return f"At {time_sec} seconds, there were {count} {vtype}(s)."
        counts_str = ", ".join([f"{count} {vtype}(s)" for vtype, count in frame_counts.items()])
        return f"At {time_sec} seconds: {counts_str or 'no vehicles'}."

    # Other rule-based answers
    if "beginning" in question or "start" in question:
        frame_counts = frame_vehicle_counts[0]
        counts_str = ", ".join([f"{count} {vtype}(s)" for vtype, count in frame_counts.items()])
        return f"At the start: {counts_str or 'no vehicles'}."

    if "end" in question or "last" in question:
        frame_counts = frame_vehicle_counts[-1]
        counts_str = ", ".join([f"{count} {vtype}(s)" for vtype, count in frame_counts.items()])
        return f"At the end: {counts_str or 'no vehicles'}."

    if "middle frame" in question or "middle of the video" in question:
        frame_counts = frame_vehicle_counts[middle_index]
        counts_str = ", ".join([f"{count} {vtype}(s)" for vtype, count in frame_counts.items()])
        return f"In the middle frame at {middle_index/fps:.2f} seconds: {counts_str or 'no vehicles'}."

    if "peak" in question and "time" in question:
        return f"Peak traffic was at {max_time_sec:.2f} seconds with {max_vehicles} vehicles."

    if "types of vehicles" in question or "vehicle types" in question:
        types = list(vehicle_counts.keys())
        types_str = ", ".join(types[:-1]) + " and " + types[-1] if len(types) > 1 else types[0]
        return f"Vehicle types detected: {types_str}."

    if "average speed" in question:
        for vtype in vehicle_counts.keys():
            if vtype.lower() in question:
                speed = average_speeds.get(vtype, 0)
                return f"The average speed of {vtype}s was {speed:.2f} pixels per second."
        speeds_str = ", ".join([f"{vtype}: {speed:.2f} pixels/s" for vtype, speed in average_speeds.items()])
        return f"Average speeds: {speeds_str or 'none'}."

    if "average" in question:
        for vtype in vehicle_counts.keys():
            if vtype.lower() in question:
                avg = average_counts.get(vtype, 0)
                return f"Average {vtype}s per frame: {avg:.2f}."
        averages_str = ", ".join([f"{vtype}: {avg:.2f}" for vtype, avg in average_counts.items()])
        return f"Average vehicles per frame: {averages_str or 'none'}."

    if "how many" in question and "total" in question:
        total = sum(vehicle_counts.values())
        return f"Total unique vehicles detected: {total}."

    if "congestion" in question or "congested" in question:
        level = "high" if average_congestion > 2 else "moderate" if average_congestion > 1 else "low"
        return f"The average congestion index was {average_congestion:.2f}, indicating {level} congestion."

    if "emergency" in question or "alerts" in question:
        return f"Emergency alerts: {'; '.join(emergency_alerts) if emergency_alerts else 'None detected'}."

    # Ensemble model pipeline for creative/exploratory questions
    is_exploratory = any(kw in question for kw in ["describe", "tell me", "what can you say", "summarize", "activity", "explain"])
    is_hypothetical = any(kw in question for kw in ["would", "might", "could", "cause"])

    prompt = f"Context:\n{context}\nQuestion: {question}\n"

    if is_exploratory:
        prompt += (
            "Instruction: Provide a detailed and creative description of the traffic scene based on the provided data. "
            "Mention vehicle types, counts, speeds, temporal trends, and any notable events (e.g., emergency alerts). "
            "Start with 'The video depicts...' and weave a narrative that paints a vivid picture of the scene.\nAnswer:"
        )
        try:
            if flan_t5:
                flan_response = flan_t5(prompt, max_new_tokens=200)[0]['generated_text'].strip()
                if bart:
                    bart_prompt = f"Summarize and enhance this description creatively:\n{flan_response}"
                    response = bart(bart_prompt, max_length=150)[0]['summary_text'].strip()
                else:
                    response = flan_response
            else:
                response = "Chatbot unavailable. Summary: " + ", ".join([f"{v}: {c}" for v, c in vehicle_counts.items()]) + "."
        except Exception as e:
            response = f"Error: {e}. Summary: {', '.join([f'{v}: {c}' for v, c in vehicle_counts.items()])}."
    elif is_hypothetical:
        prompt += (
            "Instruction: Speculate on possible causes or implications of the traffic data. "
            "Discuss potential reasons for peak traffic, emergency alerts, or how this data could inform traffic management or urban planning. "
            "Start with 'Based on the data...' and provide insightful analysis.\nAnswer:"
        )
        try:
            if flan_t5:
                flan_response = flan_t5(prompt, max_new_tokens=200)[0]['generated_text'].strip()
                if bart:
                    bart_prompt = f"Summarize and enhance this analysis creatively:\n{flan_response}"
                    response = bart(bart_prompt, max_length=150)[0]['summary_text'].strip()
                else:
                    response = flan_response
            else:
                response = "Chatbot unavailable. Summary: " + ", ".join([f"{v}: {c}" for v, c in vehicle_counts.items()]) + "."
        except Exception as e:
            response = f"Error: {e}. Summary: {', '.join([f'{v}: {c}' for v, c in vehicle_counts.items()])}."
    else:
        prompt += "Instruction: Answer the question concisely using the provided data. If the question cannot be answered, state that.\nAnswer:"
        try:
            if flan_t5:
                response = flan_t5(prompt, max_new_tokens=100)[0]['generated_text'].strip()
            else:
                response = "Chatbot unavailable. Summary: " + ", ".join([f"{v}: {c}" for v, c in vehicle_counts.items()]) + "."
        except Exception as e:
            response = f"Error: {e}. Summary: {', '.join([f'{v}: {c}' for v, c in vehicle_counts.items()])}."

    return response

# Dynamic chat suggestions
suggestions = [
    "How many cars were detected in total?",
    "How many ambulances were in the video?",
    "How many vehicles from 2 to 5 seconds?",
    "What was the peak traffic time?",
    "What types of vehicles were detected?",
    "Describe the traffic in the video.",
    "Explain the traffic flow and trends.",
    "What was happening in the middle of the video?",
    "What might have caused the peak traffic?",
    "How could this data help traffic management?",
    "What was the average speed of cars?",
    "How congested was the traffic?",
    "Summarize the vehicle activity in the video."
]
if emergency_alerts:
    suggestions.append("When were ambulances most active?")

# Chat loop
print("\nStarting chat interaction...")
print("Video processing complete. Ask any question about the video (type 'exit' to quit).")
print("Suggested questions:")
for i, suggestion in enumerate(suggestions, 1):
    print(f"{i}. {suggestion}")
while True:
    try:
        user_query = input("Ask a question: ")
        if user_query.lower().strip() == "exit":
            print("Goodbye.")
            break
        response = answer_question(user_query, vehicle_counts, frame_vehicle_counts, fps, total_frames, average_counts, max_time_sec, max_vehicles, context, flan_t5, bart, average_speeds, congestion_indices, emergency_alerts)
        print("\nAnswer:")
        print(response)
        print()
    except KeyboardInterrupt:
        print("\nChat interrupted. Type 'exit' to quit or continue asking questions.")
    except Exception as e:
        print(f"Error in chat loop: {e}")
        print("Please try again or type 'exit' to quit.")

# Cleanup
if torch.cuda.is_available():
    torch.cuda.empty_cache()
gc.collect()

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


E0000 00:00:1745134279.308554      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745134279.386150      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


YOLO model loaded successfully.
Processing video...
[31m[1mrequirements:[0m Ultralytics requirement ['lap>=0.5.12'] not found, attempting AutoUpdate...
Collecting lap>=0.5.12
  Downloading lap-0.5.12-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.2 kB)
Downloading lap-0.5.12-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.7/1.7 MB 30.9 MB/s eta 0:00:00
Installing collected packages: lap
Successfully installed lap-0.5.12

[31m[1mrequirements:[0m AutoUpdate success ✅ 3.1s, installed 1 package: ['lap>=0.5.12']
[31m[1mrequirements:[0m ⚠️ [1mRestart runtime or rerun command for updates to take effect[0m


0: 480x640 1 Bus, 8 Cars, 38.6ms
Speed: 10.5ms preprocess, 38.6ms inference, 323.1ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 Bus, 8 Cars, 6.8ms
Speed: 2.4ms preprocess, 6.8ms inference, 1.3ms

config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/3.13G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

Device set to use cuda:0


Flan-T5-Large loaded.


config.json:   0%|          | 0.00/1.63k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.02G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.02G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Device set to use cuda:0


BART-Large loaded for creative summaries.
Generating traffic density heatmap...
Heatmap saved as 'heatmap.png'.
Exporting data to CSV...
Traffic data exported to 'traffic_data.csv'.
Generating text report...


Your max_length is set to 200, but your input_length is only 34. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=17)


Text report generated with Flan-T5.
Report enhanced with BART summarization.

--- Generated Report ---

Summarize the following report creatively, focusing on the scene and implications:The report describes the traffic conditions, vehicle types, trends, and possible implications.
Generating PDF report...
PDF report generated as 'report.pdf'.

Starting chat interaction...
Video processing complete. Ask any question about the video (type 'exit' to quit).
Suggested questions:
1. How many cars were detected in total?
2. How many ambulances were in the video?
3. How many vehicles from 2 to 5 seconds?
4. What was the peak traffic time?
5. What types of vehicles were detected?
6. Describe the traffic in the video.
7. Explain the traffic flow and trends.
8. What was happening in the middle of the video?
9. What might have caused the peak traffic?
10. How could this data help traffic management?
11. What was the average speed of cars?
12. How congested was the traffic?
13. Summarize the vehicle

Ask a question:  give me video description



Answer:
Video duration: 10.03 seconds.



Ask a question:  what are the vehicles in the video 



Answer:
Car: 18, Bus: 1 (total: 19)



Ask a question:  how many vehicles on 4th second



Answer:
4



Ask a question:  how many vechicles between 4 to 7 seconds



Answer:
5 vehicles at 1.33 seconds



Ask a question:  describe the vehicles timings



Answer:
Summarize and enhance this description creatively:The video depicts a city street with a mix of cars, buses, and cars. The video begins with a black screen with a white background. Then, the video then transitions to a white screen with an image of a car and a bus on a street with cars on the street. Then the video transitions to an image with a car on the road. The music then fades out and fades back in. The song then fades in and out. The game then fades to black. The vids then fade out.The video then transition to black screen. The photo then transitions. The Video then transitions:A. Thevideo then transitions,A-B-C-D-E-



Ask a question:  Summarize the vehicle activity in the video


Your max_length is set to 150, but your input_length is only 29. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=14)



Answer:
Summarize and enhance this description creatively:The video depicts a city street with a mix of cars, buses, and cars.



Ask a question:  How many ambulances were in the video?



Answer:
No ambulances were in the video.



Ask a question:  Describe the traffic in the video.


Your max_length is set to 150, but your input_length is only 42. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=21)



Answer:
Summarize and enhance this description creatively:The video depicts a city street with a mix of cars, buses, and cars. The video was shot at a speed of 30.00 fps.

