<a href="https://colab.research.google.com/github/tahermadraswala/Autonomous-Driving-Vision-System/blob/main/Autonomous_Driving_Vision_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install ultralytics



In [None]:
"""
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install transformers
!pip install gradio
!pip install opencv-python
!pip install albumentations
!pip install segmentation-models-pytorch
!pip install timm
!pip install scikit-learn
!pip install pillow
!pip install numpy
!pip install matplotlib

# For Kaggle dataset download
!pip install kaggle
"""

'\n!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118\n!pip install transformers\n!pip install gradio\n!pip install opencv-python\n!pip install albumentations\n!pip install segmentation-models-pytorch\n!pip install timm\n!pip install scikit-learn\n!pip install pillow\n!pip install numpy\n!pip install matplotlib\n\n# For Kaggle dataset download\n!pip install kaggle\n'

In [None]:
import torch
import torch.nn as nn
import cv2
import numpy as np
from PIL import Image
import gradio as gr
from torchvision import transforms, models
import warnings
# Silence library warnings so the notebook output stays readable.
warnings.filterwarnings('ignore')

# Compute device shared by every model in this notebook: CUDA when PyTorch can
# see a GPU, otherwise CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# "\U0001F680" is the rocket emoji; it is written as an escape so the banner
# cannot be garbled by a wrong file encoding.
print(f"\U0001F680 ACCELERATION STATUS: Running on {DEVICE}")

ðŸš€ ACCELERATION STATUS: Running on cpu


In [None]:
class SemanticSegmentationModel:
    """Semantic-segmentation overlay built on torchvision's DeepLabV3-ResNet50.

    ``segment`` treats its input as an OpenCV-style BGR image: it converts
    BGR -> RGB before inference and blends the class colours back onto the
    original frame.
    """

    def __init__(self):
        # `pretrained=True` is deprecated in recent torchvision releases in
        # favour of the `weights=` enum.  The legacy call is kept only as a
        # fallback for installations that predate the enum.
        seg_zoo = models.segmentation
        weights_enum = getattr(seg_zoo, "DeepLabV3_ResNet50_Weights", None)
        if weights_enum is not None:
            network = seg_zoo.deeplabv3_resnet50(weights=weights_enum.DEFAULT)
        else:
            network = seg_zoo.deeplabv3_resnet50(pretrained=True)
        self.model = network.to(DEVICE)
        self.model.eval()

        # One colour per class id (19 entries).  Class ids beyond the end of
        # this table are left black in the overlay.
        # NOTE(review): these look like Cityscapes colours, while the
        # pretrained weights may predict a different label set -- confirm the
        # id -> colour mapping is the intended one.
        self.colors = np.array([
            [128, 64, 128], [244, 35, 232], [70, 70, 70], [102, 102, 156],
            [190, 153, 153], [153, 153, 153], [250, 170, 30], [220, 220, 0],
            [107, 142, 35], [152, 251, 152], [70, 130, 180], [220, 20, 60],
            [255, 0, 0], [0, 0, 142], [0, 0, 70], [0, 60, 100],
            [0, 80, 100], [0, 0, 230], [119, 11, 32]
        ])

        # Tensor conversion plus the mean/std normalisation applied before
        # the image is fed to the network.
        self.preprocess = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

    def segment(self, image: np.ndarray) -> tuple:
        """Segment ``image`` and return ``(blended_overlay, info_dict)``.

        Args:
            image: 3-channel BGR ``uint8`` frame.

        Returns:
            A tuple of the input frame blended 60/40 with the class-colour
            mask (same height/width as the input) and a small status dict.
        """
        h, w = image.shape[:2]

        # Cap the inference width at 1024 px to bound compute; the mask is
        # scaled back to the original resolution below.
        if w > 1024:
            scale = 1024 / w
            new_w, new_h = 1024, max(1, int(h * scale))  # never a 0-pixel height
            proc_img = cv2.resize(image, (new_w, new_h))
        else:
            proc_img = image

        img_rgb = cv2.cvtColor(proc_img, cv2.COLOR_BGR2RGB)
        input_tensor = self.preprocess(Image.fromarray(img_rgb)).unsqueeze(0).to(DEVICE)

        with torch.no_grad():
            output = self.model(input_tensor)['out'][0]

        # Per-pixel class id as uint8.
        seg_mask = output.argmax(0).byte().cpu().numpy()

        # Colour the mask with a single lookup instead of one boolean-mask
        # pass per class.  Rows past the palette stay zero, so ids without a
        # colour remain black exactly as before.
        lut = np.zeros((256, 3), dtype=np.uint8)
        n_colors = min(len(self.colors), 256)
        lut[:n_colors] = self.colors[:n_colors]
        colored_mask = lut[seg_mask]

        # Nearest-neighbour keeps class colours crisp when scaling back up.
        colored_mask = cv2.resize(colored_mask, (w, h), interpolation=cv2.INTER_NEAREST)

        blended = cv2.addWeighted(image, 0.6, colored_mask, 0.4, 0)
        return blended, {"Info": "Segmentation Complete"}

In [None]:

class ObjectDetectionModel:
    """
    2D Object Detection for dynamic objects using YOLOv8
    Optimized for GPU and formatted for Gradio compatibility
    """
    def __init__(self):
        try:
            from ultralytics import YOLO

            self.model = YOLO('yolov8n.pt')
        except ImportError:
            print("Error: Ultralytics not installed. Run '!pip install ultralytics'")
            self.model = None


        self.vehicle_classes = ['car', 'truck', 'bus', 'train']
        self.vru_classes = ['person', 'bicycle', 'motorcycle']
        self.traffic_classes = ['traffic light', 'stop sign']

    def detect(self, image: np.ndarray) -> tuple:
        """Perform object detection"""
        if self.model is None:
            return image, []


        results = self.model(image, conf=0.25, iou=0.45, verbose=False)

        detections = []
        annotated_image = image.copy()

        for result in results:
            boxes = result.boxes
            for box in boxes:
                # Get coordinates
                x1, y1, x2, y2 = box.xyxy[0].cpu().numpy().astype(int)
                confidence = float(box.conf[0])
                class_id = int(box.cls[0])
                class_name = self.model.names[class_id]

                # --- PRIORITY & COLOR LOGIC (Restored) ---
                if class_name in self.vru_classes:
                    color = (0, 0, 255)      # Red for pedestrians/cyclists
                    priority = "High"
                elif class_name in self.traffic_classes:
                    color = (255, 165, 0)    # Orange for traffic signs
                    priority = "High"
                elif class_name in self.vehicle_classes:
                    color = (0, 255, 0)      # Green for vehicles
                    priority = "Medium"
                else:
                    color = (255, 255, 0)    # Yellow for others
                    priority = "Low"


                cv2.rectangle(annotated_image, (x1, y1), (x2, y2), color, 2)


                label = f"{class_name} {confidence:.2f}"
                label_size, _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)
                cv2.rectangle(annotated_image, (x1, y1 - label_size[1] - 10),
                            (x1 + label_size[0], y1), color, -1)
                cv2.putText(annotated_image, label, (x1, y1 - 5),
                           cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)


                detections.append({
                    'class': class_name,
                    'confidence': f"{confidence:.2f}",
                    'bbox': f"({x1},{y1},{x2},{y2})",
                    'priority': priority
                })

        return annotated_image, detections

In [None]:


from typing import Tuple, Dict

class LaneDetectionModel:
    """
    Lane line detection for lateral control.

    Pipeline: grayscale -> Gaussian blur -> Canny edges -> trapezoidal region
    of interest -> probabilistic Hough transform -> one straight-line fit per
    side (negative-slope segments are treated as the left lane, positive-slope
    segments as the right lane).
    """
    def __init__(self):
        # Kept for backward compatibility; the region of interest is rebuilt
        # from the frame size on every detect_lanes() call.
        self.roi_vertices = None

    @staticmethod
    def _fit_lane(segments, y_bottom, y_top):
        """Fit ``x = a*y + b`` through all segment endpoints.

        Returns ``(x_at_y_bottom, x_at_y_top)`` as floats, or ``None`` when
        ``segments`` is empty.
        """
        if len(segments) == 0:
            return None
        pts = np.asarray(segments, dtype=np.float64)
        xs = pts[:, [0, 2]].ravel()
        ys = pts[:, [1, 3]].ravel()
        coeffs = np.polyfit(ys, xs, deg=1)
        return float(np.polyval(coeffs, y_bottom)), float(np.polyval(coeffs, y_top))

    def detect_lanes(self, image: np.ndarray) -> Tuple[np.ndarray, Dict]:
        """Detect lane lines in a BGR frame.

        Returns:
            ``(lane_image, info)``: a copy of the frame with the fitted lane
            lines and the region of interest drawn on it, and a dict with the
            keys ``'Left Lane'``, ``'Right Lane'`` and ``'Deviation'``.
            The deviation is the offset of the lane centre from the image
            centre, measured at the bottom row, as a percentage of the width.
        """
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        blur = cv2.GaussianBlur(gray, (5, 5), 0)
        edges = cv2.Canny(blur, 50, 150)

        # Trapezoid covering the lower part of the frame.
        height, width = image.shape[:2]
        roi_vertices = np.array([[
            (width * 0.1, height),
            (width * 0.45, height * 0.6),
            (width * 0.55, height * 0.6),
            (width * 0.9, height)
        ]], dtype=np.int32)

        mask = np.zeros_like(edges)
        cv2.fillPoly(mask, roi_vertices, 255)
        masked_edges = cv2.bitwise_and(edges, mask)

        lines = cv2.HoughLinesP(masked_edges, rho=2, theta=np.pi/180,
                               threshold=50, minLineLength=40, maxLineGap=100)

        lane_image = image.copy()

        # Split segments by slope sign, dropping vertical segments (undefined
        # slope) and near-horizontal ones (|slope| < 0.5).
        left_lines = []
        right_lines = []
        if lines is not None:
            for line in lines:
                x1, y1, x2, y2 = line[0]
                if x2 - x1 == 0:
                    continue
                slope = (y2 - y1) / (x2 - x1)
                if abs(slope) < 0.5:
                    continue
                if slope < 0:
                    left_lines.append(line[0])
                else:
                    right_lines.append(line[0])

        # Each lane is drawn from the bottom row up to the top of the ROI.
        y_bottom, y_top = height, int(height * 0.6)
        left_fit = self._fit_lane(left_lines, y_bottom, y_top)
        right_fit = self._fit_lane(right_lines, y_bottom, y_top)

        for fit, color in ((left_fit, (255, 0, 0)), (right_fit, (0, 255, 0))):
            if fit is not None:
                cv2.line(lane_image, (int(fit[0]), y_bottom),
                         (int(fit[1]), y_top), color, 10)

        if left_fit is not None and right_fit is not None:
            # Use where the fitted lanes meet the bottom row.  Averaging raw
            # segment start points (the previous approach) depends on which
            # endpoint Hough lists first and on how high up each segment
            # happens to lie.
            lane_center = (left_fit[0] + right_fit[0]) / 2
            image_center = width / 2
            deviation = ((lane_center - image_center) / width) * 100
            deviation_text = f"{deviation:.2f}% {'right' if deviation > 0 else 'left'}"
        else:
            deviation_text = 'N/A'

        info = {
            'Left Lane': 'Detected' if left_fit is not None else 'Not Detected',
            'Right Lane': 'Detected' if right_fit is not None else 'Not Detected',
            'Deviation': deviation_text
        }

        # Outline the region of interest for visual debugging.
        cv2.polylines(lane_image, roi_vertices, True, (0, 255, 255), 2)

        return lane_image, info


In [None]:


from typing import Tuple, List, Dict

class TrafficSignRecognition:
    """
    Traffic light state recognition by colour thresholding.

    Only coloured blobs (red / yellow / green) are detected and mapped to a
    recommended action.  Recognition of signs (stop, yield, speed limit) is
    not implemented in this class.
    """
    def __init__(self):
        # Inclusive (lower, upper) per-channel bounds, applied directly to the
        # BGR frame in detect_traffic_lights(), i.e. the order is (B, G, R).
        self.traffic_light_colors = {
            'red': ([0, 0, 100], [80, 80, 255]),
            'yellow': ([0, 100, 100], [80, 255, 255]),
            'green': ([0, 100, 0], [80, 255, 80])
        }

    def detect_traffic_lights(self, image: np.ndarray) -> Tuple[np.ndarray, List[Dict]]:
        """Detect traffic-light-like colour blobs and their states.

        Args:
            image: BGR frame.

        Returns:
            ``(result_image, detections)``: an annotated copy of the frame and
            a list of dicts with the keys ``type``, ``state``, ``action`` and
            ``location``.
        """
        result_image = image.copy()
        detections = []

        for color_name, (lower, upper) in self.traffic_light_colors.items():
            # Threshold the BGR frame itself: the bounds above are BGR ranges,
            # so no colour-space conversion is needed (the HSV image that used
            # to be computed here was never used).
            mask = cv2.inRange(image, np.array(lower), np.array(upper))

            contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL,
                                          cv2.CHAIN_APPROX_SIMPLE)

            for contour in contours:
                # Ignore small specks.
                if cv2.contourArea(contour) <= 100:
                    continue

                x, y, w, h = cv2.boundingRect(contour)
                aspect_ratio = h / w if w > 0 else 0

                # Keep roughly square-to-tall blobs only.
                if not (0.8 < aspect_ratio < 3.0):
                    continue

                cv2.rectangle(result_image, (x, y), (x+w, y+h),
                            (0, 255, 255), 2)
                # Clamp the text baseline so labels of blobs near the top
                # edge stay on the canvas.
                label_y = max(y - 10, 12)
                cv2.putText(result_image, f"{color_name.upper()} LIGHT",
                          (x, label_y), cv2.FONT_HERSHEY_SIMPLEX,
                          0.5, (0, 255, 255), 2)

                detections.append({
                    'type': 'Traffic Light',
                    'state': color_name.upper(),
                    'action': self.get_action(color_name),
                    'location': f"({x},{y})"
                })

        return result_image, detections

    def get_action(self, light_color: str) -> str:
        """Return the recommended action for a light colour, or 'UNKNOWN'."""
        actions = {
            'red': 'STOP',
            'yellow': 'PREPARE TO STOP',
            'green': 'GO'
        }
        return actions.get(light_color, 'UNKNOWN')

In [None]:


class DepthEstimationModel:
    """Monocular depth visualisation using the MiDaS small model from torch.hub."""

    def __init__(self):
        # Network weights and the matching input transform both come from the
        # same torch.hub repository.
        hub_repo = 'intel-isl/MiDaS'

        self.model = torch.hub.load(hub_repo, 'MiDaS_small')
        self.model.to(DEVICE)
        self.model.eval()

        self.transform = torch.hub.load(hub_repo, 'transforms').small_transform

    def estimate_depth(self, image: np.ndarray) -> tuple:
        """Predict a depth map for a BGR frame.

        Returns:
            ``(depth_colored, info)``: the prediction resized to the frame's
            height/width, min-max normalised to 0-255 and rendered with the
            MAGMA colour map, plus a small status dict.
        """
        rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        target_size = rgb_frame.shape[:2]  # (height, width)

        model_input = self.transform(rgb_frame).to(DEVICE)

        with torch.no_grad():
            raw_prediction = self.model(model_input)
            # Add a channel axis for interpolate(), resize to the frame
            # resolution, then drop the singleton axes again.
            resized = torch.nn.functional.interpolate(
                raw_prediction.unsqueeze(1),
                size=target_size,
                mode='bicubic',
                align_corners=False
            )
            resized = resized.squeeze()

        depth_values = resized.cpu().numpy()

        # Stretch to the full 8-bit range before colour-mapping.
        depth_8bit = cv2.normalize(depth_values, None, 0, 255, cv2.NORM_MINMAX, cv2.CV_8U)
        depth_colored = cv2.applyColorMap(depth_8bit, cv2.COLORMAP_MAGMA)

        return depth_colored, {'Status': 'Depth Calculated'}

In [None]:


class AutonomousDrivingSystem:
    """
    Complete Autonomous Driving System
    Integrates all CV components for comprehensive scene understanding
    """

    # (UI label, key in the results dict, attribute holding the model,
    #  method to call on it).  The order here fixes the order of the results.
    _MODULES = (
        ("Semantic Segmentation", "segmentation", "segmentation", "segment"),
        ("Object Detection", "detection", "object_detection", "detect"),
        ("Lane Detection", "lanes", "lane_detection", "detect_lanes"),
        ("Traffic Sign & Light Recognition", "traffic",
         "traffic_recognition", "detect_traffic_lights"),
        ("Depth Estimation", "depth", "depth_estimation", "estimate_depth"),
    )

    def __init__(self):
        # "\u2713" is the check mark; written as an escape so the status
        # lines cannot be garbled by a wrong file encoding.
        print("Initializing Autonomous Driving System...")
        self.segmentation = SemanticSegmentationModel()
        print("\u2713 Semantic Segmentation Model Loaded")

        self.object_detection = ObjectDetectionModel()
        print("\u2713 Object Detection Model Loaded")

        self.lane_detection = LaneDetectionModel()
        print("\u2713 Lane Detection Model Loaded")

        self.traffic_recognition = TrafficSignRecognition()
        print("\u2713 Traffic Sign Recognition Loaded")

        self.depth_estimation = DepthEstimationModel()
        print("\u2713 Depth Estimation Model Loaded")

        print("System Ready!")

    def process_frame(self, image: np.ndarray, selected_modules: List[str]) -> Dict:
        """Run the selected modules on one frame.

        Args:
            image: frame passed unchanged to every selected module.
            selected_modules: UI labels of the modules to run (see _MODULES).

        Returns:
            Dict mapping a result key ('segmentation', 'detection', 'lanes',
            'traffic', 'depth') to the ``(annotated_image, info)`` tuple of
            that module.  Modules that were not selected have no entry.
        """
        results = {}

        for label, key, attr_name, method_name in self._MODULES:
            if label not in selected_modules:
                continue
            # Resolve the model lazily so unselected modules are never touched.
            run_module = getattr(getattr(self, attr_name), method_name)
            annotated, info = run_module(image)
            results[key] = (annotated, info)

        return results


In [None]:


def create_gradio_interface():
    """Create the Gradio interface for the system.

    Instantiates the full AutonomousDrivingSystem (loading every model) and
    wires it to a Blocks UI with one checkbox per module, one output image
    per module and a text summary.

    Returns:
        The ``gr.Blocks`` app, not yet launched.
    """
    system = AutonomousDrivingSystem()

    def to_bgr(image):
        """Convert a Gradio image (grayscale / RGB / RGBA) to OpenCV BGR."""
        if image.ndim == 2 or image.shape[2] == 1:
            return cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
        if image.shape[2] == 4:
            return cv2.cvtColor(image, cv2.COLOR_RGBA2BGR)
        return cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

    def to_rgb(image):
        """Convert a BGR module output back to RGB for display (None passes through)."""
        if image is None:
            return None
        return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    def build_summary(results):
        """Render the per-module info of ``results`` as plain text."""
        bullet = "\u2022"   # bullet character
        arrow = "\u2192"    # rightwards arrow
        summary = "=== AUTONOMOUS DRIVING SYSTEM ANALYSIS ===\n\n"

        if 'segmentation' in results:
            summary += "SEMANTIC SEGMENTATION:\n"
            for key, val in results['segmentation'][1].items():
                summary += f"  {bullet} {key}: {val}\n"
            summary += "\n"

        if 'detection' in results:
            summary += "OBJECT DETECTION:\n"
            detections = results['detection'][1]
            if detections:
                # List the first five detections and summarise the rest.
                for det in detections[:5]:
                    summary += f"  {bullet} {det['class']} (Conf: {det['confidence']}, Priority: {det['priority']})\n"
                if len(detections) > 5:
                    summary += f"  ... and {len(detections)-5} more objects\n"
            else:
                summary += f"  {bullet} No objects detected\n"
            summary += "\n"

        if 'lanes' in results:
            summary += "LANE DETECTION:\n"
            for key, val in results['lanes'][1].items():
                summary += f"  {bullet} {key}: {val}\n"
            summary += "\n"

        if 'traffic' in results:
            summary += "TRAFFIC LIGHTS:\n"
            traffic_info = results['traffic'][1]
            if traffic_info:
                for info in traffic_info:
                    summary += f"  {bullet} {info['state']} {arrow} {info['action']}\n"
            else:
                summary += f"  {bullet} No traffic lights detected\n"
            summary += "\n"

        if 'depth' in results:
            summary += "DEPTH ESTIMATION:\n"
            for key, val in results['depth'][1].items():
                summary += f"  {bullet} {key}: {val}\n"

        return summary

    def process_image(image, seg_check, det_check, lane_check, traffic_check, depth_check):
        """Process image with selected modules"""
        if image is None:
            return None, None, None, None, None, "Please upload an image"

        selected = []
        if seg_check: selected.append("Semantic Segmentation")
        if det_check: selected.append("Object Detection")
        if lane_check: selected.append("Lane Detection")
        if traffic_check: selected.append("Traffic Sign & Light Recognition")
        if depth_check: selected.append("Depth Estimation")

        if not selected:
            return None, None, None, None, None, "Please select at least one module"

        # Gradio delivers RGB arrays, while every vision module works on
        # OpenCV BGR frames.  Convert on the way in ...
        results = system.process_frame(to_bgr(image), selected)

        # ... and back to RGB on the way out, so the UI shows true colours.
        seg_img = to_rgb(results.get('segmentation', (None, {}))[0])
        det_img = to_rgb(results.get('detection', (None, []))[0])
        lane_img = to_rgb(results.get('lanes', (None, {}))[0])
        traffic_img = to_rgb(results.get('traffic', (None, []))[0])
        depth_img = to_rgb(results.get('depth', (None, {}))[0])

        return seg_img, det_img, lane_img, traffic_img, depth_img, build_summary(results)

    with gr.Blocks(title="Autonomous Driving System", theme=gr.themes.Soft()) as demo:
        # "\U0001F697" is the car emoji (escaped to stay encoding-safe).
        gr.Markdown("""
        # \U0001F697 Autonomous Driving System - Computer Vision Project
        ### Advanced Multi-Module Vision System for Self-Driving Vehicles

        This system implements **5 critical computer vision components** for autonomous driving:
        1. **Semantic Segmentation** - Scene understanding (Road, Sky, Vehicle, Pedestrian)
        2. **Object Detection** - Dynamic object tracking (Cars, Trucks, Pedestrians, Cyclists)
        3. **Lane Detection** - Lane boundary identification for lateral control
        4. **Traffic Sign & Light Recognition** - Traffic rule compliance
        5. **Monocular Depth Estimation** - Distance estimation for safe velocity control
        """)

        with gr.Row():
            with gr.Column(scale=1):
                input_image = gr.Image(label="Upload Driving Scene Image", type="numpy")

                gr.Markdown("### Select Modules to Run:")
                seg_check = gr.Checkbox(label="Semantic Segmentation", value=True)
                det_check = gr.Checkbox(label="Object Detection", value=True)
                lane_check = gr.Checkbox(label="Lane Detection", value=True)
                traffic_check = gr.Checkbox(label="Traffic Sign & Light Recognition", value=True)
                depth_check = gr.Checkbox(label="Depth Estimation", value=True)

                # "\U0001F680" is the rocket emoji.
                process_btn = gr.Button("\U0001F680 Analyze Scene", variant="primary", size="lg")

            with gr.Column(scale=2):
                summary_output = gr.Textbox(label="Analysis Summary", lines=15)

        gr.Markdown("### Module Outputs:")

        with gr.Row():
            seg_output = gr.Image(label="Semantic Segmentation")
            det_output = gr.Image(label="Object Detection")

        with gr.Row():
            lane_output = gr.Image(label="Lane Detection")
            traffic_output = gr.Image(label="Traffic Recognition")

        depth_output = gr.Image(label="Depth Estimation")

        # The output order must match the tuple returned by process_image.
        process_btn.click(
            fn=process_image,
            inputs=[input_image, seg_check, det_check, lane_check, traffic_check, depth_check],
            outputs=[seg_output, det_output, lane_output, traffic_output, depth_output, summary_output]
        )

        # Closing horizontal rule.
        gr.Markdown("---")

    return demo


In [None]:


if __name__ == "__main__":
    # demo.launch(debug=True) blocks until the server is stopped, so anything
    # printed after it is never seen while the app is running.  Print the
    # usage notes first, then launch.
    print("\n" + "="*70)
    print("AUTONOMOUS DRIVING VISION SYSTEM - STARTING UP")
    print("="*70)
    print("\nTo use in Google Colab:")
    print("1. Run all installation commands at the top")
    print("2. Run this entire script")
    print("3. Upload driving scene images (download from Kaggle datasets)")
    print("\nRecommended Kaggle Datasets:")
    # "\u2022" is the bullet character (escaped to stay encoding-safe).
    print("\u2022 BDD100K: berkeley-deep-drive/bdd100k")
    print("\u2022 Cityscapes: dansbecker/cityscapes-image-pairs")
    print("\u2022 KITTI: kitti-dataset/kitti")
    print("="*70)

    demo = create_gradio_interface()
    # share=True requests a public gradio.live URL; debug=True keeps the cell
    # running so errors and logs are shown in the notebook.
    demo.launch(share=True, debug=True)

Initializing Autonomous Driving System...
âœ“ Semantic Segmentation Model Loaded
âœ“ Object Detection Model Loaded
âœ“ Lane Detection Model Loaded
âœ“ Traffic Sign Recognition Loaded


Using cache found in /root/.cache/torch/hub/intel-isl_MiDaS_master


Loading weights:  None


Using cache found in /root/.cache/torch/hub/rwightman_gen-efficientnet-pytorch_master
Using cache found in /root/.cache/torch/hub/intel-isl_MiDaS_master


âœ“ Depth Estimation Model Loaded
System Ready!
Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://66fc5b3fdbd0690dd8.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/uvicorn/protocols/http/h11_impl.py", line 403, in run_asgi
    result = await app(  # type: ignore[func-returns-value]
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__
    return await self.app(scope, receive, send)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/fastapi/applications.py", line 1133, in __call__
    await super().__call__(scope, receive, send)
  File "/usr/local/lib/python3.12/dist-packages/starlette/applications.py", line 113, in __call__
    await self.middleware_stack(scope, receive, send)
  File "/usr/local/lib/python3.12/dist-packages/starlette/middleware/errors.py", line 186, in __call__
    raise exc
  File "/usr/local/lib/python3.12/dist-packages/starlette/middleware/errors.py",