In [2]:
import torch
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import cv2


In [3]:
class ConvBlock(torch.nn.Module):
    """Bias-free 2-D convolution followed by batch norm and LeakyReLU(0.1).

    Args:
        in_c: Number of input channels.
        out_c: Number of output channels.
        kernel_size: Convolution kernel size; the padding is ``kernel_size // 2``.
        stride: Convolution stride.
    """

    def __init__(self, in_c, out_c, kernel_size=3, stride=1):
        super().__init__()
        # Submodule names (conv / bn / act) are part of the checkpoint key
        # layout, so they must not be renamed.
        self.conv = torch.nn.Conv2d(
            in_channels=in_c,
            out_channels=out_c,
            kernel_size=kernel_size,
            stride=stride,
            padding=kernel_size // 2,
            bias=False,
        )
        self.bn = torch.nn.BatchNorm2d(num_features=out_c)
        self.act = torch.nn.LeakyReLU(negative_slope=0.1)

    def forward(self, x):
        """Apply convolution, then batch norm, then the activation."""
        convolved = self.conv(x)
        normalised = self.bn(convolved)
        return self.act(normalised)

class ResidualBlock(torch.nn.Module):
    """Two-convolution bottleneck whose output is added back onto its input.

    A 1x1 ConvBlock reduces the channels to ``in_c // 2`` and a 3x3 ConvBlock
    restores them to ``in_c``, so the sum with the input is shape-compatible.

    Args:
        in_c: Number of input (and output) channels.
    """

    def __init__(self, in_c):
        super().__init__()
        hidden_c = in_c // 2
        reduce = ConvBlock(in_c, hidden_c, 1)
        expand = ConvBlock(hidden_c, in_c, 3)
        self.layer = torch.nn.Sequential(reduce, expand)

    def forward(self, x):
        """Return ``x`` plus the bottleneck branch evaluated on ``x``."""
        branch = self.layer(x)
        return x + branch

class Darknet53(torch.nn.Module):
    """Convolutional backbone built from five sequential stages.

    ``layer1`` starts with a stride-1 ConvBlock (3 -> 32 channels); every stage
    then contains one stride-2 ConvBlock followed by residual blocks. The
    stages output 64, 128, 256, 512 and 1024 channels and hold 1, 2, 8, 8 and
    4 residual blocks respectively.
    """

    @staticmethod
    def _make_stage(in_c, out_c, num_residual):
        """Build a stride-2 ConvBlock followed by ``num_residual`` residual blocks."""
        downsample = ConvBlock(in_c, out_c, 3, 2)
        residuals = [ResidualBlock(out_c) for _ in range(num_residual)]
        return torch.nn.Sequential(downsample, *residuals)

    def __init__(self):
        super().__init__()
        # Attribute names and Sequential indices define the checkpoint keys,
        # so the layout below mirrors the trained model exactly.
        self.layer1 = torch.nn.Sequential(
            ConvBlock(3, 32),
            ConvBlock(32, 64, 3, 2),
            ResidualBlock(64),
        )
        self.layer2 = self._make_stage(64, 128, 2)
        self.layer3 = self._make_stage(128, 256, 8)
        self.layer4 = self._make_stage(256, 512, 8)
        self.layer5 = self._make_stage(512, 1024, 4)

    def forward(self, x):
        """Pass ``x`` through the five stages in order and return the result."""
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        return self.layer5(x)

class AttentionModule(torch.nn.Module):
    """Residual re-weighting of a feature tensor by a learned-scale softmax map.

    For each sample, a softmax is taken jointly over every non-batch element
    of ``x``. The result is multiplied element-wise with ``x``, scaled by the
    learnable scalar ``gamma`` and added back to ``x``. ``gamma`` is
    initialised to zero, so a freshly constructed module is the identity.

    Args:
        in_channels: Accepted for interface compatibility; it is not used,
            because the module has no channel-dependent parameters.
    """

    def __init__(self, in_channels):
        super().__init__()
        self.gamma = torch.nn.Parameter(torch.zeros(1))

    def forward(self, x):
        """Return ``x + gamma * softmax(x) * x``.

        ``reshape`` is used instead of ``view`` so that non-contiguous inputs
        (for example channels_last or permuted tensors) are handled rather
        than raising a RuntimeError.
        """
        flat = x.reshape(x.size(0), -1)
        attn = torch.softmax(flat, dim=1).reshape(x.shape)
        return x + self.gamma * attn * x

class AttentionYOLOv3(torch.nn.Module):
    """Image classifier: Darknet53 features, attention, pooling, linear head.

    Despite the name, the forward pass returns one vector of class logits per
    image rather than bounding boxes.

    Args:
        num_classes: Size of the output logit vector.
    """

    def __init__(self, num_classes=2):
        super().__init__()
        feature_dim = 1024  # channel count produced by the last backbone stage
        self.backbone = Darknet53()
        self.attn = AttentionModule(feature_dim)
        self.gap = torch.nn.AdaptiveAvgPool2d((1, 1))
        self.fc = torch.nn.Linear(feature_dim, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for ``x``."""
        attended = self.attn(self.backbone(x))
        pooled = self.gap(attended)
        return self.fc(pooled.flatten(1))

In [4]:
# Prefer the GPU when one is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Build the two-class network and move its parameters to the chosen device.
model = AttentionYOLOv3(num_classes=2)
model = model.to(device)

In [5]:
# Restore the trained weights. The file is passed straight to load_state_dict,
# so it is expected to hold only tensors; weights_only=True stops torch.load
# from unpickling arbitrary objects out of the checkpoint file.
model.load_state_dict(
    torch.load(
        "attention_yolov3_drowsy_8epochs.pth",
        map_location=device,
        weights_only=True,
    )
)
# Switch to inference mode so batch norm uses its stored running statistics.
model.eval()

AttentionYOLOv3(
  (backbone): Darknet53(
    (layer1): Sequential(
      (0): ConvBlock(
        (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): LeakyReLU(negative_slope=0.1)
      )
      (1): ConvBlock(
        (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): LeakyReLU(negative_slope=0.1)
      )
      (2): ResidualBlock(
        (layer): Sequential(
          (0): ConvBlock(
            (conv): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (act): LeakyReLU(negative_slope=0.1)
          )
          (1): ConvBlock(
            (conv): Conv2d(32, 64, kernel_size=(3, 3), stri

In [6]:
# Preprocessing applied to every frame: resize to 416x416, convert to a
# tensor, then normalise each channel with the mean/std given below.
transform = transforms.Compose(
    [
        transforms.Resize(size=(416, 416)),
        transforms.ToTensor(),
        transforms.Normalize(
            [0.485, 0.456, 0.406],
            [0.229, 0.224, 0.225],
        ),
    ]
)

In [10]:
cap = cv2.VideoCapture(0)  # 0 is your default webcam

if not cap.isOpened():
    # Raise instead of calling exit(): in a notebook exit() shuts the kernel
    # down, which would also discard the model that was just loaded.
    cap.release()
    raise RuntimeError("❌ Error: Could not open webcam.")

# Index -> label lookup for the model's two output logits.
# NOTE(review): assumes this order matches the label encoding used in
# training — confirm against the training notebook.
classes = ["drowsy", "not_drowsy"]

In [11]:
# Classify webcam frames until the stream ends or 'q' is pressed. The
# try/finally makes sure the camera and the preview window are released even
# when a frame fails to process or the cell is interrupted from the notebook.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame was delivered (camera unplugged or stream ended).
            break

        # Convert OpenCV frame (BGR) to PIL (RGB)
        img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        img_t = transform(img).unsqueeze(0).to(device)

        # Get prediction: index of the largest logit for the single image.
        with torch.no_grad():
            outputs = model(img_t)
            predicted = outputs.argmax(dim=1)
            label = classes[predicted.item()]

        # Draw prediction on frame
        cv2.putText(frame, label, (20, 40),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # Show the live frame
        cv2.imshow("Live Drowsiness Detection", frame)

        # Press 'q' to stop
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # -------------------------------
    # Cleanup
    # -------------------------------
    cap.release()
    cv2.destroyAllWindows()

In [9]:
# Manual release of the webcam handle. This cell carries an earlier execution
# count (In [9]) than the capture cells above, so it was run out of order —
# presumably to free a camera left open by a previous run; verify before
# relying on it in a top-to-bottom execution.
cap.release()