##### *Import all required libraries*

In [1]:
import cv2 as cv
import playsound

import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as T

import PIL.Image as Image
from threading import Thread

##### *Define* **ResNet34** *model*

In [2]:
class ResNet34(nn.Module):
    """ResNet-34 backbone whose final layer is replaced by a new linear head.

    Args:
        num_classes: Number of outputs of the replacement ``fc`` layer.
        pretrained: Forwarded to ``torchvision.models.resnet34``. Defaults to
            ``True``, which matches the previous hard-coded behaviour. Pass
            ``False`` when the weights are about to be overwritten by
            ``load_state_dict`` so the pretrained weights are not fetched.
    """

    def __init__(self, num_classes, pretrained = True):
        super().__init__()

        # The attribute name ``network`` is part of every parameter's
        # state-dict key, so it must not be renamed.
        self.network = torchvision.models.resnet34(pretrained = pretrained)

        # Swap the original classification layer for one with ``num_classes`` outputs.
        in_features = self.network.fc.in_features
        self.network.fc = nn.Linear(in_features, num_classes)

    def forward(self, x):
        """Return the wrapped network's output for the input batch ``x``."""
        return self.network(x)

##### *Define a function named* **classify** *which takes an image as input, applies transformations, feeds it through a* **pre-trained model** *, and returns the* **predicted class label** *for that image.*

In [3]:
def classify(model, transforms, image, classes):
    """Predict the class index of a single image.

    Args:
        model: Callable (e.g. an ``nn.Module``) that maps a batched image
            tensor to per-class scores laid out along dimension 1.
        transforms: Callable applied to the PIL image; must return a tensor.
        image: Array accepted by ``PIL.Image.fromarray``.
        classes: Class-label list. Not used by this function; kept so that
            existing call sites continue to work.

    Returns:
        int: Index of the highest-scoring class.
    """
    img = Image.fromarray(image)
    img = transforms(img).float()

    # Add a batch dimension: the model is called with a batch of one image.
    img = img.unsqueeze(0)

    # Inference only: disable autograd so no graph is recorded for each call.
    with torch.no_grad():
        output = model(img)

    return output.argmax(dim = 1).item()

##### *Define a function named* **sound** *that plays a sound file given its path (i.e., the path of a* **.wav** *file) as a parameter.*

In [4]:
def sound(path):
    """Play the audio file located at ``path`` using ``playsound``."""
    play = playsound.playsound
    play(path)

##### *Create an instance of the* **ResNet34** *model with 10 output classes and define a list of* **class labels** *corresponding to those classes.*

##### **Note:** *These class labels are used to translate the* **model's predictions** *(class indices) into the text used for* **labeling** *each video frame during the classification task.*

In [5]:
# Ten-way classifier: index 0 is the only "attentive" class, and the remaining
# nine indices all share the same "absent minded" label.
model = ResNet34(num_classes = 10)
classes = ['Attentive driving'] + ['Absent minded'] * 9



##### *Check whether* **CUDA-enabled GPU** *(Graphics Processing Unit) is available for use with the* **PyTorch** *library.*

In [6]:
# Device configuration: use CUDA when PyTorch reports it as available, else the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

##### *Now, we'll load a pre-trained model's* **state dictionary** *from a specified file, assign it to the* **model** *object, and then prepare the model for evaluation by switching it to* **evaluation mode.**

In [7]:
# Checkpoint holding the trained weights.
model_path = 'learning_models/armor_resnet34.pth'

# NOTE(review): torch.load unpickles the file, so only load checkpoints that
# come from a trusted source.
model.load_state_dict(
    torch.load(model_path, map_location = device)
)

# Switch to evaluation mode; as the last expression of the cell this also
# displays the model's structure.
model.eval()

ResNet34(
  (network): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_ru

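##### *Define the* **transform** *pipeline that prepares each image for the model: it handles* **resizing** *(plus any* **slight adjustment** *in color and brightness carried over from the* **training** *dataset augmentation) and converts the image to a tensor.*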

In [8]:
# Inference-time preprocessing: deterministic steps only.
# No random augmentation (e.g. ColorJitter) is applied here, because a random
# colour shift would let the same frame be classified differently on each call.
transforms = T.Compose([
    T.Resize((64, 64)),   # resize to the 64x64 input size
    T.ToTensor(),         # PIL image -> tensor
])

##### *Let's initialize a* **video capturing object** *for the default camera and a* **counter** *that tracks how many consecutive frames were classified as inattentive.*

In [9]:
# Open the default camera (device index 0).
cap = cv.VideoCapture(0)

# counter: consecutive frames whose predicted class was not 0.
# s:       latch that is set to 1 once the alert sound has been triggered.
counter = 0
s = 0

# NOTE(review): `count` is not referenced by any cell shown here; kept in case
# other code relies on it.
count = 0

##### *Now, we will perform a real-time* **object classification** *using a pre-trained model on* **video frames.**

In [10]:
# Consecutive non-attentive frames tolerated before the alert fires.
ALERT_FRAME_THRESHOLD = 210
# Key code returned by cv.waitKey for the Escape key.
ESC_KEY = 27

try:
    while True:
        isTrue, frame = cap.read()
        if not isTrue:
            # No frame could be read (camera unavailable or stream ended).
            break

        # OpenCV delivers frames in BGR channel order, whereas classify() builds
        # a PIL image, which interprets the array as RGB. Convert first so the
        # channels are not swapped.
        # NOTE(review): assumes the model was trained on RGB images - confirm.
        rgb_frame = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
        preds = classify(model, transforms, rgb_frame, classes)

        if preds == 0:
            # Attentive again: restart the streak and re-arm the alert so it can
            # fire for a later distraction episode as well.
            counter = 0
            s = 0
        else:
            counter += 1

        cv.putText(frame, classes[preds], (250, 350), cv.FONT_HERSHEY_TRIPLEX, 1, (0, 255, 0), 3)

        if counter > ALERT_FRAME_THRESHOLD:
            # NOTE: the colour tuple is interpreted as BGR, so (255, 0, 0) draws in blue.
            cv.putText(frame, 'CAUTION', (100, 100), cv.FONT_HERSHEY_TRIPLEX, 0.5, (255, 0, 0), 3)
            if s == 0:
                s = 1
                # Play the alert on a background thread so that the blocking
                # playback does not freeze the video loop.
                Thread(target = sound, args = ('beep.wav',), daemon = True).start()

        cv.imshow('Video', frame)
        # Mask to the low byte so the comparison works on platforms where
        # waitKey returns extra high bits.
        key = cv.waitKey(1) & 0xFF
        if key == ESC_KEY:
            break
finally:
    # Always free the camera and close the window, even if the loop is
    # interrupted or an exception is raised.
    cap.release()
    cv.destroyAllWindows()