### Import libraries

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from PIL import Image as Img
from torch.utils.data import Dataset
import os
import cv2
import numpy as np

from IPython.display import display, Image
import ipywidgets as widgets
from IPython.display import clear_output

In [7]:
class ResNet(nn.Module): #Resnet18 model
    def __init__(self, ResidualBlock, num_classes=3):
        super(ResNet, self).__init__()
        self.inchannel = 64
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False),  # Change kernel_size, stride, and padding
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)  # Add MaxPool2d
        )
        self.layer1 = self.make_layer(ResidualBlock, 64, 2, stride=1)
        self.layer2 = self.make_layer(ResidualBlock, 128, 2, stride=2)
        self.layer3 = self.make_layer(ResidualBlock, 256, 2, stride=2)        
        self.layer4 = self.make_layer(ResidualBlock, 512, 2, stride=2)        
        self.fc = nn.Linear(512, num_classes)
        
    def make_layer(self, block, channels, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.inchannel, channels, stride))
            self.inchannel = channels
        return nn.Sequential(*layers)
    
    def forward(self, x):
        out = self.conv1(x)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 7)  # Change average pooling to have a kernel_size of 7 to match input size
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out

In [8]:
class ResidualBlock(nn.Module):
    def __init__(self, inchannel, outchannel, stride=1):
        super(ResidualBlock, self).__init__()
        self.left = nn.Sequential(
            nn.Conv2d(inchannel, outchannel, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(outchannel),
            nn.ReLU(inplace=True),
            nn.Conv2d(outchannel, outchannel, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(outchannel)
        )
        self.shortcut = nn.Sequential()
        if stride != 1 or inchannel != outchannel:
            self.shortcut = nn.Sequential(
                nn.Conv2d(inchannel, outchannel, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(outchannel)
            )
            
    def forward(self, x):
        out = self.left(x)
        out = out + self.shortcut(x)
        out = F.relu(out)
        
        return out

In [9]:
def ResNet18():
    return ResNet(ResidualBlock)

In [10]:
def main():
    # Load the model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    net = ResNet18().to(device)
    checkpoint = torch.load('/home/tr/server/datasets/test/train/checkpoints/resnet_model_epoch_8.pth', map_location=device)
    net.load_state_dict(checkpoint['model_state_dict'])
    net.eval()
    
    # Define the transform to match model's input size
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    
    # Open a connection to the webcam (default is device 0)
    cap = cv2.VideoCapture(0)
    image_widget = widgets.Image()
    display(image_widget)
    
    if not cap.isOpened():
        print("Error: Could not open webcam.")
        return
    try:
        # Continuously capture frames from the webcam
        while True:
            ret, frame = cap.read()

            if not ret:
                print("Error: Could not read frame.")
                break

            # Convert frame to PIL image
            image = Img.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

            # Apply the transform
            image = transform(image).unsqueeze(0).to(device)

            # Perform inference
            with torch.no_grad():
                outputs = net(image)
                _, predicted = torch.max(outputs.data, 1)
                prediction = predicted.item()

            # Display the prediction on the frame
            label = f"Predicted Class: {prediction}"
            cv2.putText(frame, label, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            
            ret, jpeg = cv2.imencode('.jpg', frame)
            if not ret:
                break
            image_widget.value = jpeg.tobytes()

    except KeyboardInterrupt:
        cap.release()
        print("Video stream stopped.")
        clear_output(wait=True)


if __name__ == "__main__":
    main()

[ WARN:0@8.992] global cap_v4l.cpp:997 open VIDEOIO(V4L2:/dev/video2): can't open camera by index
[ERROR:0@9.509] global obsensor_uvc_stream_channel.cpp:159 getStreamChannelGroup Camera index out of range


Image(value=b'')

Error: Could not open webcam.
