# Object Detection with a Raspberry Pi 4

<code>sudo apt-get update
sudo apt-get upgrade
pip install torch torchvision torchaudio

</code>

In [3]:

from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import time
import matplotlib.pyplot as plt
import torch
import numpy as np
from torchvision import models, transforms

import cv2
from PIL import Image

# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Class labels; the inference code indexes this list with the position of the
# highest-scoring model output, so the order must match the training labels.
classes = ['Cat', 'Deer', 'Dog', 'Human', 'Owl', 'Racoon']

# Per-frame preprocessing: convert the HxWxC uint8 image to a CxHxW float
# tensor in [0, 1], then normalize each channel with the mean/std values
# conventionally used for ImageNet-pretrained torchvision models.
# Resize/CenterCrop are left disabled, so frames are fed at capture resolution.
preprocess = transforms.Compose([
#         transforms.Resize(256),
#         transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])


# ImageNet-pretrained ResNet-18 whose final fully connected layer is replaced
# so that it emits one logit per entry in `classes`.
model_ft = models.resnet18(pretrained=True)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, len(classes))

model_ft = model_ft.to(device)

# Training objects (loss, optimizer, LR schedule). They are not used by the
# inference code visible in this file; kept for any cells that fine-tune
# `model_ft`.
criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

# Load the fine-tuned model that is actually used for inference.
# map_location="cpu" lets a checkpoint that was saved on a GPU machine be
# deserialized on hardware without CUDA (such as the Raspberry Pi), and it
# matches the CPU tensors produced by `preprocess`.
# SECURITY NOTE: torch.load unpickles arbitrary Python objects -- only load
# checkpoint files from a trusted source.
# NOTE(review): recent PyTorch releases default to weights_only=True, which
# rejects whole pickled models; pass weights_only=False there if loading fails.
model = torch.load('Resnet18.pt', map_location="cpu")

# Open the default camera (device index 0).
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Fail right away with a precise message instead of surfacing later as a
    # generic "failed to read frame" error.
    raise RuntimeError("failed to open camera 0")

# Request a small, fast stream. These are requests only: the driver may
# round them to the nearest supported mode or ignore them.
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 224)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 224)
cap.set(cv2.CAP_PROP_FPS, 36)

# Bookkeeping for the frames-per-second report.
started = time.time()       # wall-clock time at which capture was set up
last_logged = time.time()   # time of the most recent FPS printout
frame_count = 0             # frames processed since the last FPS printout

# Live classification loop: grab a frame, classify it, print the top-1 label
# with its probability, show the annotated frame, and report throughput about
# once per second. Press 'q' in the preview window to stop.

# One fixed window title. Using the predicted label as the window name would
# open a new window every time the prediction changes.
WINDOW_NAME = "Object Detection"

# Inference mode (fixed batch-norm statistics, no dropout); set once, not
# once per frame.
model.eval()

# Feed inputs on whichever device the loaded model lives on.
model_device = next(model.parameters()).device

try:
    with torch.no_grad():
        while True:
            # read frame
            ret, image = cap.read()
            if not ret:
                raise RuntimeError("failed to read frame")

            # convert opencv output from BGR to RGB
            image_data = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            # preprocess
            input_tensor = preprocess(image_data)

            # create a mini-batch as expected by the model
            input_batch = input_tensor.unsqueeze(0).to(model_device)

            # run model
            output = model(input_batch)

            # Print the average frame rate roughly once per second.
            frame_count += 1
            now = time.time()
            if now - last_logged > 1:
                print(f"{frame_count / (now-last_logged)} fps")
                last_logged = now
                frame_count = 0

            # Top-1 prediction: softmax over the logits of the single batch
            # item, then pick the most probable class.
            probabilities = output[0].softmax(dim=0)
            idx = int(probabilities.argmax())
            val = probabilities[idx]
            print(f"{val.item()*100:.2f}% {classes[idx]}")

            # Draw the label onto the frame and show it in the single window.
            cv2.putText(image, classes[idx], (5, 20),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
            cv2.imshow(WINDOW_NAME, image)

            # Mask to the low byte: some platforms set extra high bits in the
            # key code returned by waitKey.
            if cv2.waitKey(20) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the window, including when a frame
    # read fails or the loop is interrupted.
    cap.release()
    cv2.destroyAllWindows()
                


90.87% Human
85.30% Human
88.01% Human
83.14% Human
88.97% Human
88.18% Human
83.58% Human
85.36% Human
90.98% Human
91.08% Human
10.880597085322565 fps
94.85% Human
94.54% Human
96.45% Human
96.75% Human
97.18% Human
95.84% Human
95.90% Human
95.64% Human
95.95% Human
96.30% Human
94.89% Human
94.93% Human
96.28% Human
96.19% Human
95.06% Human
14.258544818241093 fps
95.80% Human
96.31% Human
95.81% Human
96.51% Human
97.92% Human
97.79% Human
97.41% Human
97.41% Human
96.79% Human
96.88% Human
97.35% Human
97.27% Human
95.82% Human
95.53% Human
96.24% Human
14.16367437301411 fps
95.11% Human
96.10% Human
96.01% Human
94.47% Human
94.56% Human
94.93% Human
94.56% Human
90.97% Human
92.15% Human
91.50% Human
96.55% Human
96.79% Human
92.69% Human
95.06% Human
13.806155091914453 fps
95.70% Human
96.54% Human
95.78% Human
94.59% Human
93.69% Human
93.93% Human
93.48% Human
93.69% Human
95.79% Human
94.90% Human
94.07% Human
94.81% Human
94.38% Human
95.35% Human
13.745342884182648 fps
94