## Importing Dependencies

In [None]:
import cv2
import matplotlib.pyplot as plt
import os
from PIL import Image
import numpy as np

from transformers import pipeline
from transformers import AutoImageProcessor, AutoModelForObjectDetection

import torch


# Setting up the Hugging Face Model

In [None]:
# High-level Hugging Face object-detection pipeline built on the DETR
# (ResNet-50 backbone) checkpoint.
# NOTE(review): `pipe` is not referenced by the cells visible here; the capture
# loop calls `processor` and `model` directly -- confirm it is still needed.
pipe = pipeline(task="object-detection", model="facebook/detr-resnet-50")

In [None]:

# Load the image pre-processor and the detection model from the same
# checkpoint so that pre-/post-processing matches the model's configuration.
checkpoint = "facebook/detr-resnet-50"
processor = AutoImageProcessor.from_pretrained(checkpoint)
model = AutoModelForObjectDetection.from_pretrained(checkpoint)

# Setting up Video Capture device

### For errors in this step:
- Change the device index passed to `cv2.VideoCapture`, i.e. 0 -> 1, 2, 3, ..., until the camera is accessible.


## Keys:
- Press **'q'** to abort the operation.
- Press **'v'** to detect the objects present in the current frame.


#### **Note**: The output may not always be accurate.


In [None]:
# Live webcam loop: shows the cropped camera feed, runs object detection on the
# current frame when 'v' is pressed, and stops when 'q' is pressed.
cap = cv2.VideoCapture(0)

# cv2.imwrite reports failure through its return value rather than raising, so
# make sure the snapshot directory exists before the first frame is saved.
os.makedirs('detection', exist_ok=True)

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret or frame is None:
            # No frame was delivered (camera unplugged, busy, or stream ended).
            # Slicing None would raise, so stop cleanly instead.
            print("Could not read a frame from the camera; stopping.")
            break

        # Keep only the region of interest: rows 200-799, columns 100-899.
        frame = frame[200:200 + 600, 100:100 + 800]
        if frame.size == 0:
            # The camera resolution is smaller than the crop offsets, which
            # leaves nothing to display or analyse.
            print("Cropped frame is empty; the camera resolution is too small.")
            break

        cv2.imshow('Webcam', frame)

        # Poll the keyboard exactly once per frame. Two separate waitKey calls
        # would each consume a key event, so a 'q' press could be swallowed by
        # the 'v' check (and vice versa).
        key = cv2.waitKey(10) & 0xFF
        if key == ord('q'):
            break
        if key != ord('v'):
            continue

        # Keep a copy of the analysed frame on disk for later inspection.
        snapshot_path = os.path.join('detection', 'input_image.jpg')
        if not cv2.imwrite(snapshot_path, frame):
            print(f"Warning: could not save snapshot to {snapshot_path}")

        # OpenCV frames are BGR; convert to RGB and hand the pixels to PIL
        # directly instead of re-reading the JPEG that was just written.
        image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        inputs = processor(images=image, return_tensors="pt")
        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            outputs = model(**inputs)

        # PIL reports size as (width, height); the post-processor expects
        # (height, width) to scale the boxes back to pixel coordinates.
        target_sizes = torch.tensor([image.size[::-1]])
        results = processor.post_process_object_detection(
            outputs, target_sizes=target_sizes, threshold=0.9
        )[0]

        for score, label, box in zip(
            results["scores"], results["labels"], results["boxes"]
        ):
            box = [round(i, 2) for i in box.tolist()]
            print(
                f"Detected {model.config.id2label[label.item()]} with confidence "
                f"{round(score.item(), 3)} at location {box} "
                f"\n"
            )
finally:
    # Always free the camera and close the preview window, even if detection
    # raised part-way through the loop.
    cap.release()
    cv2.destroyAllWindows()