# Live Inference
This workbork should be able to run live inference of the trained models using the webcam on the jetson. It loads in the stored weights, and then reads in the live camera output. 

In [1]:
# !{os.sys.executable} -m pip install --upgrade ultralytics==8.1.29

In [1]:
# import packages
import os
import cv2
import numpy as np
from IPython.display import Video, display
import time
import torch
from ultralytics import RTDETR
import torchvision.transforms as T
from PIL import Image
import random



from utils.pretrained_deployment import download_images, download_images2, download_images_with_resize, download_images_full_size
from utils.display import *
from utils.make_dataset_nyc_landmarks import make_nyc_dataset

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# reload modules every 2 seconds to see changes in notebook
%load_ext autoreload
%autoreload 2

%matplotlib inline

In [2]:
classes = {
    0: "EmpireState",
    1: "WTC",
    2: "432ParkAve",
    3: "UNBuilding",
    4: "Flatiron",
    5: "BrooklynBridge",
    6: "ChryslerBuilding",
    7: "MetlifeBuilding",
    8: "StatueOfLiberty",
    9: "30HudsonYards",
}

## Initialize the model

In [3]:


weights_path = '/home/jws2215/e6692-2024spring-finalproject-jwss-jws2215/runs/detect/detr/weights/best.pt' # vm path

# Load trained weights
model = RTDETR(weights_path)



# Testing Evaluation on Validation dataset

In [8]:
valid_path = '/home/jws2215/e6692-2024spring-finalproject-jwss-jws2215/datasets/nyc_landmarks/valid'

# Get a list of all .jpg files in the folder
image_files = [f for f in os.listdir(valid_path) if f.endswith('.jpg')]

# Randomly select 10 images
selected_images = random.sample(image_files, 10)

# Hyperparameters
conf = 0.85 # 85% sure its an object
visualize = False

# Loop over the selected images
for image_file in selected_images:
    # Load and preprocess the image
    image_path = os.path.join(valid_path, image_file)
    image = Image.open(image_path).convert('RGB')
    
    print("image", np.shape(image))
    
    
    predictions = model(image, conf = conf, visualize = visualize)
    print("predictions", predictions)

    # Display predictions
    predictions[0].show()

image (900, 1600, 3)

0: 640x640 1 EmpireState, 456.2ms
Speed: 2.8ms preprocess, 456.2ms inference, 0.4ms postprocess per image at shape (1, 3, 640, 640)
predictions [ultralytics.engine.results.Results object with attributes:

boxes: ultralytics.engine.results.Boxes object
keypoints: None
masks: None
names: {0: 'EmpireState', 1: 'WTC', 2: '432ParkAve', 3: 'UNBuilding', 4: 'Flatiron', 5: 'BrooklynBridge', 6: 'ChryslerBuilding', 7: 'MetlifeBuilding', 8: 'StatueOfLiberty', 9: '30HudsonYards'}
obb: None
orig_img: array([[[72, 41, 48],
        [72, 41, 48],
        [72, 41, 48],
        ...,
        [97, 67, 78],
        [97, 67, 78],
        [97, 67, 78]],

       [[72, 41, 48],
        [72, 41, 48],
        [72, 41, 48],
        ...,
        [97, 67, 78],
        [97, 67, 78],
        [97, 67, 78]],

       [[72, 41, 48],
        [72, 41, 48],
        [72, 41, 48],
        ...,
        [97, 67, 78],
        [97, 67, 78],
        [97, 67, 78]],

       ...,

       [[21, 16, 13],
        [

/usr/bin/xdg-open: 882: www-browser: not found
/usr/bin/xdg-open: 882: links2: not found
/usr/bin/xdg-open: 882: elinks: not found
/usr/bin/xdg-open: 882: links: not found
/usr/bin/xdg-open: 882: lynx: not found
/usr/bin/xdg-open: 882: w3m: not found
xdg-open: no method available for opening '/var/tmp/tmp08r7_o3s.PNG'


0: 640x640 1 EmpireState, 1 WTC, 430.4ms
Speed: 3.2ms preprocess, 430.4ms inference, 0.4ms postprocess per image at shape (1, 3, 640, 640)
predictions [ultralytics.engine.results.Results object with attributes:

boxes: ultralytics.engine.results.Boxes object
keypoints: None
masks: None
names: {0: 'EmpireState', 1: 'WTC', 2: '432ParkAve', 3: 'UNBuilding', 4: 'Flatiron', 5: 'BrooklynBridge', 6: 'ChryslerBuilding', 7: 'MetlifeBuilding', 8: 'StatueOfLiberty', 9: '30HudsonYards'}
obb: None
orig_img: array([[[198, 124,   0],
        [198, 124,   0],
        [199, 125,   0],
        ...,
        [174, 202, 219],
        [174, 202, 219],
        [174, 202, 219]],

       [[200, 126,   0],
        [200, 126,   0],
        [201, 127,   1],
        ...,
        [175, 203, 220],
        [175, 203, 220],
        [175, 203, 220]],

       [[202, 128,   2],
        [202, 128,   2],
        [202, 128,   2],
        ...,
        [175, 203, 220],
        [175, 203, 220],
        [175, 203, 220]],

     

/usr/bin/xdg-open: 882: www-browser: not found
/usr/bin/xdg-open: 882: links2: not found
/usr/bin/xdg-open: 882: elinks: not found
/usr/bin/xdg-open: 882: links: not found
/usr/bin/xdg-open: 882: lynx: not found
/usr/bin/xdg-open: 882: w3m: not found
xdg-open: no method available for opening '/var/tmp/tmp786aj4yg.PNG'


0: 640x640 1 ChryslerBuilding, 473.5ms
Speed: 2.9ms preprocess, 473.5ms inference, 0.4ms postprocess per image at shape (1, 3, 640, 640)
predictions [ultralytics.engine.results.Results object with attributes:

boxes: ultralytics.engine.results.Boxes object
keypoints: None
masks: None
names: {0: 'EmpireState', 1: 'WTC', 2: '432ParkAve', 3: 'UNBuilding', 4: 'Flatiron', 5: 'BrooklynBridge', 6: 'ChryslerBuilding', 7: 'MetlifeBuilding', 8: 'StatueOfLiberty', 9: '30HudsonYards'}
obb: None
orig_img: array([[[ 90,  95,  96],
        [ 74,  79,  82],
        [ 81,  85,  90],
        ...,
        [255, 252, 255],
        [250, 254, 255],
        [255, 255, 242]],

       [[ 60,  75,  91],
        [ 47,  60,  76],
        [ 54,  64,  81],
        ...,
        [  3,   7,   8],
        [ 52,  64,  66],
        [218, 225, 212]],

       [[ 56,  70,  89],
        [ 46,  60,  78],
        [ 57,  70,  86],
        ...,
        [ 46,  59,  51],
        [ 10,  28,  29],
        [ 52,  62,  50]],

       

/usr/bin/xdg-open: 882: www-browser: not found
/usr/bin/xdg-open: 882: links2: not found
/usr/bin/xdg-open: 882: elinks: not found
/usr/bin/xdg-open: 882: links: not found
/usr/bin/xdg-open: 882: lynx: not found
/usr/bin/xdg-open: 882: w3m: not found
xdg-open: no method available for opening '/var/tmp/tmpvbj1ow7i.PNG'


0: 640x640 1 ChryslerBuilding, 464.5ms
Speed: 2.4ms preprocess, 464.5ms inference, 0.4ms postprocess per image at shape (1, 3, 640, 640)
predictions [ultralytics.engine.results.Results object with attributes:

boxes: ultralytics.engine.results.Boxes object
keypoints: None
masks: None
names: {0: 'EmpireState', 1: 'WTC', 2: '432ParkAve', 3: 'UNBuilding', 4: 'Flatiron', 5: 'BrooklynBridge', 6: 'ChryslerBuilding', 7: 'MetlifeBuilding', 8: 'StatueOfLiberty', 9: '30HudsonYards'}
obb: None
orig_img: array([[[170, 204, 234],
        [195, 229, 255],
        [  0,  35,  65],
        ...,
        [124, 123, 127],
        [117, 116, 120],
        [ 57,  56,  60]],

       [[216, 250, 255],
        [ 81, 117, 147],
        [  3,  39,  69],
        ...,
        [150, 149, 153],
        [ 56,  55,  59],
        [ 76,  75,  79]],

       [[129, 166, 194],
        [ 10,  49,  77],
        [125, 164, 192],
        ...,
        [ 83,  79,  84],
        [ 56,  52,  57],
        [128, 124, 129]],

       

/usr/bin/xdg-open: 882: www-browser: not found
/usr/bin/xdg-open: 882: links2: not found
/usr/bin/xdg-open: 882: elinks: not found
/usr/bin/xdg-open: 882: links: not found
/usr/bin/xdg-open: 882: lynx: not found
/usr/bin/xdg-open: 882: w3m: not found
xdg-open: no method available for opening '/var/tmp/tmps6isk96q.PNG'


0: 640x640 1 StatueOfLiberty, 458.7ms
Speed: 3.1ms preprocess, 458.7ms inference, 0.4ms postprocess per image at shape (1, 3, 640, 640)
predictions [ultralytics.engine.results.Results object with attributes:

boxes: ultralytics.engine.results.Boxes object
keypoints: None
masks: None
names: {0: 'EmpireState', 1: 'WTC', 2: '432ParkAve', 3: 'UNBuilding', 4: 'Flatiron', 5: 'BrooklynBridge', 6: 'ChryslerBuilding', 7: 'MetlifeBuilding', 8: 'StatueOfLiberty', 9: '30HudsonYards'}
obb: None
orig_img: array([[[219, 161, 119],
        [221, 163, 121],
        [220, 165, 122],
        ...,
        [220, 183, 161],
        [221, 183, 159],
        [220, 182, 158]],

       [[221, 162, 117],
        [221, 164, 119],
        [222, 165, 120],
        ...,
        [221, 184, 162],
        [222, 184, 160],
        [221, 183, 159]],

       [[221, 161, 115],
        [222, 163, 117],
        [224, 165, 119],
        ...,
        [221, 185, 161],
        [223, 185, 161],
        [223, 185, 161]],

       .

/usr/bin/xdg-open: 882: www-browser: not found
/usr/bin/xdg-open: 882: links2: not found
/usr/bin/xdg-open: 882: elinks: not found
/usr/bin/xdg-open: 882: links: not found
/usr/bin/xdg-open: 882: lynx: not found
/usr/bin/xdg-open: 882: w3m: not found
xdg-open: no method available for opening '/var/tmp/tmpuvrppqwd.PNG'


0: 640x640 1 ChryslerBuilding, 450.8ms
Speed: 3.0ms preprocess, 450.8ms inference, 0.4ms postprocess per image at shape (1, 3, 640, 640)
predictions [ultralytics.engine.results.Results object with attributes:

boxes: ultralytics.engine.results.Boxes object
keypoints: None
masks: None
names: {0: 'EmpireState', 1: 'WTC', 2: '432ParkAve', 3: 'UNBuilding', 4: 'Flatiron', 5: 'BrooklynBridge', 6: 'ChryslerBuilding', 7: 'MetlifeBuilding', 8: 'StatueOfLiberty', 9: '30HudsonYards'}
obb: None
orig_img: array([[[183, 187, 158],
        [184, 188, 159],
        [184, 188, 159],
        ...,
        [174, 170, 135],
        [174, 170, 135],
        [176, 170, 135]],

       [[183, 187, 158],
        [182, 186, 157],
        [183, 187, 158],
        ...,
        [175, 171, 136],
        [174, 170, 135],
        [176, 170, 135]],

       [[184, 188, 159],
        [182, 186, 157],
        [183, 187, 158],
        ...,
        [176, 172, 137],
        [175, 171, 136],
        [175, 171, 136]],

       

/usr/bin/xdg-open: 882: www-browser: not found
/usr/bin/xdg-open: 882: links2: not found
/usr/bin/xdg-open: 882: elinks: not found
/usr/bin/xdg-open: 882: links: not found
/usr/bin/xdg-open: 882: lynx: not found
/usr/bin/xdg-open: 882: w3m: not found
xdg-open: no method available for opening '/var/tmp/tmp4tl2w7l4.PNG'


0: 640x640 1 EmpireState, 1 WTC, 469.0ms
Speed: 3.1ms preprocess, 469.0ms inference, 0.4ms postprocess per image at shape (1, 3, 640, 640)
predictions [ultralytics.engine.results.Results object with attributes:

boxes: ultralytics.engine.results.Boxes object
keypoints: None
masks: None
names: {0: 'EmpireState', 1: 'WTC', 2: '432ParkAve', 3: 'UNBuilding', 4: 'Flatiron', 5: 'BrooklynBridge', 6: 'ChryslerBuilding', 7: 'MetlifeBuilding', 8: 'StatueOfLiberty', 9: '30HudsonYards'}
obb: None
orig_img: array([[[178, 212, 201],
        [178, 212, 201],
        [177, 211, 200],
        ...,
        [167, 196, 200],
        [167, 196, 200],
        [167, 196, 200]],

       [[178, 212, 201],
        [178, 212, 201],
        [178, 212, 201],
        ...,
        [162, 191, 195],
        [163, 192, 196],
        [163, 192, 196]],

       [[178, 212, 201],
        [178, 212, 201],
        [178, 212, 201],
        ...,
        [156, 185, 189],
        [156, 185, 189],
        [157, 186, 190]],

     

/usr/bin/xdg-open: 882: www-browser: not found
/usr/bin/xdg-open: 882: links2: not found
/usr/bin/xdg-open: 882: elinks: not found
/usr/bin/xdg-open: 882: links: not found
/usr/bin/xdg-open: 882: lynx: not found
/usr/bin/xdg-open: 882: w3m: not found
xdg-open: no method available for opening '/var/tmp/tmpedc2898y.PNG'


0: 640x640 1 EmpireState, 443.2ms
Speed: 2.4ms preprocess, 443.2ms inference, 0.4ms postprocess per image at shape (1, 3, 640, 640)
predictions [ultralytics.engine.results.Results object with attributes:

boxes: ultralytics.engine.results.Boxes object
keypoints: None
masks: None
names: {0: 'EmpireState', 1: 'WTC', 2: '432ParkAve', 3: 'UNBuilding', 4: 'Flatiron', 5: 'BrooklynBridge', 6: 'ChryslerBuilding', 7: 'MetlifeBuilding', 8: 'StatueOfLiberty', 9: '30HudsonYards'}
obb: None
orig_img: array([[[184, 158, 144],
        [184, 158, 146],
        [188, 164, 152],
        ...,
        [208, 203, 202],
        [208, 203, 202],
        [208, 203, 202]],

       [[186, 160, 146],
        [185, 159, 145],
        [189, 165, 153],
        ...,
        [209, 201, 201],
        [209, 201, 201],
        [209, 201, 201]],

       [[185, 159, 145],
        [185, 159, 145],
        [189, 166, 151],
        ...,
        [208, 200, 200],
        [208, 200, 200],
        [207, 199, 199]],

       ...,


/usr/bin/xdg-open: 882: www-browser: not found
/usr/bin/xdg-open: 882: links2: not found
/usr/bin/xdg-open: 882: elinks: not found
/usr/bin/xdg-open: 882: links: not found
/usr/bin/xdg-open: 882: lynx: not found
/usr/bin/xdg-open: 882: w3m: not found
xdg-open: no method available for opening '/var/tmp/tmp5xhrfa_2.PNG'


0: 640x640 1 ChryslerBuilding, 447.0ms
Speed: 3.0ms preprocess, 447.0ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)
predictions [ultralytics.engine.results.Results object with attributes:

boxes: ultralytics.engine.results.Boxes object
keypoints: None
masks: None
names: {0: 'EmpireState', 1: 'WTC', 2: '432ParkAve', 3: 'UNBuilding', 4: 'Flatiron', 5: 'BrooklynBridge', 6: 'ChryslerBuilding', 7: 'MetlifeBuilding', 8: 'StatueOfLiberty', 9: '30HudsonYards'}
obb: None
orig_img: array([[[136,  60,  38],
        [136,  60,  38],
        [136,  60,  38],
        ...,
        [162,  74,  58],
        [162,  74,  58],
        [162,  74,  58]],

       [[136,  60,  38],
        [136,  60,  38],
        [136,  60,  38],
        ...,
        [161,  73,  57],
        [161,  73,  57],
        [161,  73,  57]],

       [[136,  60,  38],
        [136,  60,  38],
        [136,  60,  38],
        ...,
        [161,  73,  56],
        [161,  73,  56],
        [161,  73,  56]],

       

/usr/bin/xdg-open: 882: www-browser: not found
/usr/bin/xdg-open: 882: links2: not found
/usr/bin/xdg-open: 882: elinks: not found
/usr/bin/xdg-open: 882: links: not found
/usr/bin/xdg-open: 882: lynx: not found
/usr/bin/xdg-open: 882: w3m: not found
xdg-open: no method available for opening '/var/tmp/tmpbnzoa2cv.PNG'


0: 640x640 (no detections), 465.2ms
Speed: 2.4ms preprocess, 465.2ms inference, 0.4ms postprocess per image at shape (1, 3, 640, 640)
predictions [ultralytics.engine.results.Results object with attributes:

boxes: ultralytics.engine.results.Boxes object
keypoints: None
masks: None
names: {0: 'EmpireState', 1: 'WTC', 2: '432ParkAve', 3: 'UNBuilding', 4: 'Flatiron', 5: 'BrooklynBridge', 6: 'ChryslerBuilding', 7: 'MetlifeBuilding', 8: 'StatueOfLiberty', 9: '30HudsonYards'}
obb: None
orig_img: array([[[170,  58,  35],
        [170,  58,  35],
        [170,  58,  35],
        ...,
        [161, 104, 105],
        [160, 103, 104],
        [160, 103, 104]],

       [[170,  58,  35],
        [170,  58,  35],
        [170,  58,  35],
        ...,
        [161, 104, 105],
        [161, 104, 105],
        [160, 103, 104]],

       [[170,  58,  35],
        [170,  58,  35],
        [170,  58,  35],
        ...,
        [162, 105, 106],
        [161, 104, 105],
        [161, 104, 105]],

       ...

/usr/bin/xdg-open: 882: www-browser: not found
/usr/bin/xdg-open: 882: links2: not found
/usr/bin/xdg-open: 882: elinks: not found
/usr/bin/xdg-open: 882: links: not found
/usr/bin/xdg-open: 882: lynx: not found
/usr/bin/xdg-open: 882: w3m: not found
xdg-open: no method available for opening '/var/tmp/tmp3pjheh1m.PNG'
