https://www.learnopencv.com/read-write-and-display-a-video-using-opencv-cpp-python/
https://towardsdatascience.com/object-detection-and-tracking-in-pytorch-b3cf1a696a98
https://discuss.pytorch.org/t/how-to-classify-single-image-using-loaded-net/1411/17
https://medium.com/@josh_2774/deep-learning-with-pytorch-9574e74d17ad


In [1]:
import cv2
import numpy as np
from torchvision import models, transforms
from PIL import Image
from utils import *
import torch
from torch.autograd import Variable
from shapely.geometry import Polygon

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
checkpoint = 'BEST_checkpoint_ssd300-Copy1.pth.tar'
#checkpoint = 'checkpoint_ssd300.pth.tar'
checkpoint = torch.load(checkpoint)
model = checkpoint['model']
model = model.to(device)
model.eval()

SSD300(
  (base): VGGBase(
    (conv1_1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv1_2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv2_1): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv2_2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv3_1): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv3_2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv3_3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=True)
    (conv4_1): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv4_2): Conv2d(512, 512, kernel_size=(3, 3), 

In [3]:
def rotated_bbox_to_contours(cx, cy, w, h, a):
    """
    Return the four contours for a rotated bounding box
    :param cx: center x coordinate
    :param cy: center y coordinate
    :param w: width of the rectangle
    :param h: height of the rectangle
    :param a: the center angle of the rectangle as degrees
    :return: ((x1, y1), (x2, y2), (x3, y3), (x4, y4))
    """
    theta = float(a) * np.pi / 180 
    dx = w/2
    dy = h/2
    dxcos = dx * np.cos(theta)
    dxsin = dx * np.sin(theta)
    dycos = dy * np.cos(theta)
    dysin = dy * np.sin(theta)
    return (
        np.asarray([cx, cy]) + np.asarray([-dxcos - -dysin, -dxsin + -dycos]),
        np.asarray([cx, cy]) + np.asarray([dxcos - -dysin,  dxsin + -dycos]),
        np.asarray([cx, cy]) + np.asarray([dxcos -  dysin,  dxsin +  dycos]),
        np.asarray([cx, cy]) + np.asarray([-dxcos -  dysin, -dxsin +  dycos])
    )

def create_polygon(cx, cy, w, h, a):
    return Polygon(rotated_bbox_to_contours(cx, cy, w, h, a))

def create_polygon_list(rotated_bboxes):
    return [create_polygon(box[0], box[1], box[3], box[2], box[4]) for box in rotated_bboxes]

In [4]:

imsize = 300
resize = transforms.Resize((300, 300))
to_tensor = transforms.ToTensor()
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])


def randomSSDOutput():
    return torch.tensor([[.5,.5,.5,.5,15], [.3,.4,.3,.4,30], [.6,.6,.2,.2,0], [.3,.7,.1,.2,10]], dtype=torch.double)


def image_loader(frame):
    """load image, returns cuda tensor"""
    #image = Image.open(image_name)
    pilimg = Image.fromarray(frame)
    image = image_loader(pilimg)
    print(type(image))
    image = loader(image).float()
    image = Variable(image, requires_grad=True)
    image = image.unsqueeze(0)  #this is for VGG, may not be needed for ResNet
    return image.cuda()  #assumes that you're using GPU


def img_to_tensor(img):
  
            
    img = np.array(img)/255
    mean = np.array([0.485, 0.456, 0.406]) #provided mean
    std = np.array([0.229, 0.224, 0.225]) #provided std
    img = (img - mean)/std
    
            # Move color channels to first dimension as expected by PyTorch
    img = img.transpose((2, 0, 1))
   
    img = torch.from_numpy(img).type(torch.FloatTensor) 
    
    img.unsqueeze_(0)
    
    return img.cuda()

# Create a VideoCapture object and read from input file
# If the input is the camera, pass 0 instead of the video file name
cap = cv2.VideoCapture('../TestVideos/cvTest.mp4')

frame_width = int(cap.get(3))
frame_height = int(cap.get(4))
 
# Define the codec and create VideoWriter object.The output is stored in 'outpy.avi' file.
out = cv2.VideoWriter('outpy.avi',cv2.VideoWriter_fourcc('M','J','P','G'), 10, (frame_width,frame_height))
# Check if camera opened successfully
if (cap.isOpened()== False): 
    print("Error opening video stream or file")
 # Read until video is completed
frameCounter = 0
while(cap.isOpened()):
  # Capture frame-by-frame
    ret, frame = cap.read()
    if ret == True:
        # Display the resulting frame
        
        
        if frameCounter % 1 == 0:
            
            img = Image.fromarray(frame)
            
            # Transform
            image = normalize(to_tensor(resize(img)))

            # Move to default device
            image = image.to(device)

            # Forward prop.
            predicted_locs, predicted_scores = model(image.unsqueeze(0))
            '''
            if img.size[0] > img.size[1]:
                img.thumbnail((10000, imsize))
            else:
                img.thumbnail((imsize, 10000))
           
            
            
            left_margin = (img.width-imsize)/2
            bottom_margin = (img.height-imsize)/2
            right_margin = left_margin + imsize
            top_margin = bottom_margin + imsize
            cropped_img = img.crop((left_margin, bottom_margin, right_margin,   
                      top_margin))
            
            '''
            model_input = img_to_tensor(img)
            
            
            det_boxes = list()
            det_labels = list()
            det_scores = list()
            # Forward prop.
            
            predicted_locs, predicted_scores = model(model_input)

            # Detect objects in SSD output
            det_boxes_batch, det_labels_batch, det_scores_batch = model.detect_objects(predicted_locs, 
                                        predicted_scores,min_score=0.01, max_overlap=0.45,top_k=200)
            det_boxes.extend(det_boxes_batch)
            det_labels.extend(det_labels_batch)
            det_scores.extend(det_scores_batch)                                                                
                                                                       
              
            
            
            
            
            # Move detections to the CPU
            det_boxes = det_boxes[0].to('cpu')

            # Transform to original image dimensions
            original_dims = torch.FloatTensor(
                    [original_image.width, original_image.height, original_image.width, original_image.height]).unsqueeze(0)
            det_boxes = det_boxes * original_dims

            # Decode class integer labels
            det_labels = [rev_label_map[l] for l in det_labels[0].to('cpu').tolist()]
            print(rev_label_map)
        
            # Suppress specific classes, if needed
            drawing_boxes = []
            print(det_labels)
            for i in range(det_boxes.size(0)):
                

                # Boxes
                box_location = det_boxes[i].tolist()
                
                cx,cy,w,h = box_location
                a = 0
                drawing_boxes.append(rotated_bbox_to_contours(cx,cy,h,w,a))
            cv2.polylines(frame, np.int32(drawing_boxes), True, 255)
            
            
            
            
            
            '''
            
    
            boxes = det_boxes[0]#randomSSDOutput()
            drawing_boxes = []
     
            for box in boxes:
                
                a = 0
                cx,cy,h,w = box
                cx = cx * frame_padded.shape[1]# + left_margin
                cy = cy * frame_padded.shape[0]# + top_margin
                h *= frame_padded.shape[0]
                w *= frame_padded.shape[1]
                
                drawing_boxes.append(rotated_bbox_to_contours(cx,cy,h,w,a))
                
            cv2.polylines(frame, np.int32(drawing_boxes), True, 255)
          '''
            
            
        out.write(frame)
        frameCounter += 1
        if frameCounter > 20:
            break
        '''
        Ignore since its not a playbick thingy
        
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        '''
 
  # Break the loop
    else: 
        break
# When everything done, release the video capture object
cap.release()
out.release()

RuntimeError: CUDA out of memory. Tried to allocate 1012.50 MiB (GPU 0; 14.73 GiB total capacity; 10.90 GiB already allocated; 529.94 MiB free; 11.62 MiB cached)