https://www.learnopencv.com/read-write-and-display-a-video-using-opencv-cpp-python/
https://towardsdatascience.com/object-detection-and-tracking-in-pytorch-b3cf1a696a98
https://discuss.pytorch.org/t/how-to-classify-single-image-using-loaded-net/1411/17
https://medium.com/@josh_2774/deep-learning-with-pytorch-9574e74d17ad


In [1]:
from models import *
import cv2
import numpy as np
from torchvision import models, transforms
from PIL import Image
import torch
from torch.autograd import Variable
from shapely.geometry import Polygon

In [2]:
def rotated_bbox_to_contours(cx, cy, w, h, a):
    """
    Return the four corner points of a rotated bounding box.

    Inputs may be plain Python numbers, NumPy scalars/arrays or CPU torch
    tensors; everything is converted to float64 NumPy values up front, so the
    angle no longer has to be a torch tensor.

    :param cx: center x coordinate
    :param cy: center y coordinate
    :param w: width of the rectangle (extent along the box's own x axis)
    :param h: height of the rectangle (extent along the box's own y axis)
    :param a: rotation angle of the rectangle around its center, in degrees
    :return: tuple of four NumPy arrays ``(x, y)``, one per corner, in the
        order (-w/2, -h/2), (+w/2, -h/2), (+w/2, +h/2), (-w/2, +h/2) of the
        unrotated box, i.e. consecutive corners share an edge
    """
    cx, cy, w, h, a = (
        np.asarray(value, dtype=np.float64) for value in (cx, cy, w, h, a)
    )

    theta = a * np.pi / 180
    cos_t = np.cos(theta)
    sin_t = np.sin(theta)

    # Half extents projected onto the image axes.
    dx = w / 2
    dy = h / 2
    dxcos = dx * cos_t
    dxsin = dx * sin_t
    dycos = dy * cos_t
    dysin = dy * sin_t

    center = np.array([cx, cy])
    # Each offset is the rotation matrix applied to (+-dx, +-dy).
    offsets = (
        (-dxcos + dysin, -dxsin - dycos),
        (dxcos + dysin, dxsin - dycos),
        (dxcos - dysin, dxsin + dycos),
        (-dxcos - dysin, -dxsin + dycos),
    )
    return tuple(center + np.array(offset) for offset in offsets)

def create_polygon(cx, cy, w, h, a):
    """
    Build a shapely Polygon for a rotated bounding box.

    :param cx: center x coordinate
    :param cy: center y coordinate
    :param w: width of the rectangle
    :param h: height of the rectangle
    :param a: rotation angle in degrees
    :return: Polygon whose vertices are the four corners of the box
    """
    corners = rotated_bbox_to_contours(cx, cy, w, h, a)
    polygon = Polygon(corners)
    return polygon

def create_polygon_list(rotated_bboxes):
    """
    Convert a sequence of rotated boxes into shapely Polygons.

    :param rotated_bboxes: iterable of boxes indexed as
        (cx, cy, h, w, angle) -- note that height comes before width
    :return: list with one Polygon per input box, in input order
    """
    polygons = []
    for bbox in rotated_bboxes:
        center_x, center_y = bbox[0], bbox[1]
        height, width = bbox[2], bbox[3]
        angle = bbox[4]
        # create_polygon expects width before height.
        polygons.append(create_polygon(center_x, center_y, width, height, angle))
    return polygons

In [4]:



# Locations of the YOLOv3 network definition and its weights file.
config_path = 'PyTorch-YOLOv3/config/yolov3.cfg'
weights_path = 'PyTorch-YOLOv3/weights/yolov3.weights'

# Pretrained ResNet-18, switched to evaluation mode.
visionModel = models.resnet18(pretrained=True)
visionModel.eval()

# Target size passed to Resize below and used for the network input frames.
imsize = 224

# PIL image -> resized, normalised tensor.
# NOTE(review): the mean/std values look like the usual ImageNet channel
# statistics -- confirm against the model's documentation.
loader = transforms.Compose(
    [
        transforms.Resize(imsize),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)



def randomSSDOutput():
    """
    Return a fixed stand-in for detector output.

    Despite the name, the values are hard-coded rather than random.

    :return: float64 tensor of shape (4, 5), one box of five values per row
    """
    placeholder_rows = [
        [.5, .5, .5, .5, 15],
        [.3, .4, .3, .4, 30],
        [.6, .6, .2, .2, 0],
        [.3, .7, .1, .2, 10],
    ]
    detections = torch.tensor(placeholder_rows, dtype=torch.float64)
    return detections


def image_loader(frame):
    """
    Convert an image array into a batched model input tensor.

    The frame is wrapped in a PIL image, pushed through the module-level
    ``loader`` transform, given a leading batch dimension and marked as
    requiring gradients.

    :param frame: image array accepted by ``Image.fromarray``
    :return: float tensor with a leading batch dimension of 1; placed on the
        GPU when CUDA is available, otherwise left on the CPU
    """
    pilimg = Image.fromarray(frame)
    # Apply the transform pipeline directly; calling image_loader() again at
    # this point would recurse forever.
    image = loader(pilimg).float()
    image.requires_grad_(True)
    # Add the batch dimension expected by the network.
    image = image.unsqueeze(0)
    if torch.cuda.is_available():
        return image.cuda()
    return image


def img_to_tensor(img):
    """
    Turn an image into a normalised, batched float tensor.

    :param img: anything ``np.array`` can convert to a (height, width, 3)
        array with values in the 0-255 range
    :return: torch.FloatTensor of shape (1, 3, height, width)
    """
    channel_mean = np.array([0.485, 0.456, 0.406])  # provided mean
    channel_std = np.array([0.229, 0.224, 0.225])  # provided std

    # Scale to [0, 1], then standardise each channel.
    pixels = np.array(img) / 255
    normalised = (pixels - channel_mean) / channel_std

    # PyTorch expects channels first: (H, W, C) -> (C, H, W).
    channels_first = normalised.transpose((2, 0, 1))

    tensor = torch.from_numpy(channels_first).type(torch.FloatTensor)
    # Prepend the batch dimension in place.
    tensor.unsqueeze_(0)
    return tensor

# Read the input video, overlay the placeholder rotated detections on each
# frame and store the annotated frames in 'outpy.avi'.
# If the input is the camera, pass 0 instead of the video file name.
cap = cv2.VideoCapture('TestVideos/cvTest.mp4')

frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Define the codec and create the VideoWriter object (MJPG, 10 fps). Frames
# passed to out.write() must have exactly (frame_width, frame_height) pixels.
out = cv2.VideoWriter(
    'outpy.avi',
    cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
    10,
    (frame_width, frame_height),
)

# Check if the source opened successfully.
if not cap.isOpened():
    print("Error opening video stream or file")

frameCounter = 0
try:
    # Read until the video is completed or the frame limit is reached.
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        original_height, original_width = frame.shape[:2]

        # Pad on the bottom/right to a square so the resize below does not
        # distort the aspect ratio; the original pixels stay at the top-left.
        bottom = max(original_width - original_height, 0)
        right = max(original_height - original_width, 0)
        frame_padded = cv2.copyMakeBorder(
            frame, 0, bottom, 0, right, cv2.BORDER_CONSTANT
        )
        frame_resized = cv2.resize(frame_padded, (imsize, imsize))

        # OpenCV delivers BGR; convert so the per-channel normalisation in
        # img_to_tensor is applied to the matching RGB channels.
        img = Image.fromarray(cv2.cvtColor(frame_resized, cv2.COLOR_BGR2RGB))
        model_input = img_to_tensor(img)

        # TODO: replace the placeholder boxes with real model output:
        # outputs = visionModel(model_input)
        boxes = randomSSDOutput()

        drawing_boxes = []
        for box in boxes:
            # Boxes are (cx, cy, h, w, angle) in relative coordinates.
            cx, cy, h, w, a = box
            drawing_boxes.append(
                rotated_bbox_to_contours(
                    cx * original_width,
                    cy * original_height,
                    w * original_width,
                    h * original_height,
                    a,
                )
            )

        # Draw on the unpadded frame: the box coordinates are in original
        # pixels, and only a frame of the writer's size can be written.
        cv2.polylines(frame, np.int32(drawing_boxes), True, 255)
        out.write(frame)

        frameCounter += 1
        if frameCounter > 100:
            break
finally:
    # When everything is done (or on error), release capture and writer.
    cap.release()
    out.release()