In [39]:
import copy
import cv2
import torch
import torchvision.transforms as transforms
from torchsummary import summary
from torch.utils.data import DataLoader
import numpy as np
from model import YOLOv1,YOLO_Quant,CNNBlock
from utils import convert_cellboxes
from dataset import COCODataset
from train import Compose

In [2]:
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
IMAGE_SIZE = 88
S = 4
print(DEVICE)

cpu


In [3]:
def cellboxes_to_boxes(out, S=S):
    converted_pred = convert_cellboxes(out,S)
    converted_pred = converted_pred.reshape(out.shape[0], S * S, -1)
    return converted_pred.tolist()

In [4]:
def non_max_suppression(bboxes, threshold):
    assert type(bboxes) == list
    bboxes = [box for box in bboxes if box[0] > threshold]
    return bboxes

In [28]:
transform = Compose([transforms.Resize((88, 88)), transforms.ToTensor()])
calibrate_dataset_1000 = COCODataset(transform=transform,dataset_size=1000)
calibrate_dataset_1000.load_dataset()
calibrate_loader = DataLoader(dataset=calibrate_dataset_1000, batch_size=64, num_workers=0)

Downloading split 'train' to '/Users/lukas/fiftyone/coco-2017/train' if necessary
Found annotations at '/Users/lukas/fiftyone/coco-2017/raw/instances_train2017.json'
Sufficient images already downloaded
Existing download of split 'train' is sufficient
Loading existing dataset 'coco-2017-train-1000'. To reload from disk, either delete the existing dataset or provide a custom `dataset_name` to use


In [34]:
# load model
model = YOLO_Quant().to(DEVICE)
#checkpoint = torch.load('overfit.pth.tar', map_location=torch.device(DEVICE))
#model.load_state_dict(checkpoint["state_dict"])
model.eval()

YOLO_Quant(
  (darknet): Sequential(
    (0): CNNBlock(
      (conv): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (batchnorm): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU()
    )
    (1): CNNBlock(
      (conv): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (batchnorm): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU()
    )
    (2): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (3): CNNBlock(
      (conv): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (batchnorm): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU()
    )
    (4): CNNBlock(
      (conv): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (batchnorm): BatchNorm2d(32, eps=1e-05, mo

In [35]:
def fuse_CNN_layers(model):
    for name, module in model.named_children():
        if name == 'darknet':
            for idx, sub_module in enumerate(module):
                if isinstance(sub_module,CNNBlock):
                    torch.ao.quantization.fuse_modules(sub_module,[['conv', 'batchnorm']],inplace=True)
        #if name == 'fcs':
            #torch.ao.quantization.fuse_modules(module,[['1', '2']],inplace=True)    

In [36]:
def calibrate_model(model,dataset):
    for x,_ in dataset:
        model(x.to(DEVICE))
        break

In [37]:
##--------------------Quantize model--------------------##
# turn model in eval mode
model.eval()
# set quantization backend
backend = "qnnpack"
model.qconfig = torch.ao.quantization.get_default_qconfig(backend)
# first fuse modules
fuse_CNN_layers(model)
print(model)
# then prepare model for static quantization
model_static_prepared = torch.ao.quantization.prepare(model, inplace=False)
# calibrate with representative dataset
calibrate_model(model_static_prepared,calibrate_loader)
# quantize model
model_static_quantized = torch.ao.quantization.convert(model_static_prepared, inplace=False)

YOLO_Quant(
  (darknet): Sequential(
    (0): CNNBlock(
      (conv): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (batchnorm): Identity()
      (relu): ReLU()
    )
    (1): CNNBlock(
      (conv): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (batchnorm): Identity()
      (relu): ReLU()
    )
    (2): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (3): CNNBlock(
      (conv): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (batchnorm): Identity()
      (relu): ReLU()
    )
    (4): CNNBlock(
      (conv): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (batchnorm): Identity()
      (relu): ReLU()
    )
    (5): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (6): CNNBlock(
      (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (batchnorm): Identity()
      (relu): ReLU()
  

In [15]:
def print_model_size(model):
    param_size = 0
    for param in model.parameters():
        param_size += param.nelement() * param.element_size()
    buffer_size = 0
    for buffer in model.buffers():
        buffer_size += buffer.nelement() * buffer.element_size()
    
    size_all_mb = (param_size + buffer_size) / 1024**2
    print('model size: {:.3f}MB'.format(size_all_mb))

In [40]:
input_size = (1, 1, 88, 88)
input_tensor = torch.randn(input_size)
torch.onnx.export(model_static_quantized, input_tensor, "model.onnx", verbose=True, opset_version=11)

Exported graph: graph(%x.1 : Float(1, 1, 88, 88, strides=[7744, 7744, 88, 1], requires_grad=0, device=cpu)):
  %/quant/Constant_output_0 : Byte(requires_grad=0, device=cpu) = onnx::Constant[value={0}, onnx_name="/quant/Constant"](), scope: model.YOLO_Quant::/torch.ao.nn.quantized.modules.Quantize::quant # /usr/local/anaconda3/envs/machineLearning/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/__init__.py:97:0
  %/quant/Constant_1_output_0 : Float(requires_grad=0, device=cpu) = onnx::Constant[value={0.00391965}, onnx_name="/quant/Constant_1"](), scope: model.YOLO_Quant::/torch.ao.nn.quantized.modules.Quantize::quant # /usr/local/anaconda3/envs/machineLearning/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/__init__.py:97:0
  %/quant/QuantizeLinear_output_0 : Byte(1, 1, 88, 88, strides=[7744, 7744, 88, 1], device=cpu) = onnx::QuantizeLinear[onnx_name="/quant/QuantizeLinear"](%x.1, %/quant/Constant_1_output_0, %/quant/Constant_output_0), scope: model.YOLO_Quant::/to

KeyboardInterrupt: 

In [17]:
print_model_size(model_static_quantized)

model size: 0.000MB


In [None]:
# Replace 'video.mp4' with the actual path to your video file
video_path = 'video2.MOV'

# Create a VideoCapture object
cap = cv2.VideoCapture(video_path)

# Check if the video is opened successfully
if not cap.isOpened():
    print("Error opening video file")
    exit(1)

In [None]:
def iterate_video(transform):
    # Read and process frames one by one
    while True:
        # Capture the current frame
        ret, frame = cap.read()
        # Convert the image to grayscale using cv2.cvtColor()
        grey_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    
        # Check if the frame was captured correctly
        if not ret:
            break
    
        # Predict frame
        grey_frame = transform(grey_frame)
        with torch.no_grad():
            boxes = model_static_quantized(grey_frame.unsqueeze(0).to(DEVICE))
            boxes = cellboxes_to_boxes(boxes)
            #boxes[0] = non_max_suppression(boxes[0], 0.4)
            for box in boxes[0]:
                box[1] = frame.shape[1]*box[1]
                box[2] = frame.shape[0]*box[2]
                box[3] = frame.shape[1]*box[3]
                box[4] = frame.shape[0]*box[4]
                print(f"Box: prob:{int(box[0])}, x:{int(box[1])}, y:{int(box[2])}, w:{int(box[3])}, h:{int(box[4])}")
                cv2.rectangle(frame, (int(box[1]-0.5*box[3]),int(box[2]-0.5*box[4])), (int(box[1]+0.5*box[3]),int(box[2]+0.5*box[4])), (255, 255, 255), 4)
    
        # Display the current frame
        cv2.imshow('Video', frame)
    
        # Wait for a key press to capture the next frame or quit
        key = cv2.waitKey(1) & 0xFF
        if key == 27:  # Esc key
            break
    
    # Release the video capture object and close all windows
    cv2.destroyWindow('Video')
    cv2.waitKey(1)
    cap.release()

In [None]:
class Compose(object):
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, img):
        for t in self.transforms:
            img = t(img)
        return img

transform = Compose([transforms.ToTensor(),transforms.Resize((IMAGE_SIZE, IMAGE_SIZE))])

In [None]:
iterate_video(transform)

In [None]:
summary(model,input_size=[(1,88,88)])