In [1]:
# Third-party dependencies: model summaries, PyTorch, and OpenCV for image loading.
# NOTE(review): `summary` is not used in the cells visible here — confirm it is needed.
from torchinfo import summary
from torch import nn
import torch
import sys
import cv2
# Must run before the `yolov3` imports below.
# NOTE(review): machine-specific absolute path; presumably added so the yolov3 checkout's
# internal imports resolve — confirm, and consider deriving it from a configurable root.
sys.path.append('/home/nguyensolbadguy/Code_Directory/compression/models/yolov3') 

# Project-local YOLOv3 code: model loading/detection helpers, the detection-head layer
# type, and box post-processing utilities.
from yolov3.pytorchyolo import models,detect
from yolov3.pytorchyolo.models import YOLOLayer,create_modules
from yolov3.pytorchyolo.utils.utils import non_max_suppression,rescale_boxes
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

Define the Yolov3 model by ultralytics

In [2]:
# Darknet config and pretrained weights for YOLOv3.
# NOTE(review): absolute, machine-specific paths — consider a configurable root directory.
cfg_path = '/home/nguyensolbadguy/Code_Directory/compression/models/yolov3/config/yolov3.cfg'
weights_path = '/home/nguyensolbadguy/Code_Directory/compression/models/yolov3/weights/yolov3.weights'

model = models.load_model(cfg_path, weights_path)
# Switch to inference mode; as the last expression this also displays the model structure.
model.eval()

Darknet(
  (module_list): ModuleList(
    (0): Sequential(
      (conv_0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (batch_norm_0): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (leaky_0): LeakyReLU(negative_slope=0.1)
    )
    (1): Sequential(
      (conv_1): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (batch_norm_1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (leaky_1): LeakyReLU(negative_slope=0.1)
    )
    (2): Sequential(
      (conv_2): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (batch_norm_2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (leaky_2): LeakyReLU(negative_slope=0.1)
    )
    (3): Sequential(
      (conv_3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (batch_norm_3): BatchNorm2d(64, eps=1e-05, momen

Check the layers and the output of each layer

In [4]:
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
image_path = "/home/nguyensolbadguy/Code_Directory/compression/models/yolov3/messi.jpg"
img = cv2.imread(image_path)

# cv2.imread does not raise on a missing/unreadable file; it returns None, which would
# otherwise surface as an opaque error inside cvtColor below.
if img is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Convert OpenCV bgr to rgb
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
print(img.shape)

# Runs the YOLO model on the image
boxes = detect.detect_image(model, img)

print(boxes)

(729, 1296, 3)
[[5.3251965e+01 1.7344706e+01 7.9719135e+02 7.3750366e+02 9.9736142e-01
  0.0000000e+00]
 [1.2074897e+03 4.7534967e+02 1.2929420e+03 6.9665808e+02 9.9536353e-01
  0.0000000e+00]
 [6.2586298e+02 1.1301550e+02 1.1306825e+03 7.3140393e+02 9.7910422e-01
  0.0000000e+00]
 [8.1113174e+01 9.0996696e+01 1.3527492e+02 1.5972826e+02 5.2961564e-01
  3.2000000e+01]]


Create sub module from layer 5 (layer 13 in the paper)

In [139]:
# Index of the last layer probed here; the partial model defined later starts at this index.
SPLIT_IDX = 12

# Random stand-in for a single 416x416 RGB image batch; only the shapes matter below.
input_tensor = torch.randn(1, 3, 416, 416).to(device)
with torch.no_grad():
    x = input_tensor
    for i in range(SPLIT_IDX + 1):  # layers 0 through SPLIT_IDX (inclusive)
        # NOTE(review): every module is called directly, including shortcut entries.
        # PartialYOLO.forward sums earlier outputs for shortcuts itself, so these modules
        # presumably do not perform the residual add — treat the printed shapes as
        # reliable, but not the activation values. Confirm against pytorchyolo.
        x = model.module_list[i](x)
        print(f"Layer {i} output shape: {x.shape}")

Layer 0 output shape: torch.Size([1, 32, 416, 416])
Layer 1 output shape: torch.Size([1, 64, 208, 208])
Layer 2 output shape: torch.Size([1, 32, 208, 208])
Layer 3 output shape: torch.Size([1, 64, 208, 208])
Layer 4 output shape: torch.Size([1, 64, 208, 208])
Layer 5 output shape: torch.Size([1, 128, 104, 104])
Layer 6 output shape: torch.Size([1, 64, 104, 104])
Layer 7 output shape: torch.Size([1, 128, 104, 104])
Layer 8 output shape: torch.Size([1, 128, 104, 104])
Layer 9 output shape: torch.Size([1, 64, 104, 104])
Layer 10 output shape: torch.Size([1, 128, 104, 104])
Layer 11 output shape: torch.Size([1, 128, 104, 104])
Layer 12 output shape: torch.Size([1, 256, 52, 52])


Create sub module from layer 5 (layer 13 in the paper)

Test the submodule with an appropriately shaped input.  
We expect this submodule to receive input with shape (B, 128, H/4, W/4).  
We set B=1 and H=W=416 in this test, so the input has shape (1, 128, 104, 104).

In [146]:
class PartialYOLO(nn.Module):
    """Run only the tail of a Darknet-style YOLO model, starting at ``start_idx``.

    The wrapper reuses ``full_model.module_defs`` and ``full_model.module_list``
    directly (no copy), so it shares its layers with ``full_model``.

    Args:
        full_model: Model exposing ``module_defs`` (one dict per layer with at least a
            ``"type"`` key) and ``module_list`` (the matching modules).
        start_idx: Index of the first layer to execute; the tensor passed to
            ``forward`` is fed straight into this layer.
        image_size: Value forwarded as the second argument to every ``"yolo"`` layer.
        feature_idx: Index of the layer whose output is returned as ``F_tilde``.
            Defaults to 12, the index that was previously hard-coded.

    Raises:
        ValueError: If ``feature_idx`` is not in ``[start_idx, len(module_list))``,
            because that layer would never be executed.
    """

    def __init__(self, full_model, start_idx, image_size, feature_idx=12):
        super().__init__()
        num_layers = len(full_model.module_list)
        # Fail fast: previously an unreachable feature layer only surfaced as an
        # UnboundLocalError at the end of forward().
        if not start_idx <= feature_idx < num_layers:
            raise ValueError(
                f"feature_idx={feature_idx} must satisfy "
                f"start_idx ({start_idx}) <= feature_idx < {num_layers}"
            )
        self.module_defs = full_model.module_defs
        self.module_list = full_model.module_list
        self.start_idx = start_idx
        self.feature_idx = feature_idx
        self.yolo_layers = [layer[0]
                            for layer in self.module_list if isinstance(layer[0], YOLOLayer)]
        self.image_size = image_size

    def forward(self, x):
        """Execute layers ``start_idx`` .. end on ``x``.

        Returns:
            A tuple ``(F_tilde, detections)``: the output of layer ``feature_idx`` and
            the outputs of all ``"yolo"`` layers concatenated along dimension 1.

        Raises:
            RuntimeError: If a route/shortcut layer refers to a layer that was not
                executed (i.e. one before ``start_idx``), or if the feature layer
                was never reached.
        """
        # Outputs are keyed by absolute layer index so that route/shortcut references
        # keep their original meaning even though execution starts mid-network.
        layer_outputs, yolo_outputs = {}, []
        F_tilde = None

        for i in range(self.start_idx, len(self.module_list)):
            module_def = self.module_defs[i]
            module = self.module_list[i]
            layer_type = module_def["type"]

            if layer_type in ("convolutional", "upsample", "maxpool"):
                x = module(x)

            elif layer_type == "route":
                layers = [int(l) for l in module_def["layers"].split(",")]
                # Negative entries are relative to the current layer index.
                layers = [l if l >= 0 else i + l for l in layers]

                try:
                    route_tensors = [layer_outputs[l] for l in layers]
                except KeyError as e:
                    raise RuntimeError(
                        f"Route error at layer {i}: missing dependency {e}") from e

                x = torch.cat(route_tensors, dim=1)

                if "groups" in module_def:
                    # Keep only the channel slice belonging to the requested group.
                    group_size = x.shape[1] // int(module_def["groups"])
                    group_id = int(module_def["group_id"])
                    x = x[:, group_size * group_id : group_size * (group_id + 1)]

            elif layer_type == "shortcut":
                from_idx = int(module_def["from"])
                shortcut_idx = i + from_idx if from_idx < 0 else from_idx

                try:
                    x = layer_outputs[i - 1] + layer_outputs[shortcut_idx]
                except KeyError as e:
                    raise RuntimeError(
                        f"Shortcut error at layer {i}: missing dependency {e}") from e

            elif layer_type == "yolo":
                x = module[0](x, self.image_size)
                yolo_outputs.append(x)

            layer_outputs[i] = x
            # Capture after every layer type, not only inside the conv branch.
            if i == self.feature_idx:
                F_tilde = x

        if F_tilde is None:
            raise RuntimeError(
                f"Feature layer {self.feature_idx} was not executed "
                f"(start_idx={self.start_idx})")

        return F_tilde,torch.cat(yolo_outputs, 1)
    

# Random stand-in for the intermediate feature map fed into layer 12
# (128 channels at 104x104, matching conv_12's expected input for a 416x416 image).
input_tensor = torch.randn(1, 128, 104, 104).to(device)
partial_model = PartialYOLO(model, start_idx=12,image_size=416).to(device)


def detect_image(model, img_size=416, conf_thres=0.5, nms_thres=0.5, features=None):
    """Run a partial model on a feature tensor and print the post-processed detections.

    Args:
        model: Callable returning ``(F_tilde, predictions)`` for a feature tensor.
        img_size: Passed to ``rescale_boxes`` as the current (network input) size.
        conf_thres: Confidence threshold passed to ``non_max_suppression``.
        nms_thres: Second threshold passed to ``non_max_suppression``.
        features: Tensor to feed to ``model``. When omitted, the notebook-level
            ``input_tensor`` is used, which preserves the original behaviour.

    Returns:
        The ``F_tilde`` tensor returned by ``model``.
    """
    if features is None:
        features = input_tensor

    # Inference only: skip autograd bookkeeping, as the shape-probe cell already does.
    with torch.no_grad():
        F_tilde, out = model(features)
        detections = non_max_suppression(out, conf_thres, nms_thres)
        # Only the first entry of the result is kept (presumably the first batch item).
        # NOTE(review): the target shape is hard-coded to 416x416 — confirm intent.
        detections = rescale_boxes(detections[0], img_size, [416,416])
    print(detections)
    return F_tilde


F_tilde = detect_image(partial_model)
print(F_tilde.shape)

tensor([], size=(0, 6))
torch.Size([1, 256, 52, 52])


In [145]:
# Dump every layer from index 12 onward: its index, the module, and its config entry.
module_list = model.module_list
module_def = model.module_defs
for i in range(12, len(module_list)):
    # One print call with a newline separator emits the same three lines per layer.
    print(i, module_list[i], module_def[i], sep="\n")

12
Sequential(
  (conv_12): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (batch_norm_12): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (leaky_12): LeakyReLU(negative_slope=0.1)
)
{'type': 'convolutional', 'batch_normalize': '1', 'filters': '256', 'size': '3', 'stride': '2', 'pad': '1', 'activation': 'leaky'}
13
Sequential(
  (conv_13): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
  (batch_norm_13): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (leaky_13): LeakyReLU(negative_slope=0.1)
)
{'type': 'convolutional', 'batch_normalize': '1', 'filters': '128', 'size': '1', 'stride': '1', 'pad': '1', 'activation': 'leaky'}
14
Sequential(
  (conv_14): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (batch_norm_14): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (leaky_14): LeakyReLU(negative_slope