In [1]:
import math

import torch

from objdet.YOLOV8.utils.tal import make_anchors
from objdet.YOLOV8.nn.modules.conv import Conv, Concat
from objdet.YOLOV8.nn.modules.block import DFL, C2f, SPPF
from objdet.YOLOV8.nn.modules.head import Detect


def fuse_conv(conv, norm):
    fused_conv = torch.nn.Conv2d(conv.in_channels,
                                 conv.out_channels,
                                 kernel_size=conv.kernel_size,
                                 stride=conv.stride,
                                 padding=conv.padding,
                                 groups=conv.groups,
                                 bias=True).requires_grad_(False).to(conv.weight.device)

    w_conv = conv.weight.clone().view(conv.out_channels, -1)
    w_norm = torch.diag(norm.weight.div(torch.sqrt(norm.eps + norm.running_var)))
    fused_conv.weight.copy_(torch.mm(w_norm, w_conv).view(fused_conv.weight.size()))

    b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias
    b_norm = norm.bias - norm.weight.mul(norm.running_mean).div(torch.sqrt(norm.running_var + norm.eps))
    fused_conv.bias.copy_(torch.mm(w_norm, b_conv.reshape(-1, 1)).reshape(-1) + b_norm)

    return fused_conv



class DarkNet(torch.nn.Module):
    def __init__(self, width, depth):
        super().__init__()
        self.p1 = []
        self.p2 = []
        self.p3 = []
        self.p4 = []
        self.p5 = []

        # p1/2
        self.p1.append(Conv(width[0], width[1], k=3, s=2, p=1))
        # p2/4
        self.p2.append(Conv(width[1], width[2], k=3, s=2, p=1))
        self.p2.append(C2f(width[2], width[2], depth[0]))
        # p3/8
        self.p3.append(Conv(width[2], width[3], k=3, s=2, p=1))
        self.p3.append(C2f(width[3], width[3], depth[1]))
        # p4/16
        self.p4.append(Conv(width[3], width[4], k=3, s=2, p=1))
        self.p4.append(C2f(width[4], width[4], depth[2]))
        # p5/32
        self.p5.append(Conv(width[4], width[5], k=3, s=2, p=1))
        self.p5.append(C2f(width[5], width[5], depth[0]))
        self.p5.append(SPPF(width[5], width[5]))

        self.p1 = torch.nn.Sequential(*self.p1)
        self.p2 = torch.nn.Sequential(*self.p2)
        self.p3 = torch.nn.Sequential(*self.p3)
        self.p4 = torch.nn.Sequential(*self.p4)
        self.p5 = torch.nn.Sequential(*self.p5)

    def forward(self, x):
        p1 = self.p1(x)
        p2 = self.p2(p1)
        p3 = self.p3(p2)
        p4 = self.p4(p3)
        p5 = self.p5(p4)
        return p3, p4, p5


class DarkFPN(torch.nn.Module):
    def __init__(self, width, depth):
        super().__init__()
        self.up = torch.nn.Upsample(scale_factor=2)
        self.h1 = C2f(width[4] + width[5], width[4], depth[0])
        self.h2 = C2f(width[3] + width[4], width[3], depth[0])
        self.h3 = Conv(width[3], width[3], k=3, s=2, p=1)
        self.h4 = C2f(width[3] + width[4], width[4], depth[0])
        self.h5 = Conv(width[4], width[4], k=3, s=2, p=1)
        self.h6 = C2f(width[4] + width[5], width[5], depth[0])

    def forward(self, x):
        p3, p4, p5 = x
        p4 = self.h1(torch.cat(tensors=[self.up(p5), p4], dim=1))
        p3 = self.h2(torch.cat(tensors=[self.up(p4), p3], dim=1))
        p4 = self.h4(torch.cat(tensors=[self.h3(p3), p4], dim=1))
        p5 = self.h6(torch.cat(tensors=[self.h5(p4), p5], dim=1))
        return p3, p4, p5


class Head(torch.nn.Module):
    shape = None
    anchors = torch.empty(0)
    strides = torch.empty(0)

    def __init__(self, nc=80, filters=()):
        super().__init__()
        self.nc = nc  # number of classes
        self.no = nc + 4  # number of outputs per anchor
        self.stride = torch.zeros(len(filters))  # strides computed during build

        box = max(64, filters[0] // 4)
        cls = max(80, filters[0], self.nc)

        self.box = torch.nn.ModuleList(torch.nn.Sequential(Conv(x, box, k=3, p=1),
                                                           Conv(box, box, k=3, p=1),
                                                           torch.nn.Conv2d(box, out_channels=4,
                                                                           kernel_size=1)) for x in filters)
        self.cls = torch.nn.ModuleList(torch.nn.Sequential(Conv(x, cls, k=3, p=1),
                                                           Conv(cls, cls, k=3, p=1),
                                                           torch.nn.Conv2d(cls, out_channels=self.nc,
                                                                           kernel_size=1)) for x in filters)

    def forward(self, x):
        shape = x[0].shape
        for i, (box, cls) in enumerate(zip(self.box, self.cls)):
            x[i] = torch.cat(tensors=(box(x[i]), cls(x[i])), dim=1)
        if self.training:
            return x
        if self.shape != shape:
            self.shape = shape
            self.anchors, self.strides = (i.transpose(0, 1) for i in make_anchors(x, self.stride))

        x = torch.cat([i.view(x[0].shape[0], self.no, -1) for i in x], dim=2)
        box, cls = x.split(split_size=(4, self.nc), dim=1)

        a, b = box.chunk(2, 1)
        a = self.anchors.unsqueeze(0) - a
        b = self.anchors.unsqueeze(0) + b
        box = torch.cat(tensors=((a + b) / 2, b - a), dim=1)

        return torch.cat(tensors=(box * self.strides, cls.sigmoid()), dim=1)

    def initialize_biases(self):
        # Initialize biases
        # WARNING: requires stride availability
        for box, cls, s in zip(self.box, self.cls, self.stride):
            # box
            box[-1].bias.data[:] = 1.0
            # cls (.01 objects, 80 classes, 640 img)
            cls[-1].bias.data[:self.nc] = math.log(5 / self.nc / (640 / s) ** 2)


class YOLO(torch.nn.Module):
    def __init__(self, width, depth, num_classes):
        super().__init__()
        self.net = DarkNet(width, depth)
        self.fpn = DarkFPN(width, depth)

        img_dummy = torch.zeros(1, width[0], 256, 256)
        self.head = Head(num_classes, (width[3], width[4], width[5]))
        self.head.stride = torch.tensor([256 / x.shape[-2] for x in self.forward(img_dummy)])
        self.stride = self.head.stride
        self.head.initialize_biases()

    def forward(self, x):
        x = self.net(x)
        x = self.fpn(x)
        return self.head(list(x))

    def fuse(self):
        for m in self.modules():
            if type(m) is Conv and hasattr(m, 'norm'):
                m.conv = fuse_conv(m.conv, m.norm)
                m.forward = m.fuse_forward
                delattr(m, 'norm')
        return self


def yolo_v8_n(num_classes: int = 80):
    depth = [1, 2, 2]
    width = [3, 16, 32, 64, 128, 256]
    return YOLO(width, depth, num_classes)


def yolo_v8_t(num_classes: int = 80):
    depth = [1, 2, 2]
    width = [3, 24, 48, 96, 192, 384]
    return YOLO(width, depth, num_classes)


def yolo_v8_s(num_classes: int = 80):
    depth = [1, 2, 2]
    width = [3, 32, 64, 128, 256, 512]
    return YOLO(width, depth, num_classes)


def yolo_v8_m(num_classes: int = 80):
    depth = [2, 4, 4]
    width = [3, 48, 96, 192, 384, 576]
    return YOLO(width, depth, num_classes)


def yolo_v8_l(num_classes: int = 80):
    depth = [3, 6, 6]
    width = [3, 64, 128, 256, 512, 512]
    return YOLO(width, depth, num_classes)


def yolo_v8_x(num_classes: int = 80):
    depth = [3, 6, 6]
    width = [3, 80, 160, 320, 640, 640]
    return YOLO(width, depth, num_classes)

In [4]:
image_path = '/mnt/d/projects/objdet/src/objdet/samples/sample.png'

In [6]:
import cv2
img = cv2.imread(image_path)
img.shape

(576, 768, 3)

In [8]:
import numpy as np
def input_transform(image):
    image = image.astype(np.float32)[:, :, ::-1]
    image = image / 255.0
    return image

img2= input_transform(img)
img2.shape

(576, 768, 3)

In [None]:
# img = cv2.imread(file_path, cv2.IMREAD_COLOR)
# height, width, _ = img.shape
# padded_img = pad_image(img)
# padded_height, padded_width, _ = padded_img.shape

# sv_img = np.zeros_like(padded_img).astype(np.uint8)
# img_transformed = input_transform(padded_img)
# img_transformed = img_transformed.transpose((2, 0, 1)).copy()
# img_tensor = torch.from_numpy(img_transformed).unsqueeze(0).cuda()

# pred = model(img_tensor)
# pred = F.interpolate(pred, size=(padded_height, padded_width), mode='bilinear', align_corners=True)
# pred = torch.argmax(pred, dim=1).squeeze(0).cpu().numpy()
# pred = pred[:height, :width]  # 패딩된 부분을 제거하여 원본 크기로 되돌림

In [9]:
img3 = img2.transpose((2,0,1))
print(img3.shape)
img4 = torch.from_numpy(img3).unsqueeze(0).cuda()
print(img4.shape)

(3, 576, 768)
torch.Size([1, 3, 576, 768])


In [11]:
model = yolo_v8_n().cuda()
prediction = model(img4)

In [16]:
print(len(prediction))

3


In [24]:
32*24

768

In [18]:
for p in prediction:
    print(p.shape)

torch.Size([1, 84, 72, 96])
torch.Size([1, 84, 36, 48])
torch.Size([1, 84, 18, 24])


In [28]:
img.shape

(576, 768, 3)

In [29]:
type(img)

numpy.ndarray

In [26]:
img4.shape

torch.Size([1, 3, 576, 768])

In [51]:
prediction[2][0].shape

torch.Size([84, 18, 24])

In [60]:
prediction[0].shape

torch.Size([1, 84, 72, 96])

In [71]:
image_path = '/mnt/d/projects/objdet/src/objdet/samples/sample.png'
im = cv2.imread(image_path)
im0 = im.copy()
im = im.astype(np.float32)[:,:,::-1]
im = im/255
im = im.transpose((2,0,1))
im = torch.from_numpy(im).unsqueeze(0)

model = yolo_v8_n()

pred = model(im)

print(im.shape)
print(pred.shape)

torch.Size([1, 3, 576, 768])


AttributeError: 'list' object has no attribute 'shape'

In [77]:
print(pred[0].shape)
pred[0].reshape(-1,6).shape

torch.Size([1, 84, 72, 96])


torch.Size([96768, 6])

In [78]:
for p in pred:
    print(p.shape)

torch.Size([1, 84, 72, 96])
torch.Size([1, 84, 36, 48])
torch.Size([1, 84, 18, 24])


In [79]:
m2 = yolo_v8_x()

pred2= m2(im)

In [80]:
for p in pred2:
    print(p.shape)

torch.Size([1, 84, 72, 96])
torch.Size([1, 84, 36, 48])
torch.Size([1, 84, 18, 24])


In [94]:
result1= model(im)

model.eval()
result2 = model(im)


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


In [97]:
result1[0].shape

torch.Size([1, 84, 72, 96])

In [103]:
result2.reshape(-1,6).shape

torch.Size([127008, 6])

In [105]:
array = result2.reshape(-1,6)

In [106]:
boxes = array[:, :4]  # Bounding box coordinates
confidences = array[:, 4]  # Confidence scores
class_scores = array[:, 5]  # Class scores

In [101]:
result2.shape

torch.Size([1, 84, 9072])

In [109]:
import numpy as np
import cv2
image_path = '/mnt/d/projects/objdet/src/objdet/samples/sample.png'
im = cv2.imread(image_path)
im0 = im.copy()
im = im.astype(np.float32)[:,:,::-1]
im = im/255
im = im.transpose((2,0,1))
im = torch.from_numpy(im).unsqueeze(0)

model = yolo_v8_n()

pred = model(im)

print(im.shape)
print(pred.shape)

model.eval()
result2 = model(im)
array = result2.reshape(-1,6)
boxes = array[:, :4]  # Bounding box coordinates
confidences = array[:, 4]  # Confidence scores
class_scores = array[:, 5]  # Class scores
# Assume 'boxes', 'confidences', and 'class_scores' are already defined
indices = cv2.dnn.NMSBoxes(boxes.tolist(), confidences.tolist(), score_threshold=0.5, nms_threshold=0.4)

final_boxes = boxes[indices].reshape(-1, 4)
final_confidences = confidences[indices]
final_class_scores = class_scores[indices]

In [111]:
indices

array([1151, 1166, 1165, 1164, 1163, 1441, 1140, 1139, 1161, 1440, 1719, 1687, 1137, 1160, 2956, 2687, 1136, 5975, 2672, 1559, 2952, 1543, 2671, 1527], dtype=int32)

In [112]:
img.shape

(576, 768, 3)

In [None]:
final_boxes

In [2]:
ckpt = torch.load('yolov8n.pt')

In [None]:
ckpt

In [5]:
model = yolo_v8_n()

# 저장된 가중치 로드
state_dict = torch.load('yolov8n.pt')

# 모델의 state_dict에서 필요한 가중치만 추출
filtered_state_dict = {k: v for k, v in state_dict.items() if k in model.state_dict()}

# 모델에 가중치 로드
model.load_state_dict(filtered_state_dict)

RuntimeError: Error(s) in loading state_dict for YOLO:
	Missing key(s) in state_dict: "net.p1.0.conv.weight", "net.p1.0.bn.weight", "net.p1.0.bn.bias", "net.p1.0.bn.running_mean", "net.p1.0.bn.running_var", "net.p2.0.conv.weight", "net.p2.0.bn.weight", "net.p2.0.bn.bias", "net.p2.0.bn.running_mean", "net.p2.0.bn.running_var", "net.p2.1.cv1.conv.weight", "net.p2.1.cv1.bn.weight", "net.p2.1.cv1.bn.bias", "net.p2.1.cv1.bn.running_mean", "net.p2.1.cv1.bn.running_var", "net.p2.1.cv2.conv.weight", "net.p2.1.cv2.bn.weight", "net.p2.1.cv2.bn.bias", "net.p2.1.cv2.bn.running_mean", "net.p2.1.cv2.bn.running_var", "net.p2.1.m.0.cv1.conv.weight", "net.p2.1.m.0.cv1.bn.weight", "net.p2.1.m.0.cv1.bn.bias", "net.p2.1.m.0.cv1.bn.running_mean", "net.p2.1.m.0.cv1.bn.running_var", "net.p2.1.m.0.cv2.conv.weight", "net.p2.1.m.0.cv2.bn.weight", "net.p2.1.m.0.cv2.bn.bias", "net.p2.1.m.0.cv2.bn.running_mean", "net.p2.1.m.0.cv2.bn.running_var", "net.p3.0.conv.weight", "net.p3.0.bn.weight", "net.p3.0.bn.bias", "net.p3.0.bn.running_mean", "net.p3.0.bn.running_var", "net.p3.1.cv1.conv.weight", "net.p3.1.cv1.bn.weight", "net.p3.1.cv1.bn.bias", "net.p3.1.cv1.bn.running_mean", "net.p3.1.cv1.bn.running_var", "net.p3.1.cv2.conv.weight", "net.p3.1.cv2.bn.weight", "net.p3.1.cv2.bn.bias", "net.p3.1.cv2.bn.running_mean", "net.p3.1.cv2.bn.running_var", "net.p3.1.m.0.cv1.conv.weight", "net.p3.1.m.0.cv1.bn.weight", "net.p3.1.m.0.cv1.bn.bias", "net.p3.1.m.0.cv1.bn.running_mean", "net.p3.1.m.0.cv1.bn.running_var", "net.p3.1.m.0.cv2.conv.weight", "net.p3.1.m.0.cv2.bn.weight", "net.p3.1.m.0.cv2.bn.bias", "net.p3.1.m.0.cv2.bn.running_mean", "net.p3.1.m.0.cv2.bn.running_var", "net.p3.1.m.1.cv1.conv.weight", "net.p3.1.m.1.cv1.bn.weight", "net.p3.1.m.1.cv1.bn.bias", "net.p3.1.m.1.cv1.bn.running_mean", "net.p3.1.m.1.cv1.bn.running_var", "net.p3.1.m.1.cv2.conv.weight", "net.p3.1.m.1.cv2.bn.weight", "net.p3.1.m.1.cv2.bn.bias", "net.p3.1.m.1.cv2.bn.running_mean", "net.p3.1.m.1.cv2.bn.running_var", "net.p4.0.conv.weight", "net.p4.0.bn.weight", "net.p4.0.bn.bias", "net.p4.0.bn.running_mean", "net.p4.0.bn.running_var", "net.p4.1.cv1.conv.weight", "net.p4.1.cv1.bn.weight", "net.p4.1.cv1.bn.bias", "net.p4.1.cv1.bn.running_mean", "net.p4.1.cv1.bn.running_var", "net.p4.1.cv2.conv.weight", "net.p4.1.cv2.bn.weight", "net.p4.1.cv2.bn.bias", "net.p4.1.cv2.bn.running_mean", "net.p4.1.cv2.bn.running_var", "net.p4.1.m.0.cv1.conv.weight", "net.p4.1.m.0.cv1.bn.weight", "net.p4.1.m.0.cv1.bn.bias", "net.p4.1.m.0.cv1.bn.running_mean", "net.p4.1.m.0.cv1.bn.running_var", "net.p4.1.m.0.cv2.conv.weight", "net.p4.1.m.0.cv2.bn.weight", "net.p4.1.m.0.cv2.bn.bias", "net.p4.1.m.0.cv2.bn.running_mean", "net.p4.1.m.0.cv2.bn.running_var", "net.p4.1.m.1.cv1.conv.weight", "net.p4.1.m.1.cv1.bn.weight", "net.p4.1.m.1.cv1.bn.bias", "net.p4.1.m.1.cv1.bn.running_mean", "net.p4.1.m.1.cv1.bn.running_var", "net.p4.1.m.1.cv2.conv.weight", "net.p4.1.m.1.cv2.bn.weight", "net.p4.1.m.1.cv2.bn.bias", "net.p4.1.m.1.cv2.bn.running_mean", "net.p4.1.m.1.cv2.bn.running_var", "net.p5.0.conv.weight", "net.p5.0.bn.weight", "net.p5.0.bn.bias", "net.p5.0.bn.running_mean", "net.p5.0.bn.running_var", "net.p5.1.cv1.conv.weight", "net.p5.1.cv1.bn.weight", "net.p5.1.cv1.bn.bias", "net.p5.1.cv1.bn.running_mean", "net.p5.1.cv1.bn.running_var", "net.p5.1.cv2.conv.weight", "net.p5.1.cv2.bn.weight", "net.p5.1.cv2.bn.bias", "net.p5.1.cv2.bn.running_mean", "net.p5.1.cv2.bn.running_var", "net.p5.1.m.0.cv1.conv.weight", "net.p5.1.m.0.cv1.bn.weight", "net.p5.1.m.0.cv1.bn.bias", "net.p5.1.m.0.cv1.bn.running_mean", "net.p5.1.m.0.cv1.bn.running_var", "net.p5.1.m.0.cv2.conv.weight", "net.p5.1.m.0.cv2.bn.weight", "net.p5.1.m.0.cv2.bn.bias", "net.p5.1.m.0.cv2.bn.running_mean", "net.p5.1.m.0.cv2.bn.running_var", "net.p5.2.cv1.conv.weight", "net.p5.2.cv1.bn.weight", "net.p5.2.cv1.bn.bias", "net.p5.2.cv1.bn.running_mean", "net.p5.2.cv1.bn.running_var", "net.p5.2.cv2.conv.weight", "net.p5.2.cv2.bn.weight", "net.p5.2.cv2.bn.bias", "net.p5.2.cv2.bn.running_mean", "net.p5.2.cv2.bn.running_var", "fpn.h1.cv1.conv.weight", "fpn.h1.cv1.bn.weight", "fpn.h1.cv1.bn.bias", "fpn.h1.cv1.bn.running_mean", "fpn.h1.cv1.bn.running_var", "fpn.h1.cv2.conv.weight", "fpn.h1.cv2.bn.weight", "fpn.h1.cv2.bn.bias", "fpn.h1.cv2.bn.running_mean", "fpn.h1.cv2.bn.running_var", "fpn.h1.m.0.cv1.conv.weight", "fpn.h1.m.0.cv1.bn.weight", "fpn.h1.m.0.cv1.bn.bias", "fpn.h1.m.0.cv1.bn.running_mean", "fpn.h1.m.0.cv1.bn.running_var", "fpn.h1.m.0.cv2.conv.weight", "fpn.h1.m.0.cv2.bn.weight", "fpn.h1.m.0.cv2.bn.bias", "fpn.h1.m.0.cv2.bn.running_mean", "fpn.h1.m.0.cv2.bn.running_var", "fpn.h2.cv1.conv.weight", "fpn.h2.cv1.bn.weight", "fpn.h2.cv1.bn.bias", "fpn.h2.cv1.bn.running_mean", "fpn.h2.cv1.bn.running_var", "fpn.h2.cv2.conv.weight", "fpn.h2.cv2.bn.weight", "fpn.h2.cv2.bn.bias", "fpn.h2.cv2.bn.running_mean", "fpn.h2.cv2.bn.running_var", "fpn.h2.m.0.cv1.conv.weight", "fpn.h2.m.0.cv1.bn.weight", "fpn.h2.m.0.cv1.bn.bias", "fpn.h2.m.0.cv1.bn.running_mean", "fpn.h2.m.0.cv1.bn.running_var", "fpn.h2.m.0.cv2.conv.weight", "fpn.h2.m.0.cv2.bn.weight", "fpn.h2.m.0.cv2.bn.bias", "fpn.h2.m.0.cv2.bn.running_mean", "fpn.h2.m.0.cv2.bn.running_var", "fpn.h3.conv.weight", "fpn.h3.bn.weight", "fpn.h3.bn.bias", "fpn.h3.bn.running_mean", "fpn.h3.bn.running_var", "fpn.h4.cv1.conv.weight", "fpn.h4.cv1.bn.weight", "fpn.h4.cv1.bn.bias", "fpn.h4.cv1.bn.running_mean", "fpn.h4.cv1.bn.running_var", "fpn.h4.cv2.conv.weight", "fpn.h4.cv2.bn.weight", "fpn.h4.cv2.bn.bias", "fpn.h4.cv2.bn.running_mean", "fpn.h4.cv2.bn.running_var", "fpn.h4.m.0.cv1.conv.weight", "fpn.h4.m.0.cv1.bn.weight", "fpn.h4.m.0.cv1.bn.bias", "fpn.h4.m.0.cv1.bn.running_mean", "fpn.h4.m.0.cv1.bn.running_var", "fpn.h4.m.0.cv2.conv.weight", "fpn.h4.m.0.cv2.bn.weight", "fpn.h4.m.0.cv2.bn.bias", "fpn.h4.m.0.cv2.bn.running_mean", "fpn.h4.m.0.cv2.bn.running_var", "fpn.h5.conv.weight", "fpn.h5.bn.weight", "fpn.h5.bn.bias", "fpn.h5.bn.running_mean", "fpn.h5.bn.running_var", "fpn.h6.cv1.conv.weight", "fpn.h6.cv1.bn.weight", "fpn.h6.cv1.bn.bias", "fpn.h6.cv1.bn.running_mean", "fpn.h6.cv1.bn.running_var", "fpn.h6.cv2.conv.weight", "fpn.h6.cv2.bn.weight", "fpn.h6.cv2.bn.bias", "fpn.h6.cv2.bn.running_mean", "fpn.h6.cv2.bn.running_var", "fpn.h6.m.0.cv1.conv.weight", "fpn.h6.m.0.cv1.bn.weight", "fpn.h6.m.0.cv1.bn.bias", "fpn.h6.m.0.cv1.bn.running_mean", "fpn.h6.m.0.cv1.bn.running_var", "fpn.h6.m.0.cv2.conv.weight", "fpn.h6.m.0.cv2.bn.weight", "fpn.h6.m.0.cv2.bn.bias", "fpn.h6.m.0.cv2.bn.running_mean", "fpn.h6.m.0.cv2.bn.running_var", "head.box.0.0.conv.weight", "head.box.0.0.bn.weight", "head.box.0.0.bn.bias", "head.box.0.0.bn.running_mean", "head.box.0.0.bn.running_var", "head.box.0.1.conv.weight", "head.box.0.1.bn.weight", "head.box.0.1.bn.bias", "head.box.0.1.bn.running_mean", "head.box.0.1.bn.running_var", "head.box.0.2.weight", "head.box.0.2.bias", "head.box.1.0.conv.weight", "head.box.1.0.bn.weight", "head.box.1.0.bn.bias", "head.box.1.0.bn.running_mean", "head.box.1.0.bn.running_var", "head.box.1.1.conv.weight", "head.box.1.1.bn.weight", "head.box.1.1.bn.bias", "head.box.1.1.bn.running_mean", "head.box.1.1.bn.running_var", "head.box.1.2.weight", "head.box.1.2.bias", "head.box.2.0.conv.weight", "head.box.2.0.bn.weight", "head.box.2.0.bn.bias", "head.box.2.0.bn.running_mean", "head.box.2.0.bn.running_var", "head.box.2.1.conv.weight", "head.box.2.1.bn.weight", "head.box.2.1.bn.bias", "head.box.2.1.bn.running_mean", "head.box.2.1.bn.running_var", "head.box.2.2.weight", "head.box.2.2.bias", "head.cls.0.0.conv.weight", "head.cls.0.0.bn.weight", "head.cls.0.0.bn.bias", "head.cls.0.0.bn.running_mean", "head.cls.0.0.bn.running_var", "head.cls.0.1.conv.weight", "head.cls.0.1.bn.weight", "head.cls.0.1.bn.bias", "head.cls.0.1.bn.running_mean", "head.cls.0.1.bn.running_var", "head.cls.0.2.weight", "head.cls.0.2.bias", "head.cls.1.0.conv.weight", "head.cls.1.0.bn.weight", "head.cls.1.0.bn.bias", "head.cls.1.0.bn.running_mean", "head.cls.1.0.bn.running_var", "head.cls.1.1.conv.weight", "head.cls.1.1.bn.weight", "head.cls.1.1.bn.bias", "head.cls.1.1.bn.running_mean", "head.cls.1.1.bn.running_var", "head.cls.1.2.weight", "head.cls.1.2.bias", "head.cls.2.0.conv.weight", "head.cls.2.0.bn.weight", "head.cls.2.0.bn.bias", "head.cls.2.0.bn.running_mean", "head.cls.2.0.bn.running_var", "head.cls.2.1.conv.weight", "head.cls.2.1.bn.weight", "head.cls.2.1.bn.bias", "head.cls.2.1.bn.running_mean", "head.cls.2.1.bn.running_var", "head.cls.2.2.weight", "head.cls.2.2.bias". 

In [None]:
from safetensors.torch import load

with open('/mnt/d/projects/objdet/src/objdet/YOLOV8/weights/yolov8n-coco.safetensors', 'rb') as f:
    state_dict = load(f.read())
    
model = yolo_v8_n()
model.load_state_dict(state_dict)

In [8]:
import torch
from ultralytics import YOLO
import cv2 
import numpy as np

m = YOLO('yolov8n.pt')

m_model = m.model
m_model.eval()

image_path = '/mnt/d/projects/objdet/src/objdet/samples/sample.png'
im = cv2.imread(image_path)
im0 = im.copy()
im = im.astype(np.float32)[:,:,::-1]
im = im/255
im = im.transpose((2,0,1))
im = torch.from_numpy(im).unsqueeze(0)

pred = m_model(im)

array = pred[0].reshape(-1,6)
boxes = array[:, :4]  # Bounding box coordinates
confidences = array[:, 4]  # Confidence scores
class_scores = array[:, 5]  # Class scores
# Assume 'boxes', 'confidences', and 'class_scores' are already defined
indices = cv2.dnn.NMSBoxes(boxes.tolist(), confidences.tolist(), score_threshold=0.01, nms_threshold=0.01)

final_boxes = boxes[indices].reshape(-1, 4)
final_confidences = confidences[indices]
final_class_scores = class_scores[indices]

In [None]:
img = cv2.imread('/mnt/d/projects/objdet/src/objdet/samples/sample.png')

# 각 박스를 이미지 위에 그립니다.
for box in final_boxes:
    cv2.rectangle(img, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (0, 255, 0), 2)  # 초록색으로 박스 그리기

# 결과를 화면에 표시합니다.
cv2.imshow('Image with Boxes', img)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [27]:
pred[0].shape

torch.Size([1, 84, 9072])

In [30]:
for p in pred[1]:
    print(p.shape)
im.shape

torch.Size([1, 144, 72, 96])
torch.Size([1, 144, 36, 48])
torch.Size([1, 144, 18, 24])


torch.Size([1, 3, 576, 768])

In [None]:
import numpy as np
import cv2
from ultralytics import YOLO

image_path = '/mnt/d/projects/objdet/src/objdet/samples/sample.png'
im = cv2.imread(image_path)
im0 = im.copy()
im = im.astype(np.float32)[:,:,::-1]
im = im/255
im = im.transpose((2,0,1))
im = torch.from_numpy(im).unsqueeze(0)

model = YOLO('yolov8n.pt')

pred = model(im)

# model.eval()
result2 = model(im)
array = result2[0].reshape(-1,6)
boxes = array[:, :4]  # Bounding box coordinates
confidences = array[:, 4]  # Confidence scores
class_scores = array[:, 5]  # Class scores
# Assume 'boxes', 'confidences', and 'class_scores' are already defined
indices = cv2.dnn.NMSBoxes(boxes.tolist(), confidences.tolist(), score_threshold=0.5, nms_threshold=0.4)

final_boxes = boxes[indices].reshape(-1, 4)
final_confidences = confidences[indices]
final_class_scores = class_scores[indices]

In [None]:
import cv2
import numpy as np

# 이미지를 로드합니다.
img = cv2.imread('/mnt/d/projects/objdet/src/objdet/samples/sample.png')

# 각 박스를 이미지 위에 그립니다.
for box in final_boxes:
    cv2.rectangle(img, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (0, 255, 0), 2)  # 초록색으로 박스 그리기

# 결과를 화면에 표시합니다.
cv2.imshow('Image with Boxes', img)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [84]:
img4.shape

torch.Size([1, 3, 576, 768])

In [None]:
from ultralytics import YOLO

m = YOLO('yolov8n.pt')

# m.eval()
result = m('/mnt/d/projects/objdet/src/objdet/samples/sample.png')
array = result[0].reshape(-1,6)
boxes = array[:, :4]  # Bounding box coordinates
confidences = array[:, 4]  # Confidence scores
class_scores = array[:, 5]  # Class scores

# Assume 'boxes', 'confidences', and 'class_scores' are already defined
indices = cv2.dnn.NMSBoxes(boxes.tolist(), confidences.tolist(), score_threshold=0.5, nms_threshold=0.4)

final_boxes = boxes[indices].reshape(-1, 4)
final_confidences = confidences[indices]
final_class_scores = class_scores[indices]

In [None]:
import cv2
import numpy as np

# 이미지를 로드합니다.
img = cv2.imread('/mnt/d/projects/objdet/src/objdet/samples/sample.png')

# 각 박스를 이미지 위에 그립니다.
for box in final_boxes:
    cv2.rectangle(img, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (0, 255, 0), 2)  # 초록색으로 박스 그리기

# 결과를 화면에 표시합니다.
cv2.imshow('Image with Boxes', img)
cv2.waitKey(0)
cv2.destroyAllWindows()