In [1]:
import torch
import torchvision
import numpy as np
import yolos

In [344]:
class YOLOMODEL(torch.nn.Module):
    """Common base class that records the YOLO layout hyper-parameters.

    Attributes set on the instance:
        S: grid side length (subclasses in this file reshape to ``S x S`` cells).
        B: number of 5-value box groups per cell.
        C: number of class scores per cell.
        N: length of one cell vector, ``B * 5 + C``.

    Extra positional/keyword arguments are forwarded to ``torch.nn.Module``.
    """

    def __init__(self, S: int = 7, B: int = 2, C: int = 20, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)

        self.S = S
        self.B = B
        self.C = C
        # Five numbers per box group plus one score per class.
        self.N = 5 * B + C

class YoloLossModel(YOLOMODEL):
    """YOLOv1-style sum-of-squares loss over a grid of cell vectors.

    Every cell vector has ``N = B * 5 + C`` entries: ``B`` groups of
    ``(x, y, w, h, confidence)`` followed by ``C`` class scores.  A target cell
    whose first confidence entry (index 4) equals 1 is treated as containing an
    object; a cell whose index 4 equals 0 is treated as empty.

    Args:
        lambdaobj: weight applied to the box-coordinate terms (XY and WH).
        lambdanoobj: weight applied to the confidence term of empty cells.
        *args, **kwargs: forwarded to ``YOLOMODEL`` (``S``, ``B``, ``C``).
    """

    def __init__(self, lambdaobj: float = 5., lambdanoobj: float = .5, *args, **kwargs):
        super(YoloLossModel, self).__init__(*args, **kwargs)

        self.lambdaobj = lambdaobj
        self.lambdanoobj = lambdanoobj

        # All slot indices are derived from B and C so that the loss also works
        # for B != 2 (for B == 2 these are [4, 9] and [[0,1,2,3],[5,6,7,8]]).
        self.CI = [b * 5 + 4 for b in range(self.B)]  # Confidence Index
        self.BI = [[b * 5 + k for k in range(4)] for b in range(self.B)]  # BBoxIndex
        self.LI = [self.B * 5 + idx for idx in range(self.C)]  # Label Index
        self.XYI = [0, 1]  # XY or XYMin
        self.WHI = [2, 3]  # WH or XYMax

    def forward(self, P: torch.Tensor, T: torch.Tensor):
        """Compute the batch-averaged loss.

        Args:
            P: predictions, indexed as ``[..., N]`` with the batch on axis 0.
            T: targets with the same leading shape as ``P``.

        Returns:
            Scalar tensor: weighted sum of the XY, sqrt-WH, object-confidence,
            no-object-confidence and class terms, divided by ``P.size(0)``.
        """
        B, N = self.B, self.N
        CI, LI = self.CI, self.LI
        XYI, WHI = self.XYI, self.WHI

        Batch = P.size(0)

        # Per-cell boolean masks.  Indexing the leading axes with them yields
        # [n, N] directly, which (unlike reshape(-1, N) on a flat selection)
        # also works when no cell is selected.
        objMask = T[..., 4] == 1
        noobjMask = T[..., 4] == 0

        coordP = P[objMask]    # [coord_n, N]
        noobjP = P[noobjMask]  # [noobj_n, N]

        coordT = T[objMask]    # [coord_n, N]
        noobjT = T[noobjMask]  # [noobj_n, N]

        coord_n = coordP.size(0)

        # Class Label
        ClassP = coordP[:, LI]  # [coord_n, C]
        ClassT = coordT[:, LI]  # [coord_n, C]
        # No Object Confidence
        NoObjP = noobjP[:, CI]  # [noobj_n, B]
        NoObjT = noobjT[:, CI]  # [noobj_n, B]
        # Object Confidence
        ConfP = coordP[:, CI]   # [coord_n, B]
        # BBox: a [B, 4] index tensor gathers every box group at once.
        BoxIndex = torch.as_tensor(self.BI, dtype=torch.long, device=P.device)
        BBoxP = coordP[:, BoxIndex]  # [coord_n, B, 4(XYWH)]
        BBoxT = coordT[:, BoxIndex]  # [coord_n, B, 4(XYWH)]

        # The IoU only selects the responsible box and serves as the
        # confidence target, so no gradient flows through it.
        with torch.no_grad():
            iou, iouIndex = self.IoUCul(
                BBoxP.reshape(coord_n * B, 4), BBoxT.reshape(coord_n * B, 4)
            )

        # Keep, per object cell, only the box group with the highest IoU.
        # The row index is created on the same device as iouIndex.
        Range = torch.arange(coord_n, device=iouIndex.device)
        BBoxP = BBoxP[Range, iouIndex]  # [coord_n, 4]
        BBoxT = BBoxT[Range, iouIndex]  # [coord_n, 4]
        ConfP = ConfP[Range, iouIndex]  # [coord_n]

        lossXY = torch.nn.functional.mse_loss(BBoxP[..., XYI], BBoxT[..., XYI], reduction="sum")
        # sqrt requires non-negative widths/heights; negative values give NaN.
        lossWH = torch.nn.functional.mse_loss(torch.sqrt(BBoxP[..., WHI]), torch.sqrt(BBoxT[..., WHI]), reduction="sum")
        lossObj = torch.nn.functional.mse_loss(ConfP, iou, reduction="sum")
        lossNObj = torch.nn.functional.mse_loss(NoObjP, NoObjT, reduction="sum")
        lossClass = torch.nn.functional.mse_loss(ClassP, ClassT, reduction="sum")
        loss = (self.lambdaobj * (lossXY + lossWH) + self.lambdanoobj * (lossNObj) + (lossObj + lossClass)) / Batch
        return loss

    def IoUCul(self, P, T):
        """Best IoU per cell between predicted and target box groups.

        Args:
            P: ``[..., 4]`` predicted boxes as ``(x, y, w, h)``; the total
               number of boxes must be a multiple of ``B``, with the ``B``
               boxes of one cell stored consecutively.
            T: target boxes, same shape as ``P``.

        ``x, y`` are divided by ``S`` before being combined with ``w, h``.

        Returns:
            The ``torch.max(..., dim=1)`` result over the ``B`` boxes of each
            cell: ``values`` is the best IoU, ``indices`` the winning box.
        """

        XYI = self.XYI
        WHI = self.WHI

        S = float(self.S)
        B = self.B

        PCenter, PHalf = P[..., XYI] / S, 0.5 * P[..., WHI]
        TCenter, THalf = T[..., XYI] / S, 0.5 * T[..., WHI]

        PXYMIN, PXYMAX = PCenter - PHalf, PCenter + PHalf
        TXYMIN, TXYMAX = TCenter - THalf, TCenter + THalf

        lt = torch.max(PXYMIN, TXYMIN)
        rb = torch.min(PXYMAX, TXYMAX)

        wh = torch.clamp(rb - lt, min=0.)
        intersect = (wh[..., 0] * wh[..., 1])

        Area1 = (PXYMAX - PXYMIN)
        Area1 = Area1[..., 0] * Area1[..., 1]
        Area2 = (TXYMAX - TXYMIN)
        Area2 = Area2[..., 0] * Area2[..., 1]
        Union = Area1 + Area2 - intersect

        # Two degenerate (zero-area) boxes would give 0 / 0 = NaN; define the
        # IoU as 0 whenever the union is not strictly positive.
        iou = torch.where(Union > 0, intersect / Union, torch.zeros_like(intersect))
        # Explicit row count keeps the reshape valid for an empty input.
        return torch.max(iou.reshape(iou.numel() // B, B), dim=1)

class YoloV1(YOLOMODEL):
    """VGG16 backbone with a small fully connected head emitting YOLO cells.

    The stock VGG16 classifier is replaced by
    ``Flatten -> Linear(512*7*7, 512) -> ReLU -> Linear(512, S*S*N) -> Sigmoid``,
    so every output value lies in (0, 1).

    Args:
        *args, **kwargs: forwarded to ``YOLOMODEL`` (``S``, ``B``, ``C``).
    """

    def __init__(self, *args, **kwargs) -> None:
        super(YoloV1, self).__init__(*args, **kwargs)

        # NOTE(review): newer torchvision releases prefer the `weights=`
        # argument over `pretrained=True` - confirm against the installed version.
        self.vgg = vgg = torchvision.models.vgg16(pretrained=True)
        # Backbone stays trainable (fine-tuning).
        vgg.features.requires_grad_()
        vgg.avgpool.requires_grad_()

        vgg.classifier = torch.nn.Sequential(
            torch.nn.Flatten(),
            torch.nn.Linear(512 * 7 * 7, 512),
            torch.nn.ReLU(inplace=True),
            torch.nn.Linear(512, self.S * self.S * self.N),
            torch.nn.Sigmoid()
        )

    def forward(self, inp):
        """Run the network and reshape the flat output to ``[-1, S, S, N]``."""
        return self.vgg(inp).reshape(-1, self.S, self.S, self.N)

    def Testing(self):
        """Smoke test: forward one random ``1 x 3 x 224 x 224`` image without gradients.

        The probe is created on the device of the model parameters so the
        check also works after the model has been moved off the CPU.
        """
        device = next(self.parameters()).device
        with torch.no_grad():
            return self.forward(torch.rand(1, 3, 224, 224, device=device))

def TorchIsistance(Tensor: any) -> torch.Tensor:
    """Convert list / tuple / ``np.ndarray`` input with ``torch.Tensor(...)``.

    Any other object is returned unchanged (same object, no copy).
    """
    convertible = (list, tuple, np.ndarray)
    if not isinstance(Tensor, convertible):
        return Tensor
    return torch.Tensor(Tensor)

def EncoderBBox(BBox: torch.Tensor, Width: int, Height: int, S: int = 7) -> torch.Tensor:

    """
        Encode corner-format pixel boxes into grid-cell coordinates.

        BBox: [[Xmin, Ymin, Xmax, Ymax, Label],...] (a single 1-D box is accepted)
        Width: image width used to normalise the X coordinates
        Height: image height used to normalise the Y coordinates
        S: number of grid cells per side
        return [[XIndex, YIndex, CenterX, CenterY, Width, Height, Label],...]
            XIndex / YIndex: index of the cell holding the box centre
            CenterX / CenterY: centre offset inside that cell, in cell units
            Width / Height: box size as a fraction of the image
    """

    BBox = TorchIsistance(BBox)
    if BBox.dim() == 1: BBox = BBox.reshape(1, -1)

    S = float(S)
    Label = BBox[..., -1].unsqueeze(-1)
    # Built on the device of BBox so the division below does not mix devices.
    ImageWH = torch.tensor([[Width, Height]], dtype=torch.float32, device=BBox.device)

    XYXY = BBox[..., :4] / torch.cat((ImageWH, ImageWH), dim=1)
    XYC = (XYXY[..., [2, 3]] + XYXY[..., [0, 1]]) / 2.
    WH = (XYXY[..., [2, 3]] - XYXY[..., [0, 1]])

    # ceil() - 1 assigns a centre lying exactly on a cell border to the lower
    # cell.  Without the clamp a centre at coordinate 0 would produce index -1,
    # which later wraps around to the last grid cell when used as an index.
    XYI = ((XYC * S).ceil() - 1.).clamp(min=0.)
    XYN = (XYC - (XYI / S)) * S

    return torch.cat((XYI, XYN, WH, Label), dim=1)

def MakeTargetBBox(BBox: torch.Tensor, S: int, B: int, C: int) -> torch.Tensor:

    """
        Scatter encoded boxes into a dense target grid.

        BBox: [[XIndex, YIndex, CenterX, CenterY, Width, Height, Label],...]
        S: number of grid cells per side
        B: number of box groups per cell (each group is x, y, w, h, confidence)
        C: number of classes
        return Tensor(S, S, B * 5 + C); an all-zero grid when BBox is empty
    """

    BBox = TorchIsistance(BBox)

    N = B * 5 + C
    Target = torch.zeros(S, S, N)
    # No boxes: every cell stays empty.
    if BBox.numel() == 0: return Target
    if (BBox.dim() == 1): BBox = BBox.reshape(1, -1)

    Label = BBox[..., -1].unsqueeze(-1).long()

    X = BBox[..., 0].unsqueeze(-1).long()
    Y = BBox[..., 1].unsqueeze(-1).long()

    # Slot positions derived from B (for B == 2: [0,1,2,3,5,6,7,8] and [4, 9]).
    BoxSlots = [b * 5 + k for b in range(B) for k in range(4)]
    ConfSlots = [b * 5 + 4 for b in range(B)]

    XYWH = BBox[..., [2, 3, 4, 5]]
    # Every box group of the cell receives the same ground-truth box.
    Target[Y, X, BoxSlots] = XYWH.repeat(1, B)
    Target[Y, X, ConfSlots] = 1.
    # One-hot class score after the B box groups.
    Target[Y, X, B * 5 + Label] = 1.

    return Target

def makeBBoxes(num: int=1):
    """Draw ``num`` random boxes as rows ``[x1, y1, x2, y2, label]``.

    ``x1, y1`` come from ``[0, 350)``, ``x2, y2`` from ``[50, 400)`` and the
    label from ``[0, 3)``.  A draw is kept only when ``x1 < x2`` and
    ``y1 < y2``; rejected draws are simply repeated.
    """

    def _draw(low: int, high: int) -> int:
        # One integer from torch's global RNG.
        return int(torch.randint(low, high, size=(1,)))

    rows = []
    while len(rows) < num:
        # Draw order (x1, y1, x2, y2, label) fixes the RNG consumption order.
        left = _draw(0, 350)
        top = _draw(0, 350)
        right = _draw(50, 400)
        bottom = _draw(50, 400)
        label = _draw(0, 3)
        if left < right and top < bottom:
            rows.append([left, top, right, bottom, label])
    return torch.Tensor(rows)

In [93]:
# Seed torch's global RNG so the random box below is reproducible.
torch.manual_seed(123)
# One random box (default num=1): [x1, y1, x2, y2, label].
BBoxes = makeBBoxes()
# Encode for a 400x400 image on the default 7x7 grid.
enc = EncoderBBox(BBoxes, 400, 400)
# Dense target for S=7, B=2, C=3 with a leading batch axis.
# NOTE(review): `Target` is rebound by a later cell (`Target = Boxes()`), so
# this value is not the one the training cell consumes.
Target = MakeTargetBBox(enc, 7, 2, 3).unsqueeze(0)

In [94]:
# Rebuild the same boxes with the project-local `yolos` container
# (400x400 image, 3 classes).
Boxes = yolos.YoloBoxes.YoloBoxes(400, 400, C=3)
for box in BBoxes:
    # Each row is [x1, y1, x2, y2, label] stored as floats; `objid` replaces the
    # former loop variable `id`, which shadowed the builtin for later cells.
    x1, y1, x2, y2, objid = (int(value) for value in box.tolist())
    Boxes += yolos.YoloBoxes.YoloBox("UnKnow", objid, x1, y1, x2, y2)

In [95]:
# Display the container's repr to inspect the stored boxes.
Boxes

Width( 400 ), Height( 400 )
  ( 0 )( objname: UnKnow | objid:     1 ), ( xmin:  192 | ymin:   86 ), ( xmax:  207 | ymax:  349 )
  ( 1 )( objname: UnKnow | objid:     1 ), ( xmin:   61 | ymin:   21 ), ( xmax:  116 | ymax:  271 )
  ( 2 )( objname: UnKnow | objid:     1 ), ( xmin:  150 | ymin:   66 ), ( xmax:  155 | ymax:  147 )

In [96]:
# Calling the container yields the grid target; this rebinds `Target`,
# replacing the value built earlier with MakeTargetBBox.
Target = Boxes()
# The recorded run printed torch.Size([7, 7, 13]), i.e. no batch axis.
print(Target.shape)

torch.Size([7, 7, 13])


In [98]:
# Loss module defined in this notebook, configured for 3 classes
# (S and B keep their defaults of 7 and 2).
lossmodel = YoloLossModel(C=3)
# Alternative kept for comparison: the implementation shipped in `yolos`.
# lossmodel = yolos.Models.YoloLossModel(C=3)

In [99]:
# All-zero dummy image batch of shape [1, 3, 224, 224].
Data = torch.zeros(*(1, 3, 224, 224))
# NOTE(review): `Input` is not read by the training cell, which feeds `Data`.
Input = torch.FloatTensor(Data)
# Rebinds `Target` through the FloatTensor constructor.
Target = torch.FloatTensor(Target)

In [100]:
# Network with 3 classes (N = 2 * 5 + 3 = 13 values per cell).
net = YoloV1(C=3)
# Adam over every parameter, including the VGG backbone.
optim = torch.optim.Adam(net.parameters(), lr=0.0001, betas=(0.9, 0.999))



In [101]:
# Overfit the network on the single dummy sample until the loss drops below
# LOSS_TOL.  The loop is capped so that a run which never converges cannot
# hang the kernel, and a non-finite loss aborts immediately.
MAX_STEPS = 100_000
LOSS_TOL = 0.001

for step in range(MAX_STEPS):
    optim.zero_grad()
    pred = net(Data)
    loss = lossmodel(pred, Target.unsqueeze(0))
    if not torch.isfinite(loss):
        raise RuntimeError(f"loss became non-finite at step {step}")
    loss.backward()
    optim.step()
    print(f"loss: {loss.item(): 011.05f}", end="\r")
    if loss.item() < LOSS_TOL:
        break
else:
    # Only reached when the cap is hit without convergence.
    print(f"\nstopped after {MAX_STEPS} steps without reaching loss < {LOSS_TOL}")
# Drop the batch axis for the detection cell.
pred = pred.squeeze()

loss:  0000.00060

In [102]:
# NOTE(review): the return value is discarded; presumably this configures
# `yolos` for 3 classes as a side effect - confirm.
yolos.YoloBoxes.YoloRoot(C=3)
# Decoder for a 400x400 image.
detect = yolos.YoloBoxes.Detect(400, 400)
# Decode prediction and target grids; detach() drops autograd history.
P, T = detect(pred.detach(), Target.detach())

In [103]:
# Show the decoded predictions (compare with the decoded targets).
P.ToInt()

Width( 400 ), Height( 400 )
  ( 0 )( objname: None | objid:     1 ), ( xmin: 0061.000 | ymin: 0022.000 ), ( xmax: 0116.000 | ymax: 0269.000 ), ( Confidence: 0.977 )
  ( 1 )( objname: None | objid:     1 ), ( xmin: 0150.000 | ymin: 0065.000 ), ( xmax: 0155.000 | ymax: 0147.000 ), ( Confidence: 0.968 )
  ( 2 )( objname: None | objid:     1 ), ( xmin: 0191.000 | ymin: 0085.000 ), ( xmax: 0206.000 | ymax: 0349.000 ), ( Confidence: 0.961 )
  ( 3 )( objname: None | objid:     1 ), ( xmin: 0063.000 | ymin: -069.000 ), ( xmax: 0064.000 | ymax: 0330.000 ), ( Confidence: 0.825 )

In [104]:
# Show the decoded targets for comparison with the predictions.
T.ToInt()

Width( 400 ), Height( 400 )
  ( 0 )( objname: None | objid:     1 ), ( xmin: 0192.000 | ymin: 0085.000 ), ( xmax: 0207.000 | ymax: 0349.000 ), ( Confidence: 1.000 )
  ( 1 )( objname: None | objid:     1 ), ( xmin: 0150.000 | ymin: 0066.000 ), ( xmax: 0155.000 | ymax: 0146.000 ), ( Confidence: 1.000 )
  ( 2 )( objname: None | objid:     1 ), ( xmin: 0060.000 | ymin: 0021.000 ), ( xmax: 0116.000 | ymax: 0271.000 ), ( Confidence: 1.000 )

In [127]:
# Each decoded box is emitted twice in a row, dropping the first two entries
# of box(), so the list holds consecutive pairs as IoUCul expects for B = 2.
TBox = [
    box()[2:]
    for box in Boxes.Decoder()
    for _ in range(2)
]

In [128]:
# Convert the nested list to a float tensor (rebinds `TBox`).
TBox = torch.Tensor(TBox)

In [129]:
# Sanity check: the IoU of every box with itself must be 1.
# IoUCul exists only as a method of YoloLossModel in this notebook, so it is
# called through the `lossmodel` instance instead of as a free function.
lossmodel.IoUCul(TBox, TBox)

torch.return_types.max(
values=tensor([1., 1., 1.]),
indices=tensor([0, 0, 0]))

In [513]:
# Re-seed torch's global RNG before drawing the two random box sets below.
torch.manual_seed(123)

In [524]:
# First random box set: three boxes, round-tripped through the yolos container.
BBoxes1 = makeBBoxes(3)
# NOTE(review): C=1 here, while makeBBoxes draws labels from {0, 1, 2} and the
# sibling cell uses C=3 - confirm this is intended.
YoloBoxes1 = yolos.YoloBoxes.YoloBoxes(400, 400, C=1)
for box in BBoxes1:
    # `objid` avoids shadowing the builtin `id`.
    x1, y1, x2, y2, objid = (int(value) for value in box.tolist())
    YoloBoxes1 += yolos.YoloBoxes.YoloBox("UnKnow", objid, x1, y1, x2, y2)
# The container gets its own name so `Boxes1` is only ever the tensor.
# Every decoded box is repeated twice in a row.
Boxes1 = torch.Tensor([box()[2:] for box in YoloBoxes1.Decoder() for _ in range(2)])

In [525]:
# Second random box set: three boxes, round-tripped through the yolos container.
BBoxes2 = makeBBoxes(3)
YoloBoxes2 = yolos.YoloBoxes.YoloBoxes(400, 400, C=3)
for box in BBoxes2:
    # `objid` avoids shadowing the builtin `id`.
    x1, y1, x2, y2, objid = (int(value) for value in box.tolist())
    YoloBoxes2 += yolos.YoloBoxes.YoloBox("UnKnow", objid, x1, y1, x2, y2)
# The container gets its own name so `Boxes2` is only ever the tensor.
# Every decoded box is repeated twice in a row.
Boxes2 = torch.Tensor([box()[2:] for box in YoloBoxes2.Decoder() for _ in range(2)])