In [70]:
import torch
import torch.nn as nn
from utils import intersection_over_union 

class YoloLoss(nn.Module):
    """Calculate loss of yolov1

    """
    def __init__(self , S=7 , B=2 , C=20 ):
        """
        Args:
            S (int, optional): Grid Size. Defaults to 7.
            B (int, optional): number of bounding boxes. Defaults to 2.
            C (int, optional): number of classes. Defaults to 20.
        """
        super().__init__()
        self.S = S
        self.B = B
        self.C = C
        
        self.mse = torch.nn.MSELoss(reduction ="sum")
        self.lambda_coord = 5
        self.lambda_noobj = 0.5
        
    def forward(self , predictions , target):
        # predictions의 shape = > (BATCH_SIZE , S*S*(C+B*5)) 이므로
        # (BATCH_SIZE , S, S, (C+B*5))로 shape 변경
        predictions = predictions.reshape(-1 , self.S , self.S , self.C+self.B*5)
        # predictions[0:20] => 20개 class
        # predictions[20:21] => 첫번째 bouning box의 confidence score
        # predictions[21:25] => 첫번째 bouning box 좌표 (x , y , w, h)
        # predictions[25:26] => 두번째 bouning box의 confidence score
        # predictions[26:30] => 두번째 bouning box 좌표 ( x,y,w,h)
        
        # B=2인 경우 각 셀 마다 2개의 bounding box가 존재하므로 predictions과 target간의 iou를 계산
        iou_b1 = intersection_over_union(predictions[...,21:25], target[...,21:25]) #(N , S ,S , 1)
        iou_b2 = intersection_over_union(predictions[...,26:30], target[...,26:30]) #(N , S ,S , 1)
        ious = torch.cat([iou_b1.unsquueze(0) , iou_b2.unsquueze(0)], dim=0) #(2,N , S ,S , 1)
        
        # b1과 b2 중 IOU중 큰 값을 가져오고 , target값의 객체 존재 여부(confidence score 가져옴)
        _ , bestbox = torch.max(ious , dim=0)  
        # bestbox = > (N , S ,S , 1))
        exists_box = target[...:20:21] # (N,S,S,1)
        
        #======================#
        #  FOR BOX COORDIATES  #
        #======================#
        box_predictions = exists_box*(   # => (N,S,S,1)
            # 첫번째 bounding box가 best인 경우
            bestbox*predictions[...,26:30] # => (N,S,S,4)
            # 두번째 bounding box가 best인 경우
            + (1-bestbox)*predictions[...,21:25] # => (N,S,S,4)
        )
        box_target = exists_box*target[...,21:25] # => (N,S,S,4)       
        
        # weight , height는 sqrt 취해주기
        # 이때 sqrt 안에 음수가 되는 것을 방지하고자 절대값 취하고, 만약에 0이되면 미분시 무한대 발산 방지, 
        # 또한 abs를 취하면 항상 양수여서 기울기의 방향 정보가 사라지므로 기존의 부호를 정보를 살려주어야 학습진행(torch.sign)
        box_predictions[...,2:4] = torch.sign(box_predictions[2:4])*torch.sqrt(torch.abs(box_predictions[2:4]) + 1e-6)
        box_target[...,2:4] = torch.sqrt(box_target[...,2:4])
        
        # coorediate loss 구하기 (N,S,S,4) => (N*S*S,4) 로 변경 후 prediction과 target간의 mse 계산
        box_loss = self.mse(
            torch.flatten(box_predictions , end_dim=-2) , torch.flatten(box_target , end_dim = -2)
        )
        
        #=====================#
        #   FOR OBJECT LOSS   #
        #=====================#
        # confidenc socre
        # (N,S,S,1) => (N*S*S,1)
        pred_confidence = exists_box*(bestbox*(predictions[...,25:26]) + (1-bestbox)*(predictions[...,20:21]))
        target_confidence = exists_box*target[...,20:21]
        
        object_loss = self.mse(
            torch.flatten(pred_confidence , end_dim=-2) , torch.flatten(target_confidence, end_dim=-2)
        )
        #========================#
        #   FOR NO OBJECT LOSS   #
        #========================#
        # (N , S , S , 1) =>(N , S*S*1)
        pred_noobject = (1-exists_box)*(predictions[...,25:26]) + (1-exists_box)*(predictions[...,20:21])
        target_noobject = (1-exists_box)*target[...,20:21] + (1-exists_box)*target[...,20:21] 
        no_object_loss = self.mse(
            torch.flatten(pred_noobject , start_dim=1 ) , torch.flatten(target_noobject , start_dim=1)
        )
        
        
        #====================#
        #   FOR CLASS LOSS   #
        #====================#
        
        class_loss = self.mse(
            torch.flatten(exists_box*predictions[...,:20] , end_dim=-2),
            torch.flatten(exists_box*target[...,:20] , end_dim=-2)
        )
        
        loss = (
            self.lambda_coord*box_loss
            + object_loss
            + self.lambda_noobj*no_object_loss
            + class_loss
        )
        return loss

SyntaxError: invalid syntax (2936961874.py, line 85)

In [72]:
%%writefile loss.py
import torch
import torch.nn as nn
from utils import intersection_over_union 

class YoloLoss(nn.Module):
    """Calculate loss of yolov1

    """
    def __init__(self , S=7 , B=2 , C=20 ):
        """
        Args:
            S (int, optional): Grid Size. Defaults to 7.
            B (int, optional): number of bounding boxes. Defaults to 2.
            C (int, optional): number of classes. Defaults to 20.
        """
        super().__init__()
        self.S = S
        self.B = B
        self.C = C
        
        self.mse = torch.nn.MSELoss(reduction ="sum")
        self.lambda_coord = 5
        self.lambda_noobj = 0.5
        
    def forward(self , predictions , target):
        # predictions의 shape = > (BATCH_SIZE , S*S*(C+B*5)) 이므로
        # (BATCH_SIZE , S, S, (C+B*5))로 shape 변경
        predictions = predictions.reshape(-1 , self.S , self.S , self.C+self.B*5)
        # predictions[0:20] => 20개 class
        # predictions[20:21] => 첫번째 bouning box의 confidence score
        # predictions[21:25] => 첫번째 bouning box 좌표 (x , y , w, h)
        # predictions[25:26] => 두번째 bouning box의 confidence score
        # predictions[26:30] => 두번째 bouning box 좌표 ( x,y,w,h)
        
        # B=2인 경우 각 셀 마다 2개의 bounding box가 존재하므로 predictions과 target간의 iou를 계산
        iou_b1 = intersection_over_union(predictions[...,21:25], target[...,21:25]) #(N , S ,S , 1)
        iou_b2 = intersection_over_union(predictions[...,26:30], target[...,26:30]) #(N , S ,S , 1)
        ious = torch.cat([iou_b1.unsquueze(0) , iou_b2.unsquueze(0)], dim=0) #(2,N , S ,S , 1)
        
        # b1과 b2 중 IOU중 큰 값을 가져오고 , target값의 객체 존재 여부(confidence score 가져옴)
        _ , bestbox = torch.max(ious , dim=0)  
        # bestbox = > (N , S ,S , 1))
        exists_box = target[...:20:21] # (N,S,S,1)
        
        #======================#
        #  FOR BOX COORDIATES  #
        #======================#
        box_predictions = exists_box*(   # => (N,S,S,1)
            # 첫번째 bounding box가 best인 경우
            bestbox*predictions[...,26:30] # => (N,S,S,4)
            # 두번째 bounding box가 best인 경우
            + (1-bestbox)*predictions[...,21:25] # => (N,S,S,4)
        )
        box_target = exists_box*target[...,21:25] # => (N,S,S,4)       
        
        # weight , height는 sqrt 취해주기
        # 이때 sqrt 안에 음수가 되는 것을 방지하고자 절대값 취하고, 만약에 0이되면 미분시 무한대 발산 방지, 
        # 또한 abs를 취하면 항상 양수여서 기울기의 방향 정보가 사라지므로 기존의 부호를 정보를 살려주어야 학습진행(torch.sign)
        box_predictions[...,2:4] = torch.sign(box_predictions[2:4])*torch.sqrt(torch.abs(box_predictions[2:4]) + 1e-6)
        box_target[...,2:4] = torch.sqrt(box_target[...,2:4])
        
        # coorediate loss 구하기 (N,S,S,4) => (N*S*S,4) 로 변경 후 prediction과 target간의 mse 계산
        box_loss = self.mse(
            torch.flatten(box_predictions , end_dim=-2) , torch.flatten(box_target , end_dim = -2)
        )
        
        #=====================#
        #   FOR OBJECT LOSS   #
        #=====================#
        # confidenc socre
        # (N,S,S,1) => (N*S*S,1)
        pred_confidence = exists_box*(bestbox*(predictions[...,25:26]) + (1-bestbox)*(predictions[...,20:21]))
        target_confidence = exists_box*target[...,20:21]
        
        object_loss = self.mse(
            torch.flatten(pred_confidence , end_dim=-2) , torch.flatten(target_confidence, end_dim=-2)
        )
        #========================#
        #   FOR NO OBJECT LOSS   #
        #========================#
        # (N , S , S , 1) =>(N , S*S*1)
        pred_noobject = (1-exists_box)*(predictions[...,25:26]) + (1-exists_box)*(predictions[...,20:21])
        target_noobject = (1-exists_box)*target[...,20:21] + (1-exists_box)*target[...,20:21] 
        no_object_loss = self.mse(
            torch.flatten(pred_noobject , start_dim=1 ) , torch.flatten(target_noobject , start_dim=1)
        )
        
        
        #====================#
        #   FOR CLASS LOSS   #
        #====================#
        
        class_loss = self.mse(
            torch.flatten(exists_box*predictions[...,:20] , end_dim=-2),
            torch.flatten(exists_box*target[...,:20] , end_dim=-2)
        )
        
        loss = (
            self.lambda_coord*box_loss
            + object_loss
            + self.lambda_noobj*no_object_loss
            + class_loss
        )
        return loss

Overwriting loss.py


In [22]:
%%writefile utils.py
import torch
def intersection_over_union(boxes_preds , boxes_labels , box_format = "midpoint"):
    """Calculate IOU(intersection_over_union)

    Args:
        boxes_preds (tensor): Prediction of Bounding boxes (Batch_size , 4)
        boxes_labels (tensor): Correct labels of Bouning boxes(Batch_size , 4)
        box_format (str, optional): bouning box format midpoint or corners Defaults to "midpoint".
        
    Returns :
        tensor : (... , 1) => 2 입력 Tensor bounding box의 IOU 반환
    """
    if box_format =="midpoint": # [center x , center y , width , height]
        # convert midpoint format => corners format [min x , min y , max x , max y ]
        box1_x1 = boxes_preds[...,0:1] - boxes_preds[...,2:3]/2
        box1_y1 = boxes_preds[...,1:2] - boxes_preds[...,3:4]/2
        box1_x2 = boxes_preds[...,0:1] + boxes_preds[...,2:3]/2
        box1_y2 = boxes_preds[...,1:2] + boxes_preds[...,3:4]/2
        
        box2_x1 = boxes_labels[...,0:1] - boxes_labels[...,2:3]/2
        box2_y1 = boxes_labels[...,1:2] - boxes_labels[...,3:4]/2
        box2_x2 = boxes_labels[...,0:1] + boxes_labels[...,2:3]/2
        box2_y2 = boxes_labels[...,1:2] + boxes_labels[...,3:4]/2
        
    if box_format =="corners": # [min x , min y , max x , max y ]
        box1_x1 = boxes_preds[...,0:1]
        box1_y1 = boxes_preds[...,1:2]
        box1_x2 = boxes_preds[...,2:3]
        box1_y2 = boxes_preds[...,3:4]
        
        box2_x1 = boxes_labels[...,0:1]
        box2_y1 = boxes_labels[...,1:2]
        box2_x2 = boxes_labels[...,2:3]
        box2_y2 = boxes_labels[...,3:4]
    
    # Calculate IOU
    # box1과 box2의 공통된 영역의 x1 y1 x2 y2 계산
    x1 = torch.max(box1_x1 , box2_x1) # => (... , 1)
    y1 = torch.max(box1_y1 , box2_y1) # => (... , 1)
    x2 = torch.min(box1_x2 , box2_x2) # => (... , 1)
    y2 = torch.min(box1_y2 , box2_y2) # => (... , 1)
    # torch.clamp => 두 bounding box의 교집합이 없는 경우 (x2-x1) < 0 이고, (y2-y1) < 0 가 되는데
    # clamp를 사용하여 0보다 작으면 0으로 변경하는 연산
    intersection = (x2-x1).clamp(0)*(y2-y1).clamp(0)
    
    box1_area = abs((box1_x2 - box1_x1)*(box1_y2-box1_y1))
    box2_area = abs((box2_x2 - box2_x1)*(box2_y2-box2_y1))
    
    return intersection / (box1_area+box2_area - intersection + 1e-6)

    
    
        
        

Overwriting utils.py


In [69]:
torch.randn(10,20,1) *torch.randn(10,20,4)

tensor([[[-2.5132e-01,  4.6952e-01,  1.5760e+00,  1.8567e+00],
         [ 5.0284e-01, -1.8092e+00,  5.7576e-01, -1.3015e+00],
         [ 1.2145e-01,  4.2196e-02, -9.9300e-02, -1.5776e-02],
         [ 2.8582e+00,  1.8284e+00, -1.9579e+00, -2.2795e-01],
         [-7.4400e-01,  4.5556e-01,  3.9584e-01, -4.9630e-01],
         [-3.1231e-01,  1.0872e+00,  6.6775e-01, -8.0256e-01],
         [-8.5357e-01,  3.7244e-01, -4.1710e-02,  8.0324e-01],
         [ 1.6985e+00, -3.4034e-01,  1.3576e-02, -1.2326e+00],
         [ 1.6847e-01,  6.0962e-02,  6.6231e-02, -3.1844e-01],
         [ 4.1069e-02, -6.7639e-02, -2.2936e-02,  5.0985e-02],
         [-1.8575e+00, -6.8541e-01,  2.0984e-01,  8.1979e-01],
         [ 9.9836e-02,  2.8077e-04,  1.7064e-01, -1.0605e-01],
         [ 1.2838e-01, -9.9127e-01,  4.8531e-01, -7.5959e-01],
         [-5.8952e-01, -1.5339e-01,  9.9495e-03, -2.4602e-01],
         [ 3.6586e-01, -7.6267e-01, -2.2626e-02, -1.8047e+00],
         [-5.1042e-01, -1.4695e-01,  2.5315e-01, -2.120

In [65]:
%%writefile dataset.py
import torch
import torchvision.transforms as transforms 
from PIL import Image
import pandas as pd

class VOCDataset(torch.utils.data.Dataset):
    def __init__(self , csv_file , S=7, B=2, C=20 , transforms=None):
        self.df = pd.read_csv(csv_file)
        self.S = S
        self.B = B
        self.C = C
        self.transforms = transforms    
    def __len__(self):
        return len(self.df)
        
    def __getitem__(self, x):
        image_path = self.df.iloc[x,0]
        label_path = self.df.iloc[x,1]
        boxes = []
        with open(label_path , "r") as f:
            for object in f.readlines():
                class_label , x, y , w, h = [int(i) if float(i)==int(float(i)) else float(i)  for i in object.strip().split(" ")]
                boxes.append((class_label, x , y , w, h))
        img = Image.open(image_path)
        boxes = torch.tensor(boxes)
        
        if self.transforms:
            img , boxes = self.transforms(img , boxes)
        label_matrix = torch.zeros(self.S , self.S , self.C+self.B*5)
        # 앞에서 구한 x,y,w,h가 셀을 기준으로 어디에 포함되는 구하고, 셀을 기준으로 x,y,w,h 계산
        for box in boxes:
            class_label , x , y , w , h = box.tolist()
            class_label = int(class_label)
            # i,j => SxS에서의 객체 중심점이 위치한 행, 열 정보
            i,j = int(self.S*y) , int(self.S*x)
            # 셀 기준 중심점 위치
            x_cell , y_cell = self.S*x - j , self.S*y - i
            # 셀 기준 bounding box width , hegith
            width_cell , height_cell = self.S*w , self.S*h
            # label_matrix = [7,7,30] => 0~19는 class확률 , 20 : confidence ,21~24 bounding box, 25 : confidence , 26~29 : boundingbox 
            if label_matrix[i,j,20] == 0:
                label_matrix[i,j,20]=1
                box_coordinates = torch.tensor([x_cell , y_cell , width_cell , height_cell])
                label_matrix[i,j,21:25] = box_coordinates
                label_matrix[i,j,class_label] = 1            
        return img , label_matrix
    
        
        
        

Overwriting dataset.py


In [66]:
from dataset import VOCDataset
train_dataset = VOCDataset("./data/train.csv")
test_dataset = VOCDataset("./data/test.csv")
img , label = train_dataset[0]
img2 , label2 = test_dataset[0]

len(train_dataset) + len(test_dataset)

5011

In [52]:
df = pd.read_csv("./data/test.csv" )
txt = df.iloc[1,1]
img = df.iloc[1,0]
boxes = []
with open(txt , "r") as f:
    for label in f.readlines():
        class_label , x, y, w,h = [ int(i) if float(i) == int(float(i)) else float(i) for i in label.strip().split(" ") ]
        boxes.append((class_label , x , y , w, h))
img = Image.open(img)
boxes = torch.tensor(boxes)

label_matrix = torch.zeros(7,7,30)
label_matrix.shape
for box in boxes:
    class_label , x , y , w, h = box.tolist()
    class_label = int(class_label)
    # 중심점이 7x7에서 어느 행,열에 속하는지(i,j)
    i, j = int(7*y) , int(7*x)
    # cell 기준으로 하여 중심점 위치 
    x_cell , y_cell = 7*x - j  , 7*y - i
    # 7x7 기준 이미지 width ,height 
    width_cell , height_cell = w*7 , h*7
    # label_matrix = [7,7,30] => 0~19는 class확률 , 20 : confidence ,21~24 bounding box, 25 : confidence , 26~29 : boundingbox 
    if label_matrix[i,j,20] == 0:
        label_matrix[i,j,20] = 1
        box_coordinates = torch.tensor([x_cell , y_cell , width_cell , height_cell])
        label_matrix[i,j,21:25] = box_coordinates
        label_matrix[i,j,25] = 1
    
        
    

        

AttributeError: 'Tensor' object has no attribute 'to_list'

In [31]:
[i if i > 5 else 0 for i in range(10) ]

[0, 0, 0, 0, 0, 0, 6, 7, 8, 9]

In [43]:
float(3) == int(3)

True

In [1]:
## NMS 계산
import torch
from collections import Counter
from utils import intersection_over_union

def mean_average_precision(
    pred_boxes:list,
    true_boxes,
    iou_threshold=0.5,
    box_format="corners",
    num_classes = 20
):
    # pred_boxes = [[train_idx , class_pred , prob_score , x1,y1,x2,y2] , [] ,...]
    average_precisions = []
    epsilon = 1e-6 
    
    # 클래스 갯수 만큼 AP 계산
    for c in range(num_classes):
        detections = []
        ground_truths = []
        
        # 현재 클래스 번호로 예측한 것과 정답을 가져옴
        for detection in pred_boxes:
            if detection[1] == c:
                detections.append(detection)
        for true_box in true_boxes:
            if true_box[1] == c:
                ground_truths.append(detection)

        # 각 이미지 번호에 대하여 c클래스인 object가 몇개 있는지 확인
        # img0 has 3 c class
        # img1 has 5 c class
        # {0 :3 , 1:5}
        amount_bboxes = Counter(gt[0] for gt in ground_truths)
        
        # {0:torch.tensor([0,0,0]) , 1:torch.tensor([0,0,0,0,0])}
        for key ,val in amount_bboxes.items():
            amount_bboxes[key] = torch.zeros(val)
        
        # 예측한 bboxes의 confidence socre로 내림차순 정렬
        detections.sort(key = lambda x : x[2] , reverse=True)
        TP = torch.zeros(len(detections))
        
        

        
        

In [36]:
## train 
import torch
import torchvision
import torch.optim as optim
import torchvision.transforms as transforms
from tqdm import tqdm
from torch.utils.data import DataLoader
from model import Yolov1
from dataset import VOCDataset
from loss import YoloLoss
from utils import intersection_over_union 

In [21]:
import torch
import torchvision.transforms as transforms
import torch.optim as optim
import torchvision.transforms.functional as FT
from tqdm import tqdm
from torch.utils.data import DataLoader
from model import Yolov1
from dataset import VOCDataset
from utils import (
    non_max_suppression,
    mean_average_precision,
    intersection_over_union,
    cellboxes_to_boxes,
    get_bboxes,
    plot_image,
    save_checkpoint,
    load_checkpoint,
)
from loss import YoloLoss
torch.manual_seed(42)


<torch._C.Generator at 0x1edffcdce10>

In [25]:
LEARNING_RATE = 2e-5
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
BATCH_SIZE = 16 # 64 in original paper but I don't have that much vram, grad accum?
WEIGHT_DECAY = 0
EPOCHS = 10
PIN_MEMORY = True
LOAD_MODEL = False
LOAD_MODEL_FILE = "overfit.pth.tar"
DEVICE

'cpu'

In [35]:

import torch
import torchvision.transforms as transforms
import torch.optim as optim
import torchvision.transforms.functional as FT
from tqdm import tqdm
from torch.utils.data import DataLoader
from model import Yolov1
from dataset import VOCDataset
from utils import (
    non_max_suppression,
    mean_average_precision,
    intersection_over_union,
    cellboxes_to_boxes,
    get_bboxes,
    plot_image,
    save_checkpoint,
    load_checkpoint,
)
from loss import YoloLoss
torch.manual_seed(42)

LEARNING_RATE = 2e-5
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
BATCH_SIZE = 16 # 64 in original paper but I don't have that much vram, grad accum?
WEIGHT_DECAY = 0
EPOCHS = 10
PIN_MEMORY = True
LOAD_MODEL = False
LOAD_MODEL_FILE = "overfit.pth.tar"

class Compose():
    def __init__(self):
        self.transforms=transforms.Compose([
        transforms.Resize(size=(448,448)),
        transforms.ToTensor()
        ])

    def __call__(self , img , bboxes):
        img  = self.transforms(img)
        return img , bboxes
    
data_transforms = Compose()

def train_fn(train_loader , model , optimizer , loss_fn , device):
    mean_loss= []
    for batch_idx , (x,y) in tqdm(enumerate(train_loader)):
        print(x)
        x , y = x.to(device) , y.to(device)
        out = model(x)
        loss = loss_fn(out , y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        mean_loss.append(loss.item())
    print(f"Mean loss was {sum(mean_loss)/len(mean_loss)}")


def main():
    model = Yolov1(split_size = 7 , num_boxes=2 , num_classes=20).to(DEVICE)
    loss_fn = YoloLoss()
    optimizer = optim.Adam(model.parameters() , lr = LEARNING_RATE , weight_decay=WEIGHT_DECAY)
    
    if LOAD_MODEL:
        load_checkpoint(torch.load(LOAD_MODEL_FILE), model, optimizer)
    
    train_dataset = VOCDataset("./data/train.csv" , transforms=data_transforms)
    test_dataset = VOCDataset("./data/test.csv" , transforms=data_transforms)
    
    train_loader = DataLoader(train_dataset , batch_size = BATCH_SIZE , shuffle=True , pin_memory=PIN_MEMORY)
    test_loader = DataLoader(test_dataset , batch_size = BATCH_SIZE , shuffle=True , pin_memory=PIN_MEMORY)
    
    
    for epoch in range(EPOCHS):
        pred_boxes, target_boxes = get_bboxes(
            train_loader, model, iou_threshold=0.5, threshold=0.4,device=DEVICE
        )
        mean_avg_prec = mean_average_precision(
            pred_boxes, target_boxes, iou_threshold=0.5, box_format="midpoint"
        )
        print(f"Train mAP: {mean_avg_prec}")
        
        train_fn(train_loader, model, optimizer, loss_fn,DEVICE)
# if __name__ == "__main__":
    # main()

    
    
    
    
    
    
    

In [1]:
import torch
import torchvision.transforms as transforms
import torch.optim as optim
import torchvision.transforms.functional as FT
from tqdm import tqdm
from torch.utils.data import DataLoader
from model import Yolov1
from dataset import VOCDataset
from utils import (
    non_max_suppression,
    mean_average_precision,
    intersection_over_union,
    cellboxes_to_boxes,
    get_bboxes,
    plot_image,
    save_checkpoint,
    load_checkpoint,
)
from loss import YoloLoss

LEARNING_RATE = 2e-5
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
BATCH_SIZE = 16 # 64 in original paper but I don't have that much vram, grad accum?
WEIGHT_DECAY = 0
EPOCHS = 10
PIN_MEMORY = True
LOAD_MODEL = False
LOAD_MODEL_FILE = "overfit.pth.tar"

class Compose():
    def __init__(self):
        self.transforms=transforms.Compose([
        transforms.Resize(size=(448,448)),
        transforms.ToTensor()
        ])

    def __call__(self , img , bboxes):
        img  = self.transforms(img)
        return img , bboxes
    
data_transforms = Compose()
train_dataset = VOCDataset("./data/train.csv" , transforms=data_transforms)
test_dataset = VOCDataset("./data/test.csv" , transforms=data_transforms)
    
train_loader = DataLoader(train_dataset , batch_size = BATCH_SIZE , shuffle=True , pin_memory=PIN_MEMORY)
test_loader = DataLoader(test_dataset , batch_size = BATCH_SIZE , shuffle=True , pin_memory=PIN_MEMORY)
model = Yolov1(split_size = 7 , num_boxes=2 , num_classes=20).to(DEVICE)


In [2]:
img , label = next(iter(train_loader))
pred = model(img)

In [3]:
import torch
import torch.nn as nn
from utils import intersection_over_union 

class YoloLoss(nn.Module):
    """Calculate loss of yolov1

    """
    def __init__(self, S=7, B=2, C=20):
        super(YoloLoss, self).__init__()
        self.mse = nn.MSELoss(reduction="sum")

        """
        S is split size of image (in paper 7),
        B is number of boxes (in paper 2),
        C is number of classes (in paper and VOC dataset is 20),
        """
        self.S = S
        self.B = B
        self.C = C

        # These are from Yolo paper, signifying how much we should
        # pay loss for no object (noobj) and the box coordinates (coord)
        self.lambda_noobj = 0.5
        self.lambda_coord = 5
        
    def forward(self , predictions , target):
        
        predictions = predictions.reshape(-1 , self.S , self.S , self.C+self.B*5)
      
        
        iou_b1 = intersection_over_union(predictions[...,21:25], target[...,21:25]) #(N , S ,S , 1)
        iou_b2 = intersection_over_union(predictions[...,26:30], target[...,21:25]) #(N , S ,S , 1)
        ious = torch.cat([iou_b1.unsqueeze(0) , iou_b2.unsqueeze(0)], dim=0) #(2,N , S ,S , 1)
        
        
        asd , bestbox = torch.max(ious , dim=0)  
        exists_box = target[..., 20].unsqueeze(3)
        
        # box_predictions = exists_box*(   # => (N,S,S,1)
            
        #     bestbox*predictions[...,26:30] # => (N,S,S,4)
           
        #     + (1-bestbox)*predictions[...,21:25] # => (N,S,S,4)
        # )
        box_predictions = exists_box * (
            (
                bestbox * predictions[..., 26:30]
                + (1 - bestbox) * predictions[..., 21:25]
            )
        )

        box_target = exists_box*target[...,21:25] # => (N,S,S,4)       
        
        # box_predictions[...,2:4] = torch.sign(box_predictions[...,2:4])*torch.sqrt(torch.abs(box_predictions[...,2:4])+ 1e-6)
        box_predictions[..., 2:4] = torch.sign(box_predictions[..., 2:4]) * torch.sqrt(
            torch.abs(box_predictions[..., 2:4] + 1e-6)
        )
        box_target[...,2:4] = torch.sqrt(box_target[...,2:4])
        
        box_loss = self.mse(
            torch.flatten(box_predictions , end_dim=-2) , torch.flatten(box_target , end_dim = -2)
        )
        
        
        #=====================#
        #   FOR OBJECT LOSS   #
        #=====================#
        # confidenc socre
        # (N,S,S,1) => (N*S*S,1)
        # pred_confidence = exists_box*(bestbox*(predictions[...,25:26]) + (1-bestbox)*(predictions[...,20:21]))
        # target_confidence = exists_box*target[...,20:21]
        
        # object_loss = self.mse(
        #     torch.flatten(pred_confidence) , torch.flatten(target_confidence)
        # )
        
        pred_box = (
            bestbox * predictions[..., 25:26] + (1 - bestbox) * predictions[..., 20:21]
        )

        object_loss = self.mse(
            torch.flatten(exists_box * pred_box),
            torch.flatten(exists_box * target[..., 20:21]),
        )
        
        #========================#
        #   FOR NO OBJECT LOSS   #
        #========================#
        # (N , S , S , 1) =>(N , S*S*1)
        # pred_noobject = (1-exists_box)*(predictions[...,25:26]) + (1-exists_box)*(predictions[...,20:21])
        # target_noobject = (1-exists_box)*target[...,20:21] + (1-exists_box)*target[...,20:21] 
        # no_object_loss = self.mse(
        #     torch.flatten(pred_noobject , start_dim=1 ) , torch.flatten(target_noobject , start_dim=1)
        # )
        no_object_loss = self.mse(
            torch.flatten((1 - exists_box) * predictions[..., 20:21], start_dim=1),
            torch.flatten((1 - exists_box) * target[..., 20:21], start_dim=1),
        )

        no_object_loss += self.mse(
            torch.flatten((1 - exists_box) * predictions[..., 25:26], start_dim=1),
            torch.flatten((1 - exists_box) * target[..., 20:21], start_dim=1)
        )
        
        
        #====================#
        #   FOR CLASS LOSS   #
        #====================#
        
        class_loss = self.mse(
            torch.flatten(exists_box*predictions[...,:20] , end_dim=-2),
            torch.flatten(exists_box*target[...,:20] , end_dim=-2)
        )
        
        loss = (
            self.lambda_coord*box_loss
            + object_loss
            + self.lambda_noobj*no_object_loss
            + class_loss
        )
        return loss
loss_fn = YoloLoss()

In [4]:
# torch.autograd.set_detect_anomaly(True)
loss = loss_fn(pred , label)
print(loss)


tensor(1282.7128, grad_fn=<AddBackward0>)


In [5]:
loss.backward()