In [1]:
import numpy as np
import os 
import pandas as pd
import cv2
import torch
import matplotlib.pyplot as plt
from ipywidgets import interact
import albumentations as A
from albumentations.pytorch import ToTensorV2
import torchvision
from torch import nn
import torchsummary
from torch.utils.data import DataLoader
from collections import defaultdict
from torchvision.utils import make_grid

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

# Utils

In [3]:
CLASS_NAME_TO_ID = {'Unformed': 0, 'Burr': 1}
CLASS_ID_TO_NAME = {0: 'Unformed', 1: 'Burr'}
BOX_COLOR = {'Unformed':(200, 0, 0), 'Burr':(0, 0, 200)}
TEXT_COLOR = (255, 255, 255)

def save_model(model_state, model_name, save_dir="./trained_model"):
    os.makedirs(save_dir, exist_ok=True)
    torch.save(model_state, os.path.join(save_dir, model_name))


def visualize_bbox(image, bbox, class_name, color=BOX_COLOR, thickness=2):
    x_center, y_center, w, h = bbox
    x_min = int(x_center - w/2)
    y_min = int(y_center - h/2)
    x_max = int(x_center + w/2)
    y_max = int(y_center + h/2)
    
    cv2.rectangle(image, (x_min, y_min), (x_max, y_max), color=color[class_name], thickness=thickness)
    
    ((text_width, text_height), _) = cv2.getTextSize(class_name, cv2.FONT_HERSHEY_SIMPLEX, 0.35, 1)    
    cv2.rectangle(image, (x_min, y_min - int(1.3 * text_height)), (x_min + text_width, y_min), color[class_name], -1)
    cv2.putText(
        image,
        text=class_name,
        org=(x_min, y_min - int(0.3 * text_height)),
        fontFace=cv2.FONT_HERSHEY_SIMPLEX,
        fontScale=0.35, 
        color=TEXT_COLOR, 
        lineType=cv2.LINE_AA,
    )
    return image


def visualize(image, bboxes, category_ids):
    img = image.copy()
    for bbox, category_id in zip(bboxes, category_ids):
#         print('category_id: ',category_id)
        class_name = CLASS_ID_TO_NAME[category_id.item()]
        img = visualize_bbox(img, bbox, class_name)
    return img

# Datasets

In [4]:
class PET_dataset():
    def __init__(self,part,neck_dir,body_dir,phase, transformer=None, aug=None, aug_factor=0):
        self.neck_dir=neck_dir
        self.body_dir=body_dir
        self.part=part
        self.phase=phase
        self.transformer=transformer
        self.aug=aug
        self.aug_factor=aug_factor
        if(self.part=="body"):
            self.image_files = sorted([fn for fn in os.listdir(self.body_dir+"/"+self.phase+"/image") if fn.endswith("jpg")])
            self.label_files= sorted([lab for lab in os.listdir(self.body_dir+"/"+self.phase+"/label") if lab.endswith("txt")])
        elif(self.part=="neck"):
            self.image_files = sorted([fn for fn in os.listdir(self.neck_dir+"/"+self.phase+"/image") if fn.endswith("jpg")])
            self.label_files= sorted([lab for lab in os.listdir(self.neck_dir+"/"+self.phase+"/label") if lab.endswith("txt")])
        
        self.auged_img_list, self.auged_label_list=self.make_aug_list(self.image_files, self.label_files)
        
    def __getitem__(self,index):
        if(self.aug==None):
            filename, image = self.get_image(self.part, index)
            bboxes, class_ids = self.get_label(self.part, index)

            if(self.transformer):
                transformed_data=self.transformer(image=image, bboxes=bboxes, class_ids=class_ids)
                image = transformed_data['image']
                bboxes = np.array(transformed_data['bboxes'])
                class_ids = np.array(transformed_data['class_ids'])


            target = {}
    #         print(f'bboxes:{bboxes}\nclass_ids:{class_ids}\nlen_bboxes:{len(bboxes)}\nlen_class_ids:{len(class_ids)}')
    #         print(f'filename: {filename}')
            target["boxes"] = torch.Tensor(bboxes).float()
            target["labels"] = torch.Tensor(class_ids).long()

            ###
            bboxes=torch.Tensor(bboxes).float()
            class_ids=torch.Tensor(class_ids).long()
            target = np.concatenate((bboxes, class_ids[:, np.newaxis]), axis=1)
            ###
        else:
            image=self.auged_img_list[index][1]
            target=self.auged_label_list[index]
            filename=self.auged_img_list[index][0]
        return image, target, filename
    
    def __len__(self, ):
        length=0
        if(self.aug==None):
            length=len(self.image_files)
        else:
            length=len(self.auged_img_list)
        return length
    
    def make_aug_list(self,ori_image_list,ori_label_files):
        aug_image_list=[]
        aug_label_list=[]
        
        print(f"start making augmented images-- augmented factor:{self.aug_factor}")
        for i in range(len(ori_image_list)):
            filename, ori_image = self.get_image(self.part, i)
            ori_bboxes, ori_class_ids = self.get_label(self.part, i)
            for j in range(self.aug_factor):
                auged_data=self.aug(image=ori_image, bboxes=ori_bboxes, class_ids=ori_class_ids)
                image = auged_data['image']
                bboxes = np.array(auged_data['bboxes'])
                class_ids = np.array(auged_data['class_ids'])
                
                bboxes=torch.Tensor(bboxes).float()
                class_ids=torch.Tensor(class_ids).long()
                
                aug_image_list.append((filename, image))
                aug_label_list.append(np.concatenate((bboxes, class_ids[:, np.newaxis]), axis=1))
        
        print(f"total length of augmented images: {len(aug_image_list)}")
        
        return aug_image_list, aug_label_list
        
    
    def get_image(self, part, index): # 이미지 불러오는 함수
        filename = self.image_files[index]
        if(part=="body"):
#             print(f"body called!-> {self.part}")
            image_path = self.body_dir+"/"+self.phase+"/image/"+filename
        elif(part=="neck"):
#             print(f"neck called!-> {self.part}")
            image_path = self.neck_dir+"/"+self.phase+"/image/"+filename
        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        return filename, image
    
    def get_label(self, part, index): # label (box좌표, class_id) 불러오는 함수
        label_filename=self.label_files[index]
        if(part=="body"):
#             print(f"body label called!-> {self.part}")
            label_path = self.body_dir+"/"+self.phase+"/label/"+label_filename
        elif(part=="neck"):
#             print(f"neck label called!-> {self.part}")
            label_path = self.neck_dir+"/"+self.phase+"/label/"+label_filename
        with open(label_path, 'r') as file:
            labels = file.readlines()
        
        class_ids=[]
        bboxes=[]
        for label in labels:
            label=label.replace("\n", "")
            obj=label.split(' ')[0]
            coor=label.split(' ')[1:]
            obj=int(obj)
            coor=list(map(float, coor))
            class_ids.append(obj)
            bboxes.append(coor)
            
        return bboxes, class_ids
    

In [5]:
IMAGE_SIZE = 448

transformer = A.Compose([ 
        # bounding box의 변환, augmentation에서 albumentations는 Detection 학습을 할 때 굉장히 유용하다. 
        A.Resize(height=IMAGE_SIZE, width=IMAGE_SIZE),
        A.Normalize(mean=(0.485, 0.456, 0.406),std=(0.229, 0.224, 0.225)),
        ToTensorV2(),
        # albumentations 라이브러리에서는 Normalization을 먼저 진행해 주고 tensor화를 진행해 주어야한다.
    ],
    # box 위치에 대한 transformation도 함께 진행된다. 
    bbox_params=A.BboxParams(format='yolo', label_fields=['class_ids']),
)

augmentator=A.Compose([
#     A.Resize(height=IMAGE_SIZE, width=IMAGE_SIZE),
    A.HorizontalFlip(p=0.7),
#     A.Sharpen(p=0.7),
    A.BBoxSafeRandomCrop(p=0.6),
    A.VerticalFlip (p=0.5),
    A.HueSaturationValue(p=0.5),
    A.Resize(height=IMAGE_SIZE, width=IMAGE_SIZE),
    A.Normalize(mean=(0.485, 0.456, 0.406),std=(0.229, 0.224, 0.225)),
    ToTensorV2(),
    ],
    bbox_params=A.BboxParams(format='yolo', label_fields=['class_ids']),
)

def collate_fn(batch):
    image_list = []
    target_list = []
    filename_list = []
    
    for a,b,c in batch:
        image_list.append(a)
        target_list.append(b)
        filename_list.append(c)

    return torch.stack(image_list, dim=0), target_list, filename_list


In [6]:
NECK_PATH = '/home/host_data/PET_data/Neck'
BODY_PATH = '/home/host_data/PET_data/Body'
trainset_yes_aug=PET_dataset(part='neck',neck_dir=NECK_PATH,body_dir=BODY_PATH,phase='train', transformer=transformer, aug=augmentator, aug_factor=20)
trainset_no_aug=PET_dataset(part='neck',neck_dir=NECK_PATH,body_dir=BODY_PATH,phase='train', transformer=transformer, aug=None)


start making augmented images-- augmented factor:20
total length of augmented images: 4200
start making augmented images-- augmented factor:0
total length of augmented images: 0


In [7]:
len(trainset_yes_aug)

4200

In [8]:
@interact(index=(0, len(trainset_no_aug)-1))

def show_sample(index=0):
    image, target, filename = trainset_no_aug[index]
    image=image.permute(1,2,0).numpy()
    img_H, img_W, _ = image.shape
    print(filename)
    print(image.shape)
#     print(image)

#     bboxes = target['boxes']
#     class_ids = target["labels"]
    
    ###
    bboxes = target[:, 0:4]
    class_ids = target[:, 4]
    ###
    bboxes[:, [0,2]] *= img_W
    bboxes[:, [1,3]] *= img_H

    canvas = visualize(image, bboxes, class_ids)
    plt.figure(figsize=(6,6))
    plt.imshow(canvas)
    plt.axis('off')
    plt.show()

# show_sample()

interactive(children=(IntSlider(value=0, description='index', max=209), Output()), _dom_classes=('widget-inter…

In [9]:
@interact(index=(0, len(trainset_yes_aug)-1))

def show_sample(index=0):
    image, target, filename = trainset_yes_aug[index]
    image=image.permute(1,2,0).numpy()
    img_H, img_W, _ = image.shape
    print(filename)
    print(image.shape)
#     print(image)

#     bboxes = target['boxes']
#     class_ids = target["labels"]
    ###
    bboxes = target[:, 0:4]
    class_ids = target[:, 4]
    ###
    bboxes[:, [0,2]] *= img_W
    bboxes[:, [1,3]] *= img_H
    print(bboxes)

    canvas = visualize(image, bboxes, class_ids)
    plt.figure(figsize=(6,6))
    plt.imshow(canvas)
    plt.axis('off')
    plt.show()

# show_sample()

interactive(children=(IntSlider(value=0, description='index', max=4199), Output()), _dom_classes=('widget-inte…

## Model

In [6]:
class YOLO_RESNET18(nn.Module):
    def __init__(self, num_classes):
        super().__init__()

        self.num_classes = num_classes
        self.num_bboxes = 2
        self.grid_size = 7

        resnet18 = torchvision.models.resnet18(pretrained = True)
        swin=torchvision.models.swin_v2_t(weights='IMAGENET1K_V1')
        layers = [m for m in resnet18.children()] #Resnet에서 Yolo에서 가져올수 있을만한 layer만 선별적으로 가져오기 위해서

        # 기존 Resnet18의 layer들중에서 맨 뒤에 두개만 제외하고 다 가져와서 Backbone으로 사용
        self.backbone = nn.Sequential(*layers[:-2]) 
        self.head = nn.Sequential(
                nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=1, padding=0,bias=False),
                nn.BatchNorm2d(1024),
                nn.ReLU(inplace=True),
                nn.Conv2d(in_channels=1024, out_channels=1024, kernel_size=3, padding=1,bias=False),
                nn.BatchNorm2d(1024),
                nn.ReLU(inplace=True),
                nn.Conv2d(in_channels=1024, out_channels=1024, kernel_size=3, padding=1,bias=False),
                nn.BatchNorm2d(1024),
                nn.ReLU(inplace=True),
                nn.Conv2d(in_channels=1024, out_channels=1024, kernel_size=3, padding=1,bias=False),
                nn.BatchNorm2d(1024),
                nn.ReLU(inplace=True),

                nn.Conv2d(in_channels=1024, out_channels=(4+1)*self.num_bboxes+num_classes, kernel_size=1, padding=0, bias=False),
                nn.AdaptiveAvgPool2d(output_size=(self.grid_size, self.grid_size))
            )

    def forward(self, x):
        out = self.backbone(x)
        # out = self.neck(out)
        out = self.head(out) # input (batch, 3, 448, 448) -> output feature (batch, 12, 7, 7)
        return out


In [9]:
NUM_CLASSES = 2
model = YOLO_RESNET18(num_classes=NUM_CLASSES)
model.to(device)



YOLO_RESNET18(
  (backbone): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stat

In [10]:
torchsummary.summary(model, (3,448,448))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 224, 224]           9,408
       BatchNorm2d-2         [-1, 64, 224, 224]             128
              ReLU-3         [-1, 64, 224, 224]               0
         MaxPool2d-4         [-1, 64, 112, 112]               0
            Conv2d-5         [-1, 64, 112, 112]          36,864
       BatchNorm2d-6         [-1, 64, 112, 112]             128
              ReLU-7         [-1, 64, 112, 112]               0
            Conv2d-8         [-1, 64, 112, 112]          36,864
       BatchNorm2d-9         [-1, 64, 112, 112]             128
             ReLU-10         [-1, 64, 112, 112]               0
       BasicBlock-11         [-1, 64, 112, 112]               0
           Conv2d-12         [-1, 64, 112, 112]          36,864
      BatchNorm2d-13         [-1, 64, 112, 112]             128
             ReLU-14         [-1, 64, 1

In [11]:
x = torch.randn(1, 3, 448, 448).to(device)
with torch.no_grad():
    y = model(x)
print(y.shape)

torch.Size([1, 12, 7, 7])


# Loss func

In [12]:
class YOLO_LOSS():
    def __init__(self, num_classes, device, lambda_coord=5., lambda_noobj=0.5):
        self.num_classes = num_classes
        self.device = device
        self.grid_size = 7
        self.lambda_coord = lambda_coord
        self.lambda_noobj = lambda_noobj
        self.mse_loss = nn.MSELoss(reduction="sum")

    def __call__(self, predictions, targets):
        self.batch_size, _, _, _ = predictions.shape
        groundtruths = self.build_batch_target_grid(targets)
        groundtruths = groundtruths.to(self.device)
        
        with torch.no_grad():
            iou1 = self.get_IoU(predictions[:, 1:5, ...], groundtruths[:, 1:5, ...])
            iou2 = self.get_IoU(predictions[:, 6:10, ...], groundtruths[:, 1:5, ...])

        ious = torch.stack([iou1, iou2], dim=1)
        max_iou, best_box = ious.max(dim=1, keepdim=True)
        max_iou = torch.cat([max_iou, max_iou], dim=1)
        best_box = torch.cat([best_box.eq(0), best_box.eq(1)], dim=1)

        predictions_ = predictions[:, :5*2, ...].reshape(self.batch_size, 2, 5, self.grid_size, self.grid_size)
        obj_pred = predictions_[:, :, 0, ...]
        xy_pred = predictions_[:, :, 1:3, ...]
        wh_pred = predictions_[:, :, 3:5, ...]
        cls_pred = predictions[:, 5*2:, ...]

        groundtruths_ = groundtruths[:, :5, ...].reshape(self.batch_size, 1, 5, self.grid_size, self.grid_size)
        obj_target = groundtruths_[:, :, 0, ...]
        xy_target = groundtruths_[:, :, 1:3, ...]
        wh_target= groundtruths_[:, :, 3:5, ...]
        cls_target = groundtruths[:, 5:, ...]
        
        positive = obj_target * best_box

        obj_loss = self.mse_loss(positive * obj_pred, positive * ious)
        noobj_loss = self.mse_loss((1 - positive) * obj_pred, ious*0)
        xy_loss = self.mse_loss(positive.unsqueeze(dim=2) * xy_pred, positive.unsqueeze(dim=2) * xy_target)
        wh_loss = self.mse_loss(positive.unsqueeze(dim=2) * (wh_pred.sign() * (wh_pred.abs() + 1e-8).sqrt()),
                           positive.unsqueeze(dim=2) * (wh_target + 1e-8).sqrt())
        cls_loss = self.mse_loss(obj_target * cls_pred, cls_target)
        
        obj_loss /= self.batch_size
        noobj_loss /= self.batch_size
        bbox_loss = (xy_loss+wh_loss) / self.batch_size
        cls_loss /= self.batch_size
        
        total_loss = obj_loss + self.lambda_noobj*noobj_loss + self.lambda_coord*bbox_loss + cls_loss
        return total_loss, (obj_loss.item(), noobj_loss.item(), bbox_loss.item(), cls_loss.item())
    
    def build_target_grid(self, target):
        target_grid = torch.zeros((1+4+self.num_classes, self.grid_size, self.grid_size), device=self.device)

        for gt in target:
            xc, yc, w, h, cls_id = gt
            xn = (xc % (1/self.grid_size))
            yn = (yc % (1/self.grid_size))
            cls_id = int(cls_id)

            i_grid = int(xc * self.grid_size)
            j_grid = int(yc * self.grid_size)
            target_grid[0, j_grid, i_grid] = 1
            target_grid[1:5, j_grid, i_grid] = torch.Tensor([xn,yn,w,h])
#             print(5+cls_id, j_grid, i_grid)
            target_grid[5+cls_id, j_grid, i_grid] = 1

        return target_grid
    
    def build_batch_target_grid(self, targets):
        target_grid_batch = torch.stack([self.build_target_grid(target) for target in targets], dim=0)
        return target_grid_batch
    
    def get_IoU(self, cbox1, cbox2):
        box1 = self.xywh_to_xyxy(cbox1)
        box2 = self.xywh_to_xyxy(cbox2)

        x1 = torch.max(box1[:, 0, ...], box2[:, 0, ...])
        y1 = torch.max(box1[:, 1, ...], box2[:, 1, ...])
        x2 = torch.min(box1[:, 2, ...], box2[:, 2, ...])
        y2 = torch.min(box1[:, 3, ...], box2[:, 3, ...])

        intersection = (x2-x1).clamp(min=0) * (y2-y1).clamp(min=0)
        union = abs(cbox1[:, 2, ...]*cbox1[:, 3, ...]) + \
                abs(cbox2[:, 2, ...]*cbox2[:, 3, ...]) - intersection

        intersection[intersection.gt(0)] = intersection[intersection.gt(0)] / union[intersection.gt(0)]
        return intersection
    
    def generate_xy_normed_grid(self):
        y_offset, x_offset = torch.meshgrid(torch.arange(self.grid_size), torch.arange(self.grid_size))
        xy_grid = torch.stack([x_offset, y_offset], dim=0)
        xy_normed_grid = xy_grid / self.grid_size
        return xy_normed_grid.to(self.device)

    def xywh_to_xyxy(self, bboxes):
        xy_normed_grid = self.generate_xy_normed_grid()
        xcyc = bboxes[:,0:2,...] + xy_normed_grid.tile(self.batch_size, 1,1,1)
        wh = bboxes[:,2:4,...]
        x1y1 = xcyc - (wh/2)
        x2y2 = xcyc + (wh/2)
        return torch.cat([x1y1, x2y2], dim=1)

# Train

In [13]:
def train_one_epoch(dataloaders, model, criterion, optimizer, device):
    train_loss = defaultdict(float)
    val_loss = defaultdict(float)
    
    for phase in ["train", "val"]:
        if phase == "train":
            model.train()
        else:
            model.eval()
        
        running_loss = defaultdict(float)
        for index, batch in enumerate(dataloaders[phase]):
            images = batch[0].to(device)
            targets = batch[1]
            filenames = batch[2]
            
            with torch.set_grad_enabled(phase == "train"): # phase가 train 일때만 gradient 추적기능을 킨다.
                predictions = model(images) #prediction shape=> B,12,7,7
#             print(f"predictions:{predictions}, \ntargets: {targets}\n")
            loss, (obj_loss, noobj_loss, bbox_loss, cls_loss) = criterion(predictions, targets)
#             print(f"loss:{loss}, obj_loss:{obj_loss}, noobj_loss:{noobj_loss}\nbbox_loss:{bbox_loss}, cls_loss:{cls_loss}\n--------------\n")
            if phase == "train":
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                
                # 현재 epoch단계에서 loss가 얼마인지 running loss 가출력
                running_loss["total_loss"] += loss.item()
                running_loss["obj_loss"] += obj_loss
                running_loss["noobj_loss"] += noobj_loss
                running_loss["bbox_loss"] += bbox_loss
                running_loss["cls_loss"] += cls_loss
                
                train_loss["total_loss"] += loss.item()
                train_loss["obj_loss"] += obj_loss
                train_loss["noobj_loss"] += noobj_loss
                train_loss["bbox_loss"] += bbox_loss
                train_loss["cls_loss"] += cls_loss
                
                if (index > 0) and (index % VERBOSE_FREQ) == 0:
                    text = f"<<<iteration:[{index}/{len(dataloaders[phase])}] - "
                    for k, v in running_loss.items():
                        text += f"{k}: {v/VERBOSE_FREQ:.4f}  "
                        running_loss[k] = 0.
                    print(text)
            else:
                val_loss["total_loss"] += loss.item()
                val_loss["obj_loss"] += obj_loss
                val_loss["noobj_loss"] += noobj_loss
                val_loss["bbox_loss"] += bbox_loss
                val_loss["cls_loss"] += cls_loss

    for k in train_loss.keys():
        train_loss[k] /= len(dataloaders["train"])
        val_loss[k] /= len(dataloaders["val"])
    return train_loss, val_loss

In [14]:
def build_dataloader(part, NECK_PATH, BODY_PATH, batch_size=2, aug_factor=0):
    IMAGE_SIZE = 448
    transformer = A.Compose([
            A.Resize(height=IMAGE_SIZE, width=IMAGE_SIZE),
            A.Normalize(mean=(0.485, 0.456, 0.406),std=(0.229, 0.224, 0.225)),
            ToTensorV2(),
        ],
        bbox_params=A.BboxParams(format='yolo', label_fields=['class_ids']),
    )
    augmentator=A.Compose([
    #     A.Resize(height=IMAGE_SIZE, width=IMAGE_SIZE),
        A.HorizontalFlip(p=0.7),
    #     A.Sharpen(p=0.7),
        A.BBoxSafeRandomCrop(p=0.6),
        A.VerticalFlip (p=0.6),
        A.HueSaturationValue(p=0.6),
        A.Resize(height=IMAGE_SIZE, width=IMAGE_SIZE),
        A.Normalize(mean=(0.485, 0.456, 0.406),std=(0.229, 0.224, 0.225)),
        ToTensorV2(),
        ],
        bbox_params=A.BboxParams(format='yolo', label_fields=['class_ids']),
    )
    
    dataloaders = {}
#     train_dataset = Detection_dataset(data_dir=data_dir, phase="train", transformer=transformer)
    train_dataset=PET_dataset(part ,neck_dir=NECK_PATH,body_dir=BODY_PATH,phase='train', transformer=transformer, aug=augmentator, aug_factor=aug_factor)
    dataloaders["train"] = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)

#     val_dataset = Detection_dataset(data_dir=data_dir, phase="val", transformer=transformer)
    val_dataset=PET_dataset(part ,neck_dir=NECK_PATH,body_dir=BODY_PATH,phase='valid', transformer=transformer, aug=augmentator, aug_factor=aug_factor)
    dataloaders["val"] = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)
    return dataloaders

In [15]:
# data_dir = "/content/drive/MyDrive/fastCamMedicalProj/DATASET/DATASET/Detection/"
NECK_PATH = '/home/host_data/PET_data/Neck'
BODY_PATH = '/home/host_data/PET_data/Body'
is_cuda = True

NUM_CLASSES = 2
IMAGE_SIZE = 448
BATCH_SIZE = 16
VERBOSE_FREQ = 20
LR=0.0001
AUG_FACTOR=20
BACKBONE="RESNET18"
PART="neck"
num_epochs = 100
# DEVICE = torch.device('cuda' if torch.cuda.is_available and is_cuda else 'cpu')

dataloaders = build_dataloader(part=PART,NECK_PATH=NECK_PATH,BODY_PATH=BODY_PATH,batch_size=BATCH_SIZE, aug_factor=AUG_FACTOR)
model = YOLO_RESNET18(num_classes=NUM_CLASSES)
model = model.to(device)
criterion = YOLO_LOSS(num_classes=NUM_CLASSES, device=device)
optimizer = torch.optim.SGD(model.parameters(), lr=LR)

start making augmented images-- augmented factor:20
total length of augmented images: 4200
start making augmented images-- augmented factor:20
total length of augmented images: 720


In [16]:
import wandb
import random

# start a new wandb run to track this script
wandb.init(
    # set the wandb project where this run will be logged
    project="yolo_resnet_neck",
    
    # track hyperparameters and run metadata
    config={
    "learning_rate": LR,
    "batch_size": BATCH_SIZE,
    "architecture": BACKBONE,
    "dataset": PART,
    "epochs": num_epochs,
    "aug factor":AUG_FACTOR,
    }
)

[34m[1mwandb[0m: Currently logged in as: [33mgomduribo[0m ([33murp[0m). Use [1m`wandb login --relogin`[0m to force relogin


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113778666670744, max=1.0…

In [17]:
best_epoch = 0
best_score = float('inf')
train_losses = []
val_losses = []

for epoch in range(num_epochs):
    train_loss, val_loss = train_one_epoch(dataloaders, model, criterion, optimizer, device)
    train_losses.append(train_loss)
    val_losses.append(val_loss)
#     train_loss["obj_loss"] += obj_loss
#     train_loss["noobj_loss"] += noobj_loss
#     train_loss["bbox_loss"] += bbox_loss
#     train_loss["cls_loss"] += cls_loss
    wandb.log({"Train Loss": train_loss['total_loss'],
               "Train obj Loss":train_loss["obj_loss"],
               "Train bbox Loss":train_loss["bbox_loss"],
               "Train class Loss":train_loss["cls_loss"],
               "Val Loss": val_loss['total_loss'],
               "Val obj Loss":val_loss["obj_loss"],
               "Val bbox Loss":val_loss["bbox_loss"],
               "Val class Loss":val_loss["cls_loss"],})
    print(f"\nepoch:{epoch+1}/{num_epochs} - Train Loss: {train_loss['total_loss']:.4f}, Val Loss: {val_loss['total_loss']:.4f}\n")
    
    if (epoch+1) % 10 == 0:
        save_model(model.state_dict(), f'model_{epoch+1}.pth', save_dir=f"./trained_model/{BACKBONE}_{PART}_LR{LR}_AUG{AUG_FACTOR}")
wandb.finish()

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


<<<iteration:[20/263] - total_loss: 5.7435  obj_loss: 0.0715  noobj_loss: 4.7979  bbox_loss: 0.4961  cls_loss: 0.7924  
<<<iteration:[40/263] - total_loss: 4.2361  obj_loss: 0.0634  noobj_loss: 3.5838  bbox_loss: 0.3766  cls_loss: 0.4978  
<<<iteration:[60/263] - total_loss: 3.6727  obj_loss: 0.0619  noobj_loss: 3.0938  bbox_loss: 0.3348  cls_loss: 0.3897  
<<<iteration:[80/263] - total_loss: 3.4641  obj_loss: 0.0438  noobj_loss: 2.8264  bbox_loss: 0.3382  cls_loss: 0.3160  
<<<iteration:[100/263] - total_loss: 3.4294  obj_loss: 0.0498  noobj_loss: 2.6097  bbox_loss: 0.3553  cls_loss: 0.2984  
<<<iteration:[120/263] - total_loss: 3.1805  obj_loss: 0.0387  noobj_loss: 2.3963  bbox_loss: 0.3319  cls_loss: 0.2839  
<<<iteration:[140/263] - total_loss: 2.9631  obj_loss: 0.0439  noobj_loss: 2.2691  bbox_loss: 0.3046  cls_loss: 0.2615  
<<<iteration:[160/263] - total_loss: 2.7794  obj_loss: 0.0400  noobj_loss: 2.1793  bbox_loss: 0.2786  cls_loss: 0.2567  
<<<iteration:[180/263] - total_loss:

<<<iteration:[40/263] - total_loss: 0.5834  obj_loss: 0.0553  noobj_loss: 0.3390  bbox_loss: 0.0555  cls_loss: 0.0813  
<<<iteration:[60/263] - total_loss: 0.5569  obj_loss: 0.0621  noobj_loss: 0.3186  bbox_loss: 0.0481  cls_loss: 0.0949  
<<<iteration:[80/263] - total_loss: 0.5542  obj_loss: 0.0605  noobj_loss: 0.3248  bbox_loss: 0.0516  cls_loss: 0.0734  
<<<iteration:[100/263] - total_loss: 0.5231  obj_loss: 0.0663  noobj_loss: 0.3140  bbox_loss: 0.0441  cls_loss: 0.0791  
<<<iteration:[120/263] - total_loss: 0.5305  obj_loss: 0.0599  noobj_loss: 0.3051  bbox_loss: 0.0472  cls_loss: 0.0821  
<<<iteration:[140/263] - total_loss: 0.5279  obj_loss: 0.0673  noobj_loss: 0.3035  bbox_loss: 0.0453  cls_loss: 0.0826  
<<<iteration:[160/263] - total_loss: 0.5334  obj_loss: 0.0700  noobj_loss: 0.2893  bbox_loss: 0.0483  cls_loss: 0.0772  
<<<iteration:[180/263] - total_loss: 0.5446  obj_loss: 0.0657  noobj_loss: 0.2800  bbox_loss: 0.0521  cls_loss: 0.0783  
<<<iteration:[200/263] - total_loss

<<<iteration:[60/263] - total_loss: 0.3126  obj_loss: 0.0924  noobj_loss: 0.1236  bbox_loss: 0.0225  cls_loss: 0.0458  
<<<iteration:[80/263] - total_loss: 0.3256  obj_loss: 0.1002  noobj_loss: 0.1254  bbox_loss: 0.0218  cls_loss: 0.0538  
<<<iteration:[100/263] - total_loss: 0.3308  obj_loss: 0.0956  noobj_loss: 0.1240  bbox_loss: 0.0241  cls_loss: 0.0526  
<<<iteration:[120/263] - total_loss: 0.3331  obj_loss: 0.1092  noobj_loss: 0.1244  bbox_loss: 0.0214  cls_loss: 0.0548  
<<<iteration:[140/263] - total_loss: 0.3270  obj_loss: 0.1100  noobj_loss: 0.1252  bbox_loss: 0.0208  cls_loss: 0.0505  
<<<iteration:[160/263] - total_loss: 0.3571  obj_loss: 0.1177  noobj_loss: 0.1237  bbox_loss: 0.0238  cls_loss: 0.0586  
<<<iteration:[180/263] - total_loss: 0.3345  obj_loss: 0.0983  noobj_loss: 0.1226  bbox_loss: 0.0238  cls_loss: 0.0561  
<<<iteration:[200/263] - total_loss: 0.3221  obj_loss: 0.0981  noobj_loss: 0.1191  bbox_loss: 0.0223  cls_loss: 0.0530  
<<<iteration:[220/263] - total_los

<<<iteration:[80/263] - total_loss: 0.2815  obj_loss: 0.1241  noobj_loss: 0.0903  bbox_loss: 0.0160  cls_loss: 0.0324  
<<<iteration:[100/263] - total_loss: 0.2844  obj_loss: 0.1208  noobj_loss: 0.0923  bbox_loss: 0.0151  cls_loss: 0.0418  
<<<iteration:[120/263] - total_loss: 0.3035  obj_loss: 0.1322  noobj_loss: 0.0902  bbox_loss: 0.0152  cls_loss: 0.0501  
<<<iteration:[140/263] - total_loss: 0.2901  obj_loss: 0.1276  noobj_loss: 0.0916  bbox_loss: 0.0154  cls_loss: 0.0397  
<<<iteration:[160/263] - total_loss: 0.2796  obj_loss: 0.1170  noobj_loss: 0.0937  bbox_loss: 0.0154  cls_loss: 0.0387  
<<<iteration:[180/263] - total_loss: 0.2809  obj_loss: 0.1283  noobj_loss: 0.0925  bbox_loss: 0.0138  cls_loss: 0.0373  
<<<iteration:[200/263] - total_loss: 0.2875  obj_loss: 0.1325  noobj_loss: 0.0906  bbox_loss: 0.0148  cls_loss: 0.0356  
<<<iteration:[220/263] - total_loss: 0.2944  obj_loss: 0.1243  noobj_loss: 0.0946  bbox_loss: 0.0155  cls_loss: 0.0455  
<<<iteration:[240/263] - total_lo

<<<iteration:[100/263] - total_loss: 0.2822  obj_loss: 0.1445  noobj_loss: 0.0809  bbox_loss: 0.0133  cls_loss: 0.0307  
<<<iteration:[120/263] - total_loss: 0.2829  obj_loss: 0.1308  noobj_loss: 0.0807  bbox_loss: 0.0145  cls_loss: 0.0395  
<<<iteration:[140/263] - total_loss: 0.2491  obj_loss: 0.1312  noobj_loss: 0.0854  bbox_loss: 0.0108  cls_loss: 0.0213  
<<<iteration:[160/263] - total_loss: 0.2814  obj_loss: 0.1378  noobj_loss: 0.0826  bbox_loss: 0.0139  cls_loss: 0.0331  
<<<iteration:[180/263] - total_loss: 0.2935  obj_loss: 0.1543  noobj_loss: 0.0840  bbox_loss: 0.0140  cls_loss: 0.0273  
<<<iteration:[200/263] - total_loss: 0.2981  obj_loss: 0.1462  noobj_loss: 0.0873  bbox_loss: 0.0153  cls_loss: 0.0317  
<<<iteration:[220/263] - total_loss: 0.3005  obj_loss: 0.1542  noobj_loss: 0.0837  bbox_loss: 0.0144  cls_loss: 0.0323  
<<<iteration:[240/263] - total_loss: 0.2712  obj_loss: 0.1285  noobj_loss: 0.0831  bbox_loss: 0.0129  cls_loss: 0.0364  
<<<iteration:[260/263] - total_l

<<<iteration:[120/263] - total_loss: 0.2725  obj_loss: 0.1439  noobj_loss: 0.0823  bbox_loss: 0.0121  cls_loss: 0.0270  
<<<iteration:[140/263] - total_loss: 0.2724  obj_loss: 0.1462  noobj_loss: 0.0810  bbox_loss: 0.0110  cls_loss: 0.0306  
<<<iteration:[160/263] - total_loss: 0.2741  obj_loss: 0.1520  noobj_loss: 0.0848  bbox_loss: 0.0101  cls_loss: 0.0290  
<<<iteration:[180/263] - total_loss: 0.2710  obj_loss: 0.1447  noobj_loss: 0.0797  bbox_loss: 0.0111  cls_loss: 0.0312  
<<<iteration:[200/263] - total_loss: 0.2720  obj_loss: 0.1453  noobj_loss: 0.0817  bbox_loss: 0.0113  cls_loss: 0.0292  
<<<iteration:[220/263] - total_loss: 0.2706  obj_loss: 0.1422  noobj_loss: 0.0796  bbox_loss: 0.0122  cls_loss: 0.0274  
<<<iteration:[240/263] - total_loss: 0.2703  obj_loss: 0.1454  noobj_loss: 0.0814  bbox_loss: 0.0113  cls_loss: 0.0276  
<<<iteration:[260/263] - total_loss: 0.2562  obj_loss: 0.1407  noobj_loss: 0.0797  bbox_loss: 0.0102  cls_loss: 0.0247  

epoch:26/100 - Train Loss: 0.27

<<<iteration:[140/263] - total_loss: 0.2923  obj_loss: 0.1614  noobj_loss: 0.0878  bbox_loss: 0.0110  cls_loss: 0.0319  
<<<iteration:[160/263] - total_loss: 0.2643  obj_loss: 0.1486  noobj_loss: 0.0862  bbox_loss: 0.0105  cls_loss: 0.0200  
<<<iteration:[180/263] - total_loss: 0.2721  obj_loss: 0.1501  noobj_loss: 0.0853  bbox_loss: 0.0105  cls_loss: 0.0268  
<<<iteration:[200/263] - total_loss: 0.2755  obj_loss: 0.1599  noobj_loss: 0.0850  bbox_loss: 0.0099  cls_loss: 0.0238  
<<<iteration:[220/263] - total_loss: 0.2592  obj_loss: 0.1463  noobj_loss: 0.0808  bbox_loss: 0.0098  cls_loss: 0.0235  
<<<iteration:[240/263] - total_loss: 0.2683  obj_loss: 0.1574  noobj_loss: 0.0831  bbox_loss: 0.0094  cls_loss: 0.0226  
<<<iteration:[260/263] - total_loss: 0.2645  obj_loss: 0.1452  noobj_loss: 0.0844  bbox_loss: 0.0101  cls_loss: 0.0263  

epoch:31/100 - Train Loss: 0.2708, Val Loss: 0.2968

<<<iteration:[20/263] - total_loss: 0.2710  obj_loss: 0.1482  noobj_loss: 0.0866  bbox_loss: 0.0118

<<<iteration:[160/263] - total_loss: 0.2688  obj_loss: 0.1584  noobj_loss: 0.0878  bbox_loss: 0.0092  cls_loss: 0.0207  
<<<iteration:[180/263] - total_loss: 0.2472  obj_loss: 0.1446  noobj_loss: 0.0844  bbox_loss: 0.0087  cls_loss: 0.0172  
<<<iteration:[200/263] - total_loss: 0.2677  obj_loss: 0.1563  noobj_loss: 0.0856  bbox_loss: 0.0089  cls_loss: 0.0243  
<<<iteration:[220/263] - total_loss: 0.2571  obj_loss: 0.1519  noobj_loss: 0.0857  bbox_loss: 0.0081  cls_loss: 0.0216  
<<<iteration:[240/263] - total_loss: 0.2730  obj_loss: 0.1642  noobj_loss: 0.0891  bbox_loss: 0.0086  cls_loss: 0.0213  
<<<iteration:[260/263] - total_loss: 0.2728  obj_loss: 0.1661  noobj_loss: 0.0860  bbox_loss: 0.0095  cls_loss: 0.0162  

epoch:36/100 - Train Loss: 0.2642, Val Loss: 0.2851

<<<iteration:[20/263] - total_loss: 0.2782  obj_loss: 0.1709  noobj_loss: 0.0931  bbox_loss: 0.0084  cls_loss: 0.0189  
<<<iteration:[40/263] - total_loss: 0.2541  obj_loss: 0.1550  noobj_loss: 0.0856  bbox_loss: 0.0076 

<<<iteration:[180/263] - total_loss: 0.2740  obj_loss: 0.1679  noobj_loss: 0.0887  bbox_loss: 0.0087  cls_loss: 0.0183  
<<<iteration:[200/263] - total_loss: 0.2545  obj_loss: 0.1554  noobj_loss: 0.0882  bbox_loss: 0.0081  cls_loss: 0.0147  
<<<iteration:[220/263] - total_loss: 0.2651  obj_loss: 0.1627  noobj_loss: 0.0905  bbox_loss: 0.0081  cls_loss: 0.0169  
<<<iteration:[240/263] - total_loss: 0.2568  obj_loss: 0.1552  noobj_loss: 0.0885  bbox_loss: 0.0082  cls_loss: 0.0163  
<<<iteration:[260/263] - total_loss: 0.2608  obj_loss: 0.1583  noobj_loss: 0.0863  bbox_loss: 0.0085  cls_loss: 0.0171  

epoch:41/100 - Train Loss: 0.2629, Val Loss: 0.2794

<<<iteration:[20/263] - total_loss: 0.2892  obj_loss: 0.1755  noobj_loss: 0.0973  bbox_loss: 0.0089  cls_loss: 0.0207  
<<<iteration:[40/263] - total_loss: 0.2704  obj_loss: 0.1618  noobj_loss: 0.0901  bbox_loss: 0.0089  cls_loss: 0.0192  
<<<iteration:[60/263] - total_loss: 0.2561  obj_loss: 0.1552  noobj_loss: 0.0900  bbox_loss: 0.0075  

<<<iteration:[200/263] - total_loss: 0.2562  obj_loss: 0.1530  noobj_loss: 0.0925  bbox_loss: 0.0078  cls_loss: 0.0180  
<<<iteration:[220/263] - total_loss: 0.2585  obj_loss: 0.1534  noobj_loss: 0.0901  bbox_loss: 0.0089  cls_loss: 0.0156  
<<<iteration:[240/263] - total_loss: 0.2640  obj_loss: 0.1605  noobj_loss: 0.0923  bbox_loss: 0.0080  cls_loss: 0.0174  
<<<iteration:[260/263] - total_loss: 0.2766  obj_loss: 0.1573  noobj_loss: 0.0945  bbox_loss: 0.0100  cls_loss: 0.0221  

epoch:46/100 - Train Loss: 0.2631, Val Loss: 0.2869

<<<iteration:[20/263] - total_loss: 0.2738  obj_loss: 0.1605  noobj_loss: 0.0968  bbox_loss: 0.0093  cls_loss: 0.0184  
<<<iteration:[40/263] - total_loss: 0.2830  obj_loss: 0.1749  noobj_loss: 0.0899  bbox_loss: 0.0089  cls_loss: 0.0184  
<<<iteration:[60/263] - total_loss: 0.2583  obj_loss: 0.1494  noobj_loss: 0.0925  bbox_loss: 0.0091  cls_loss: 0.0173  
<<<iteration:[80/263] - total_loss: 0.2606  obj_loss: 0.1631  noobj_loss: 0.0910  bbox_loss: 0.0078  c

<<<iteration:[220/263] - total_loss: 0.2616  obj_loss: 0.1618  noobj_loss: 0.0958  bbox_loss: 0.0069  cls_loss: 0.0172  
<<<iteration:[240/263] - total_loss: 0.2573  obj_loss: 0.1591  noobj_loss: 0.0943  bbox_loss: 0.0075  cls_loss: 0.0134  
<<<iteration:[260/263] - total_loss: 0.2602  obj_loss: 0.1615  noobj_loss: 0.0966  bbox_loss: 0.0070  cls_loss: 0.0153  

epoch:51/100 - Train Loss: 0.2601, Val Loss: 0.2728

<<<iteration:[20/263] - total_loss: 0.2758  obj_loss: 0.1765  noobj_loss: 0.1013  bbox_loss: 0.0068  cls_loss: 0.0147  
<<<iteration:[40/263] - total_loss: 0.2621  obj_loss: 0.1645  noobj_loss: 0.0970  bbox_loss: 0.0069  cls_loss: 0.0146  
<<<iteration:[60/263] - total_loss: 0.2478  obj_loss: 0.1521  noobj_loss: 0.0962  bbox_loss: 0.0067  cls_loss: 0.0140  
<<<iteration:[80/263] - total_loss: 0.2523  obj_loss: 0.1591  noobj_loss: 0.0926  bbox_loss: 0.0068  cls_loss: 0.0128  
<<<iteration:[100/263] - total_loss: 0.2673  obj_loss: 0.1700  noobj_loss: 0.0945  bbox_loss: 0.0068  c

<<<iteration:[240/263] - total_loss: 0.2579  obj_loss: 0.1641  noobj_loss: 0.1019  bbox_loss: 0.0063  cls_loss: 0.0115  
<<<iteration:[260/263] - total_loss: 0.2606  obj_loss: 0.1661  noobj_loss: 0.1010  bbox_loss: 0.0063  cls_loss: 0.0127  

epoch:56/100 - Train Loss: 0.2588, Val Loss: 0.2724

<<<iteration:[20/263] - total_loss: 0.2797  obj_loss: 0.1790  noobj_loss: 0.1064  bbox_loss: 0.0071  cls_loss: 0.0121  
<<<iteration:[40/263] - total_loss: 0.2551  obj_loss: 0.1592  noobj_loss: 0.0974  bbox_loss: 0.0065  cls_loss: 0.0145  
<<<iteration:[60/263] - total_loss: 0.2657  obj_loss: 0.1693  noobj_loss: 0.1018  bbox_loss: 0.0067  cls_loss: 0.0122  
<<<iteration:[80/263] - total_loss: 0.2610  obj_loss: 0.1693  noobj_loss: 0.1003  bbox_loss: 0.0058  cls_loss: 0.0124  
<<<iteration:[100/263] - total_loss: 0.2667  obj_loss: 0.1727  noobj_loss: 0.0978  bbox_loss: 0.0063  cls_loss: 0.0134  
<<<iteration:[120/263] - total_loss: 0.2485  obj_loss: 0.1484  noobj_loss: 0.1016  bbox_loss: 0.0070  c

<<<iteration:[260/263] - total_loss: 0.2435  obj_loss: 0.1542  noobj_loss: 0.0964  bbox_loss: 0.0061  cls_loss: 0.0105  

epoch:61/100 - Train Loss: 0.2578, Val Loss: 0.2636

<<<iteration:[20/263] - total_loss: 0.2757  obj_loss: 0.1755  noobj_loss: 0.1052  bbox_loss: 0.0070  cls_loss: 0.0127  
<<<iteration:[40/263] - total_loss: 0.2580  obj_loss: 0.1629  noobj_loss: 0.0989  bbox_loss: 0.0063  cls_loss: 0.0140  
<<<iteration:[60/263] - total_loss: 0.2427  obj_loss: 0.1516  noobj_loss: 0.0976  bbox_loss: 0.0064  cls_loss: 0.0105  
<<<iteration:[80/263] - total_loss: 0.2663  obj_loss: 0.1723  noobj_loss: 0.1042  bbox_loss: 0.0064  cls_loss: 0.0098  
<<<iteration:[100/263] - total_loss: 0.2628  obj_loss: 0.1638  noobj_loss: 0.1033  bbox_loss: 0.0064  cls_loss: 0.0151  
<<<iteration:[120/263] - total_loss: 0.2612  obj_loss: 0.1590  noobj_loss: 0.1043  bbox_loss: 0.0067  cls_loss: 0.0167  
<<<iteration:[140/263] - total_loss: 0.2615  obj_loss: 0.1647  noobj_loss: 0.1043  bbox_loss: 0.0062  c


epoch:66/100 - Train Loss: 0.2593, Val Loss: 0.2612

<<<iteration:[20/263] - total_loss: 0.2740  obj_loss: 0.1799  noobj_loss: 0.1101  bbox_loss: 0.0054  cls_loss: 0.0121  
<<<iteration:[40/263] - total_loss: 0.2516  obj_loss: 0.1541  noobj_loss: 0.1050  bbox_loss: 0.0062  cls_loss: 0.0142  
<<<iteration:[60/263] - total_loss: 0.2616  obj_loss: 0.1657  noobj_loss: 0.1069  bbox_loss: 0.0063  cls_loss: 0.0110  
<<<iteration:[80/263] - total_loss: 0.2620  obj_loss: 0.1653  noobj_loss: 0.1069  bbox_loss: 0.0060  cls_loss: 0.0130  
<<<iteration:[100/263] - total_loss: 0.2543  obj_loss: 0.1653  noobj_loss: 0.1068  bbox_loss: 0.0051  cls_loss: 0.0103  
<<<iteration:[120/263] - total_loss: 0.2444  obj_loss: 0.1565  noobj_loss: 0.1056  bbox_loss: 0.0049  cls_loss: 0.0104  
<<<iteration:[140/263] - total_loss: 0.2667  obj_loss: 0.1720  noobj_loss: 0.1051  bbox_loss: 0.0059  cls_loss: 0.0126  
<<<iteration:[160/263] - total_loss: 0.2590  obj_loss: 0.1636  noobj_loss: 0.1039  bbox_loss: 0.0063  c

<<<iteration:[40/263] - total_loss: 0.2508  obj_loss: 0.1571  noobj_loss: 0.1040  bbox_loss: 0.0054  cls_loss: 0.0147  
<<<iteration:[60/263] - total_loss: 0.2637  obj_loss: 0.1645  noobj_loss: 0.1098  bbox_loss: 0.0059  cls_loss: 0.0147  
<<<iteration:[80/263] - total_loss: 0.2536  obj_loss: 0.1626  noobj_loss: 0.1087  bbox_loss: 0.0051  cls_loss: 0.0114  
<<<iteration:[100/263] - total_loss: 0.2560  obj_loss: 0.1671  noobj_loss: 0.1034  bbox_loss: 0.0052  cls_loss: 0.0112  
<<<iteration:[120/263] - total_loss: 0.2511  obj_loss: 0.1617  noobj_loss: 0.1067  bbox_loss: 0.0051  cls_loss: 0.0107  
<<<iteration:[140/263] - total_loss: 0.2440  obj_loss: 0.1522  noobj_loss: 0.1066  bbox_loss: 0.0057  cls_loss: 0.0102  
<<<iteration:[160/263] - total_loss: 0.2652  obj_loss: 0.1717  noobj_loss: 0.1107  bbox_loss: 0.0056  cls_loss: 0.0104  
<<<iteration:[180/263] - total_loss: 0.2472  obj_loss: 0.1558  noobj_loss: 0.1021  bbox_loss: 0.0061  cls_loss: 0.0099  
<<<iteration:[200/263] - total_loss

<<<iteration:[60/263] - total_loss: 0.2545  obj_loss: 0.1627  noobj_loss: 0.1060  bbox_loss: 0.0054  cls_loss: 0.0117  
<<<iteration:[80/263] - total_loss: 0.2628  obj_loss: 0.1674  noobj_loss: 0.1159  bbox_loss: 0.0052  cls_loss: 0.0113  
<<<iteration:[100/263] - total_loss: 0.2584  obj_loss: 0.1674  noobj_loss: 0.1150  bbox_loss: 0.0049  cls_loss: 0.0090  
<<<iteration:[120/263] - total_loss: 0.2418  obj_loss: 0.1490  noobj_loss: 0.1100  bbox_loss: 0.0056  cls_loss: 0.0098  
<<<iteration:[140/263] - total_loss: 0.2534  obj_loss: 0.1514  noobj_loss: 0.1066  bbox_loss: 0.0076  cls_loss: 0.0109  
<<<iteration:[160/263] - total_loss: 0.2547  obj_loss: 0.1657  noobj_loss: 0.0998  bbox_loss: 0.0058  cls_loss: 0.0098  
<<<iteration:[180/263] - total_loss: 0.2551  obj_loss: 0.1649  noobj_loss: 0.1054  bbox_loss: 0.0055  cls_loss: 0.0100  
<<<iteration:[200/263] - total_loss: 0.2460  obj_loss: 0.1541  noobj_loss: 0.1111  bbox_loss: 0.0052  cls_loss: 0.0102  
<<<iteration:[220/263] - total_los

<<<iteration:[80/263] - total_loss: 0.2538  obj_loss: 0.1616  noobj_loss: 0.1153  bbox_loss: 0.0051  cls_loss: 0.0091  
<<<iteration:[100/263] - total_loss: 0.2570  obj_loss: 0.1649  noobj_loss: 0.1134  bbox_loss: 0.0052  cls_loss: 0.0096  
<<<iteration:[120/263] - total_loss: 0.2568  obj_loss: 0.1684  noobj_loss: 0.1079  bbox_loss: 0.0052  cls_loss: 0.0084  
<<<iteration:[140/263] - total_loss: 0.2560  obj_loss: 0.1650  noobj_loss: 0.1145  bbox_loss: 0.0049  cls_loss: 0.0094  
<<<iteration:[160/263] - total_loss: 0.2568  obj_loss: 0.1664  noobj_loss: 0.1151  bbox_loss: 0.0044  cls_loss: 0.0108  
<<<iteration:[180/263] - total_loss: 0.2459  obj_loss: 0.1565  noobj_loss: 0.1110  bbox_loss: 0.0052  cls_loss: 0.0079  
<<<iteration:[200/263] - total_loss: 0.2650  obj_loss: 0.1737  noobj_loss: 0.1146  bbox_loss: 0.0052  cls_loss: 0.0081  
<<<iteration:[220/263] - total_loss: 0.2488  obj_loss: 0.1589  noobj_loss: 0.1136  bbox_loss: 0.0049  cls_loss: 0.0087  
<<<iteration:[240/263] - total_lo

<<<iteration:[100/263] - total_loss: 0.2546  obj_loss: 0.1594  noobj_loss: 0.1171  bbox_loss: 0.0052  cls_loss: 0.0104  
<<<iteration:[120/263] - total_loss: 0.2713  obj_loss: 0.1707  noobj_loss: 0.1206  bbox_loss: 0.0053  cls_loss: 0.0136  
<<<iteration:[140/263] - total_loss: 0.2472  obj_loss: 0.1539  noobj_loss: 0.1136  bbox_loss: 0.0051  cls_loss: 0.0111  
<<<iteration:[160/263] - total_loss: 0.2504  obj_loss: 0.1611  noobj_loss: 0.1136  bbox_loss: 0.0048  cls_loss: 0.0084  
<<<iteration:[180/263] - total_loss: 0.2444  obj_loss: 0.1524  noobj_loss: 0.1165  bbox_loss: 0.0047  cls_loss: 0.0099  
<<<iteration:[200/263] - total_loss: 0.2484  obj_loss: 0.1556  noobj_loss: 0.1209  bbox_loss: 0.0049  cls_loss: 0.0080  
<<<iteration:[220/263] - total_loss: 0.2597  obj_loss: 0.1682  noobj_loss: 0.1172  bbox_loss: 0.0050  cls_loss: 0.0081  
<<<iteration:[240/263] - total_loss: 0.2584  obj_loss: 0.1598  noobj_loss: 0.1190  bbox_loss: 0.0058  cls_loss: 0.0100  
<<<iteration:[260/263] - total_l

<<<iteration:[120/263] - total_loss: 0.2596  obj_loss: 0.1712  noobj_loss: 0.1060  bbox_loss: 0.0052  cls_loss: 0.0092  
<<<iteration:[140/263] - total_loss: 0.2434  obj_loss: 0.1521  noobj_loss: 0.1174  bbox_loss: 0.0048  cls_loss: 0.0086  
<<<iteration:[160/263] - total_loss: 0.2426  obj_loss: 0.1505  noobj_loss: 0.1222  bbox_loss: 0.0046  cls_loss: 0.0080  
<<<iteration:[180/263] - total_loss: 0.2443  obj_loss: 0.1532  noobj_loss: 0.1134  bbox_loss: 0.0052  cls_loss: 0.0086  
<<<iteration:[200/263] - total_loss: 0.2423  obj_loss: 0.1552  noobj_loss: 0.1120  bbox_loss: 0.0045  cls_loss: 0.0087  
<<<iteration:[220/263] - total_loss: 0.2443  obj_loss: 0.1562  noobj_loss: 0.1192  bbox_loss: 0.0042  cls_loss: 0.0078  
<<<iteration:[240/263] - total_loss: 0.2327  obj_loss: 0.1435  noobj_loss: 0.1102  bbox_loss: 0.0049  cls_loss: 0.0095  
<<<iteration:[260/263] - total_loss: 0.2432  obj_loss: 0.1499  noobj_loss: 0.1192  bbox_loss: 0.0049  cls_loss: 0.0092  

epoch:92/100 - Train Loss: 0.24

<<<iteration:[140/263] - total_loss: 0.2558  obj_loss: 0.1640  noobj_loss: 0.1224  bbox_loss: 0.0044  cls_loss: 0.0086  
<<<iteration:[160/263] - total_loss: 0.2317  obj_loss: 0.1449  noobj_loss: 0.1171  bbox_loss: 0.0042  cls_loss: 0.0073  
<<<iteration:[180/263] - total_loss: 0.2587  obj_loss: 0.1640  noobj_loss: 0.1187  bbox_loss: 0.0049  cls_loss: 0.0108  
<<<iteration:[200/263] - total_loss: 0.2529  obj_loss: 0.1653  noobj_loss: 0.1170  bbox_loss: 0.0044  cls_loss: 0.0071  
<<<iteration:[220/263] - total_loss: 0.2448  obj_loss: 0.1535  noobj_loss: 0.1193  bbox_loss: 0.0047  cls_loss: 0.0084  
<<<iteration:[240/263] - total_loss: 0.2433  obj_loss: 0.1502  noobj_loss: 0.1220  bbox_loss: 0.0046  cls_loss: 0.0093  
<<<iteration:[260/263] - total_loss: 0.2489  obj_loss: 0.1576  noobj_loss: 0.1189  bbox_loss: 0.0047  cls_loss: 0.0083  

epoch:97/100 - Train Loss: 0.2479, Val Loss: 0.2504

<<<iteration:[20/263] - total_loss: 0.2618  obj_loss: 0.1692  noobj_loss: 0.1262  bbox_loss: 0.0046

0,1
Train Loss,█▄▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Train bbox Loss,█▄▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Train class Loss,█▄▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Train obj Loss,▂▁▃▄▅▅▆▆▇▇▇▇▇███████████████████████▇███
Val Loss,█▄▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Val bbox Loss,█▄▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Val class Loss,█▆▄▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Val obj Loss,▁▁▃▅▆▇▇▇▇██▇█▇█▇███▇████▇█▇█▇▇▇▇▇▇▇▇▇▇▆▇

0,1
Train Loss,0.24857
Train bbox Loss,0.0046
Train class Loss,0.00811
Train obj Loss,0.15668
Val Loss,0.25613
Val bbox Loss,0.01434
Val class Loss,0.01602
Val obj Loss,0.10882


# Test Dataset Inference

In [20]:
import numpy as np
import os 
import pandas as pd
import cv2
import torch
import matplotlib.pyplot as plt
from ipywidgets import interact
import albumentations as A
from albumentations.pytorch import ToTensorV2
import torchvision
from torch import nn
import torchsummary
from torch.utils.data import DataLoader
from collections import defaultdict
from torchvision.utils import make_grid

In [21]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [22]:
def load_model(ckpt_path, num_classes, device):
    checkpoint = torch.load(ckpt_path, map_location=device)
    model = YOLO_SWIN(num_classes=num_classes)
    model.load_state_dict(checkpoint)
    model = model.to(device)
    model.eval()
    return model

In [23]:
IMAGE_SIZE=448
transformer = A.Compose([
            A.Resize(height=IMAGE_SIZE, width=IMAGE_SIZE),
            A.Normalize(mean=(0.485, 0.456, 0.406),std=(0.229, 0.224, 0.225)),
            ToTensorV2(),
        ],
        bbox_params=A.BboxParams(format='yolo', label_fields=['class_ids']),
)

In [24]:
# ckpt_path="./trained_model/YOLO_SWIN_T_body_LR0.0001_AUG30/model_90.pth"
ckpt_path="/workspace/Plastic_Bottle_defect_detection/trained_model/YOLO_SWIN_T_neck_LR0.0001_AUG20/model_100.pth"
model = load_model(ckpt_path, NUM_CLASSES, device)

In [25]:
NECK_PATH = '/home/host_data/PET_data/Neck'
BODY_PATH = '/home/host_data/PET_data/Body'
test_dataset=PET_dataset("neck" ,neck_dir=NECK_PATH,body_dir=BODY_PATH,phase='test', transformer=transformer, aug=None)
test_dataloaders = DataLoader(test_dataset, batch_size=1, shuffle=False, collate_fn=collate_fn)

start making augmented images-- augmented factor:0
total length of augmented images: 0


In [26]:
len(test_dataset)

25

In [27]:
@torch.no_grad()
def model_predict(image, model, conf_thres=0.2, iou_threshold=0.1):
    predictions = model(image)
    prediction = predictions.detach().cpu().squeeze(dim=0)
#     print(prediction.shape)
    
    grid_size = prediction.shape[-1]
    y_grid, x_grid = torch.meshgrid(torch.arange(grid_size), torch.arange(grid_size))
    stride_size = IMAGE_SIZE/grid_size

    conf = prediction[[0,5], ...].reshape(1, -1)
    xc = (prediction[[1,6], ...] * IMAGE_SIZE + x_grid*stride_size).reshape(1,-1)
    yc = (prediction[[2,7], ...] * IMAGE_SIZE + y_grid*stride_size).reshape(1,-1)
    w = (prediction[[3,8], ...] * IMAGE_SIZE).reshape(1,-1)
    h = (prediction[[4,9], ...] * IMAGE_SIZE).reshape(1,-1)
    cls = torch.max(prediction[10:, ...].reshape(NUM_CLASSES, -1), dim=0).indices.tile(1,2)
    
    x_min = xc - w/2
    y_min = yc - h/2
    x_max = xc + w/2
    y_max = yc + h/2

    prediction_res = torch.cat([x_min, y_min, x_max, y_max, conf, cls], dim=0)
    prediction_res = prediction_res.transpose(0,1)

    # x_min과 y_min이 음수가 되지않고, x_max와 y_max가 이미지 크기를 넘지 않게 제한
    prediction_res[:, 2].clip(min=0, max=image.shape[1]) 
    prediction_res[:, 3].clip(min=0, max=image.shape[0])
        
    pred_res = prediction_res[prediction_res[:, 4] > conf_thres]
    nms_index = torchvision.ops.nms(boxes=pred_res[:, 0:4], scores=pred_res[:, 4], iou_threshold=iou_threshold)
    pred_res_ = pred_res[nms_index].numpy()
    
    n_obj = pred_res_.shape[0]
    bboxes = np.zeros(shape=(n_obj, 4), dtype=np.float32)
    bboxes[:, 0:2] = (pred_res_[:, 0:2] + pred_res_[:, 2:4]) / 2
    bboxes[:, 2:4] = pred_res_[:, 2:4] - pred_res_[:, 0:2]
    scores = pred_res_[:, 4]
    class_ids = pred_res_[:, 5]
    
    # 이미지 값이 들어가면 모델을 통해서, 후처리까지 포함된 yolo 포멧의 box좌표, 그 좌표에 대한 confidence score
    # 그리고 class id를 반환
    return bboxes, scores, class_ids

In [28]:
pred_images = []
pred_labels =[]

for index, batch in enumerate(test_dataloaders):
    images = batch[0].to(device)
    bboxes, scores, class_ids = model_predict(images, model, conf_thres=0.1, iou_threshold=0.1)
    
    if len(bboxes) > 0:
        prediction_yolo = np.concatenate([bboxes, scores[:, np.newaxis], class_ids[:, np.newaxis]], axis=1)
    else:
        prediction_yolo = np.array([])
    
    # 텐서형의 이미지를 다시 unnormalize를 시키고, 다시 chw를 hwc로 바꾸고 넘파이로 바꾼다.
    np_image = make_grid(images[0], normalize=True).cpu().permute(1,2,0).numpy()
    pred_images.append(np_image)
    pred_labels.append(prediction_yolo)

    

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


In [29]:
from ipywidgets import interact

@interact(index=(0,len(pred_images)-1))
def show_result(index=0):
    print(pred_labels[index])
    if len(pred_labels[index]) > 0:
        result = visualize(pred_images[index], pred_labels[index][:, 0:4], pred_labels[index][:, 5])
    else:
        result = pred_images[index]
        
    plt.figure(figsize=(6,6))
    plt.imshow(result)
    plt.show()

interactive(children=(IntSlider(value=0, description='index', max=24), Output()), _dom_classes=('widget-intera…