In [1]:
import numpy as np
import os 
import pandas as pd
import cv2
import torch
import matplotlib.pyplot as plt
from ipywidgets import interact
import albumentations as A
from albumentations.pytorch import ToTensorV2
import torchvision
from torch import nn
import torchsummary
from torch.utils.data import DataLoader
from collections import defaultdict
from torchvision.utils import make_grid

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

# Utils

In [3]:
CLASS_NAME_TO_ID = {'stray': 0, 'target': 1}
CLASS_ID_TO_NAME = {0: 'stray', 1: 'target'}
BOX_COLOR = {'stray':(200, 0, 0), 'target':(0, 0, 200)}
TEXT_COLOR = (255, 255, 255)

def save_model(model_state, model_name, save_dir="./trained_model"):
    os.makedirs(save_dir, exist_ok=True)
    torch.save(model_state, os.path.join(save_dir, model_name))


def visualize_bbox(image, bbox, class_name, color=BOX_COLOR, thickness=2):
    x_center, y_center, w, h = bbox
    x_min = int(x_center - w/2)
    y_min = int(y_center - h/2)
    x_max = int(x_center + w/2)
    y_max = int(y_center + h/2)
    
    cv2.rectangle(image, (x_min, y_min), (x_max, y_max), color=color[class_name], thickness=thickness)
    
    ((text_width, text_height), _) = cv2.getTextSize(class_name, cv2.FONT_HERSHEY_SIMPLEX, 0.35, 1)    
    cv2.rectangle(image, (x_min, y_min - int(1.3 * text_height)), (x_min + text_width, y_min), color[class_name], -1)
    cv2.putText(
        image,
        text=class_name,
        org=(x_min, y_min - int(0.3 * text_height)),
        fontFace=cv2.FONT_HERSHEY_SIMPLEX,
        fontScale=0.35, 
        color=TEXT_COLOR, 
        lineType=cv2.LINE_AA,
    )
    return image


def visualize(image, bboxes, category_ids):
    img = image.copy()
    for bbox, category_id in zip(bboxes, category_ids):
#         print('category_id: ',category_id)
        class_name = CLASS_ID_TO_NAME[category_id.item()]
        img = visualize_bbox(img, bbox, class_name)
    return img

# Datasets

In [4]:
class Radio_dataset():
    def __init__(self, path, phase, transformer=None, aug=None, aug_factor=0):
        self.path=path
        self.phase=phase
        self.transformer=transformer
        self.aug=aug
        self.aug_factor=aug_factor
        
        self.image_files = sorted([fn for fn in os.listdir(self.path+"/"+self.phase+"/image") if fn.endswith("jpg")])
        self.label_files= sorted([lab for lab in os.listdir(self.path+"/"+self.phase+"/label") if lab.endswith("txt")])
        
        self.auged_img_list, self.auged_label_list=self.make_aug_list(self.image_files, self.label_files)
        
    def __getitem__(self,index):
        if(self.aug==None):
            filename, image = self.get_image(index)
            bboxes, class_ids = self.get_label(index)

            if(self.transformer):
                transformed_data=self.transformer(image=image, bboxes=bboxes, class_ids=class_ids)
                image = transformed_data['image']
                bboxes = np.array(transformed_data['bboxes'])
                class_ids = np.array(transformed_data['class_ids'])


            target = {}
    #         print(f'bboxes:{bboxes}\nclass_ids:{class_ids}\nlen_bboxes:{len(bboxes)}\nlen_class_ids:{len(class_ids)}')
    #         print(f'filename: {filename}')
            target["boxes"] = torch.Tensor(bboxes).float()
            target["labels"] = torch.Tensor(class_ids).long()

            ###
            bboxes=torch.Tensor(bboxes).float()
            class_ids=torch.Tensor(class_ids).long()
#             print(f"bboxes:{bboxes}\nclass_ids:{class_ids}\n---------")
            target = np.concatenate((bboxes, class_ids[:, np.newaxis]), axis=1)
            ###
        else:
            image=self.auged_img_list[index][1]
            target=self.auged_label_list[index]
            filename=self.auged_img_list[index][0]
        return image, target, filename
    
    def __len__(self, ):
        length=0
        if(self.aug==None):
            length=len(self.image_files)
        else:
            length=len(self.auged_img_list)
        return length
    
    def make_aug_list(self,ori_image_list,ori_label_files):
        aug_image_list=[]
        aug_label_list=[]
        
        print(f"start making augmented images-- augmented factor:{self.aug_factor}")
        for i in range(len(ori_image_list)):
            filename, ori_image = self.get_image(i)
            ori_bboxes, ori_class_ids = self.get_label(i)
            for j in range(self.aug_factor):
                auged_data=self.aug(image=ori_image, bboxes=ori_bboxes, class_ids=ori_class_ids)
                image = auged_data['image']
                bboxes = np.array(auged_data['bboxes'])
                class_ids = np.array(auged_data['class_ids'])
                
                bboxes=torch.Tensor(bboxes).float()
                class_ids=torch.Tensor(class_ids).long()
                
                aug_image_list.append((filename, image))
#                 print(f"{filename}, {bboxes}, {class_ids[:, np.newaxis]}")
                aug_label_list.append(np.concatenate((bboxes, class_ids[:, np.newaxis]), axis=1))
        
        print(f"total length of augmented images: {len(aug_image_list)}")
        
        return aug_image_list, aug_label_list
        
    
    def get_image(self, index): # 이미지 불러오는 함수
        filename = self.image_files[index]
        
        image_path = self.path+"/"+self.phase+"/image/"+filename
        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        return filename, image
    
    def get_label(self, index): # label (box좌표, class_id) 불러오는 함수
        label_filename=self.label_files[index]
        label_path = self.path+"/"+self.phase+"/label/"+label_filename
        with open(label_path, 'r') as file:
            labels = file.readlines()
        
        class_ids=[]
        bboxes=[]
        for label in labels:
            label=label.replace("\n", "")
            obj=label.split(' ')[0]
            coor=label.split(' ')[1:]
            obj=int(obj)
            coor=list(map(float, coor))
            class_ids.append(obj)
            bboxes.append(coor)
            
        return bboxes, class_ids
    

In [9]:
IMAGE_SIZE = 448

transformer = A.Compose([ 
        # bounding box의 변환, augmentation에서 albumentations는 Detection 학습을 할 때 굉장히 유용하다. 
        A.Resize(height=IMAGE_SIZE, width=IMAGE_SIZE),
#         A.Normalize(mean=(0.485, 0.456, 0.406),std=(0.229, 0.224, 0.225)),
        ToTensorV2(),
        # albumentations 라이브러리에서는 Normalization을 먼저 진행해 주고 tensor화를 진행해 주어야한다.
    ],
    # box 위치에 대한 transformation도 함께 진행된다. 
    bbox_params=A.BboxParams(format='yolo', label_fields=['class_ids']),
)

augmentator=A.Compose([
#     A.Resize(height=IMAGE_SIZE, width=IMAGE_SIZE),
    A.HorizontalFlip(p=0.7),
    A.VerticalFlip (p=0.5),
    A.HueSaturationValue(p=0.5),
    A.Resize(height=IMAGE_SIZE, width=IMAGE_SIZE),
    A.Normalize(mean=(0.485, 0.456, 0.406),std=(0.229, 0.224, 0.225)),
    ToTensorV2(),
    ],
    bbox_params=A.BboxParams(format='yolo', label_fields=['class_ids']),
)

def collate_fn(batch):
    image_list = []
    target_list = []
    filename_list = []
    
    for a,b,c in batch:
        image_list.append(a)
        target_list.append(b)
        filename_list.append(c)

    return torch.stack(image_list, dim=0), target_list, filename_list


In [14]:
# NECK_PATH = '/home/host_data/PET_data/Neck'
# BODY_PATH = '/home/host_data/PET_data/Body'

NECK_PATH = '/home/host_data/PET_data_IP_AUG/aug_patched_Neck/'
BODY_PATH = '/home/host_data/PET_data_image_patching/Body'

PATH='/home/host_data/radio_signal_data_organized/'

# trainset_yes_aug=PET_dataset(part='neck',neck_dir=NECK_PATH,body_dir=BODY_PATH,phase='train', transformer=transformer, aug=augmentator, aug_factor=5)
trainset_no_aug=Radio_dataset(path=PATH, phase='valid', transformer=transformer, aug=None, aug_factor=0)


start making augmented images-- augmented factor:0
total length of augmented images: 0


In [15]:
len(trainset_no_aug)

565

In [16]:
@interact(index=(0, len(trainset_no_aug)-1))

def show_sample(index=0):
    image, target, filename = trainset_no_aug[index]
    image=image.permute(1,2,0).numpy()
    img_H, img_W, _ = image.shape
    print(filename)
    print(image.shape)
#     print(image)

#     bboxes = target['boxes']
#     class_ids = target["labels"]
    
    ###
    bboxes = target[:, 0:4]
    class_ids = target[:, 4]
    ###
    bboxes[:, [0,2]] *= img_W
    bboxes[:, [1,3]] *= img_H

    canvas = visualize(image, bboxes, class_ids)
    plt.figure(figsize=(6,6))
    plt.imshow(canvas)
    plt.axis('off')
    plt.show()

# show_sample()

interactive(children=(IntSlider(value=0, description='index', max=564), Output()), _dom_classes=('widget-inter…

In [13]:
@interact(index=(0, len(trainset_yes_aug)-1))

def show_sample(index=0):
    image, target, filename = trainset_yes_aug[index]
    image=image.permute(1,2,0).numpy()
    img_H, img_W, _ = image.shape
    print(filename)
    print(image.shape)
#     print(image)

#     bboxes = target['boxes']
#     class_ids = target["labels"]
    ###
    bboxes = target[:, 0:4]
    class_ids = target[:, 4]
    ###
    bboxes[:, [0,2]] *= img_W
    bboxes[:, [1,3]] *= img_H
    print(bboxes)

    canvas = visualize(image, bboxes, class_ids)
    plt.figure(figsize=(6,6))
    plt.imshow(canvas)
    plt.axis('off')
    plt.show()

# show_sample()

NameError: name 'trainset_yes_aug' is not defined

## Model

In [6]:
class YOLO_SWIN(nn.Module):
    def __init__(self, num_classes):
        super().__init__()

        self.num_classes = num_classes
        self.num_bboxes = 2
        self.grid_size = 7

#         resnet18 = torchvision.models.resnet18(pretrained = True)
        swin=torchvision.models.swin_v2_t(weights='IMAGENET1K_V1')
        layers = [m for m in swin.children()] #Resnet에서 Yolo에서 가져올수 있을만한 layer만 선별적으로 가져오기 위해서

        # 기존 Resnet18의 layer들중에서 맨 뒤에 두개만 제외하고 다 가져와서 Backbone으로 사용
        self.backbone = nn.Sequential(*layers[:-3]) 
        self.head = nn.Sequential(
                nn.Conv2d(in_channels=768, out_channels=1024, kernel_size=1, padding=0,bias=False),
                nn.BatchNorm2d(1024),
                nn.ReLU(inplace=True),
                nn.Conv2d(in_channels=1024, out_channels=1024, kernel_size=3, padding=1,bias=False),
                nn.BatchNorm2d(1024),
                nn.ReLU(inplace=True),
                nn.Conv2d(in_channels=1024, out_channels=1024, kernel_size=3, padding=1,bias=False),
                nn.BatchNorm2d(1024),
                nn.ReLU(inplace=True),
                nn.Conv2d(in_channels=1024, out_channels=1024, kernel_size=3, padding=1,bias=False),
                nn.BatchNorm2d(1024),
                nn.ReLU(inplace=True),

                nn.Conv2d(in_channels=1024, out_channels=(4+1)*self.num_bboxes+num_classes, kernel_size=1, padding=0, bias=False),
                nn.AdaptiveAvgPool2d(output_size=(self.grid_size, self.grid_size))
            )

    def forward(self, x):
        out = self.backbone(x)
        # out = self.neck(out)
        out = self.head(out) # input (batch, 3, 448, 448) -> output feature (batch, 12, 7, 7)
        return out


In [7]:
NUM_CLASSES = 2
model = YOLO_SWIN(num_classes=NUM_CLASSES)
model.to(device)

YOLO_SWIN(
  (backbone): Sequential(
    (0): Sequential(
      (0): Sequential(
        (0): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4))
        (1): Permute()
        (2): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
      )
      (1): Sequential(
        (0): SwinTransformerBlockV2(
          (norm1): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
          (attn): ShiftedWindowAttentionV2(
            (qkv): Linear(in_features=96, out_features=288, bias=True)
            (proj): Linear(in_features=96, out_features=96, bias=True)
            (cpb_mlp): Sequential(
              (0): Linear(in_features=2, out_features=512, bias=True)
              (1): ReLU(inplace=True)
              (2): Linear(in_features=512, out_features=3, bias=False)
            )
          )
          (stochastic_depth): StochasticDepth(p=0.0, mode=row)
          (norm2): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
          (mlp): MLP(
            (0): Linear(in_features=96, out_f

In [8]:
torchsummary.summary(model, (3,448,448))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 96, 112, 112]           4,704
           Permute-2         [-1, 112, 112, 96]               0
         LayerNorm-3         [-1, 112, 112, 96]             192
            Linear-4          [-1, 15, 15, 512]           1,536
              ReLU-5          [-1, 15, 15, 512]               0
            Linear-6            [-1, 15, 15, 3]           1,536
ShiftedWindowAttentionV2-7         [-1, 112, 112, 96]               0
         LayerNorm-8         [-1, 112, 112, 96]             192
   StochasticDepth-9         [-1, 112, 112, 96]               0
           Linear-10        [-1, 112, 112, 384]          37,248
             GELU-11        [-1, 112, 112, 384]               0
          Dropout-12        [-1, 112, 112, 384]               0
           Linear-13         [-1, 112, 112, 96]          36,960
          Dropout-14         [-1,

In [9]:
x = torch.randn(1, 3, 448, 448).to(device)
with torch.no_grad():
    y = model(x)
print(y.shape)

torch.Size([1, 12, 7, 7])


# Loss func

In [10]:
class YOLO_LOSS():
    def __init__(self, num_classes, device, lambda_coord=5., lambda_noobj=0.5):
        self.num_classes = num_classes
        self.device = device
        self.grid_size = 7
        self.lambda_coord = lambda_coord
        self.lambda_noobj = lambda_noobj
        self.mse_loss = nn.MSELoss(reduction="sum")

    def __call__(self, predictions, targets):
        self.batch_size, _, _, _ = predictions.shape
        groundtruths = self.build_batch_target_grid(targets)
        groundtruths = groundtruths.to(self.device)
        
        with torch.no_grad():
            iou1 = self.get_IoU(predictions[:, 1:5, ...], groundtruths[:, 1:5, ...])
            iou2 = self.get_IoU(predictions[:, 6:10, ...], groundtruths[:, 1:5, ...])

        ious = torch.stack([iou1, iou2], dim=1)
        max_iou, best_box = ious.max(dim=1, keepdim=True)
        max_iou = torch.cat([max_iou, max_iou], dim=1)
        best_box = torch.cat([best_box.eq(0), best_box.eq(1)], dim=1)

        predictions_ = predictions[:, :5*2, ...].reshape(self.batch_size, 2, 5, self.grid_size, self.grid_size)
        obj_pred = predictions_[:, :, 0, ...]
        xy_pred = predictions_[:, :, 1:3, ...]
        wh_pred = predictions_[:, :, 3:5, ...]
        cls_pred = predictions[:, 5*2:, ...]

        groundtruths_ = groundtruths[:, :5, ...].reshape(self.batch_size, 1, 5, self.grid_size, self.grid_size)
        obj_target = groundtruths_[:, :, 0, ...]
        xy_target = groundtruths_[:, :, 1:3, ...]
        wh_target= groundtruths_[:, :, 3:5, ...]
        cls_target = groundtruths[:, 5:, ...]
        
        positive = obj_target * best_box

        obj_loss = self.mse_loss(positive * obj_pred, positive * ious)
        noobj_loss = self.mse_loss((1 - positive) * obj_pred, ious*0)
        xy_loss = self.mse_loss(positive.unsqueeze(dim=2) * xy_pred, positive.unsqueeze(dim=2) * xy_target)
        wh_loss = self.mse_loss(positive.unsqueeze(dim=2) * (wh_pred.sign() * (wh_pred.abs() + 1e-8).sqrt()),
                           positive.unsqueeze(dim=2) * (wh_target + 1e-8).sqrt())
        cls_loss = self.mse_loss(obj_target * cls_pred, cls_target)
        
        obj_loss /= self.batch_size
        noobj_loss /= self.batch_size
        bbox_loss = (xy_loss+wh_loss) / self.batch_size
        cls_loss /= self.batch_size
        
        total_loss = obj_loss + self.lambda_noobj*noobj_loss + self.lambda_coord*bbox_loss + cls_loss
        return total_loss, (obj_loss.item(), noobj_loss.item(), bbox_loss.item(), cls_loss.item())
    
    def build_target_grid(self, target):
        target_grid = torch.zeros((1+4+self.num_classes, self.grid_size, self.grid_size), device=self.device)

        for gt in target:
            xc, yc, w, h, cls_id = gt
            xn = (xc % (1/self.grid_size))
            yn = (yc % (1/self.grid_size))
            cls_id = int(cls_id)

            i_grid = int(xc * self.grid_size)
            j_grid = int(yc * self.grid_size)
            target_grid[0, j_grid, i_grid] = 1
            target_grid[1:5, j_grid, i_grid] = torch.Tensor([xn,yn,w,h])
#             print(5+cls_id, j_grid, i_grid)
            target_grid[5+cls_id, j_grid, i_grid] = 1

        return target_grid
    
    def build_batch_target_grid(self, targets):
        target_grid_batch = torch.stack([self.build_target_grid(target) for target in targets], dim=0)
        return target_grid_batch
    
    def get_IoU(self, cbox1, cbox2):
        box1 = self.xywh_to_xyxy(cbox1)
        box2 = self.xywh_to_xyxy(cbox2)

        x1 = torch.max(box1[:, 0, ...], box2[:, 0, ...])
        y1 = torch.max(box1[:, 1, ...], box2[:, 1, ...])
        x2 = torch.min(box1[:, 2, ...], box2[:, 2, ...])
        y2 = torch.min(box1[:, 3, ...], box2[:, 3, ...])

        intersection = (x2-x1).clamp(min=0) * (y2-y1).clamp(min=0)
        union = abs(cbox1[:, 2, ...]*cbox1[:, 3, ...]) + \
                abs(cbox2[:, 2, ...]*cbox2[:, 3, ...]) - intersection

        intersection[intersection.gt(0)] = intersection[intersection.gt(0)] / union[intersection.gt(0)]
        return intersection
    
    def generate_xy_normed_grid(self):
        y_offset, x_offset = torch.meshgrid(torch.arange(self.grid_size), torch.arange(self.grid_size))
        xy_grid = torch.stack([x_offset, y_offset], dim=0)
        xy_normed_grid = xy_grid / self.grid_size
        return xy_normed_grid.to(self.device)

    def xywh_to_xyxy(self, bboxes):
        xy_normed_grid = self.generate_xy_normed_grid()
        xcyc = bboxes[:,0:2,...] + xy_normed_grid.tile(self.batch_size, 1,1,1)
        wh = bboxes[:,2:4,...]
        x1y1 = xcyc - (wh/2)
        x2y2 = xcyc + (wh/2)
        return torch.cat([x1y1, x2y2], dim=1)

## Train

In [11]:
def train_one_epoch(dataloaders, model, criterion, optimizer, device):
    train_loss = defaultdict(float)
    val_loss = defaultdict(float)
    
    for phase in ["train", "val"]:
        if phase == "train":
            model.train()
        else:
            model.eval()
        
        running_loss = defaultdict(float)
        for index, batch in enumerate(dataloaders[phase]):
            images = batch[0].to(device)
            targets = batch[1]
            filenames = batch[2]
            
            with torch.set_grad_enabled(phase == "train"): # phase가 train 일때만 gradient 추적기능을 킨다.
                predictions = model(images) #prediction shape=> B,12,7,7
#             print(f"predictions:{predictions}, \ntargets: {targets}\n")
            loss, (obj_loss, noobj_loss, bbox_loss, cls_loss) = criterion(predictions, targets)
#             print(f"loss:{loss}, obj_loss:{obj_loss}, noobj_loss:{noobj_loss}\nbbox_loss:{bbox_loss}, cls_loss:{cls_loss}\n--------------\n")
            if phase == "train":
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                
                # 현재 epoch단계에서 loss가 얼마인지 running loss 가출력
                running_loss["total_loss"] += loss.item()
                running_loss["obj_loss"] += obj_loss
                running_loss["noobj_loss"] += noobj_loss
                running_loss["bbox_loss"] += bbox_loss
                running_loss["cls_loss"] += cls_loss
                
                train_loss["total_loss"] += loss.item()
                train_loss["obj_loss"] += obj_loss
                train_loss["noobj_loss"] += noobj_loss
                train_loss["bbox_loss"] += bbox_loss
                train_loss["cls_loss"] += cls_loss
                
                if (index > 0) and (index % VERBOSE_FREQ) == 0:
                    text = f"<<<iteration:[{index}/{len(dataloaders[phase])}] - "
                    for k, v in running_loss.items():
                        text += f"{k}: {v/VERBOSE_FREQ:.4f}  "
                        running_loss[k] = 0.
                    print(text)
            else:
                val_loss["total_loss"] += loss.item()
                val_loss["obj_loss"] += obj_loss
                val_loss["noobj_loss"] += noobj_loss
                val_loss["bbox_loss"] += bbox_loss
                val_loss["cls_loss"] += cls_loss

    for k in train_loss.keys():
        train_loss[k] /= len(dataloaders["train"])
        val_loss[k] /= len(dataloaders["val"])
    return train_loss, val_loss

In [12]:
def build_dataloader(PATH, batch_size=2, aug_factor=0):
    IMAGE_SIZE = 448
    transformer = A.Compose([
            A.Resize(height=IMAGE_SIZE, width=IMAGE_SIZE),
            A.Normalize(mean=(0.485, 0.456, 0.406),std=(0.229, 0.224, 0.225)),
            ToTensorV2(),
        ],
        bbox_params=A.BboxParams(format='yolo', label_fields=['class_ids']),
    )
    augmentator=A.Compose([
        A.HorizontalFlip(p=0.7),
        A.VerticalFlip (p=0.5),
        A.HueSaturationValue(p=0.5),
        A.Resize(height=IMAGE_SIZE, width=IMAGE_SIZE),
        A.Normalize(mean=(0.485, 0.456, 0.406),std=(0.229, 0.224, 0.225)),
        ToTensorV2(),
        ],
        bbox_params=A.BboxParams(format='yolo', label_fields=['class_ids']),
    )
    
#     Radio_dataset(path=PATH, phase='train', transformer=transformer, aug=None)
#     trainset_no_aug=Radio_dataset(path=PATH, phase='train', transformer=transformer, aug=augmentator, aug_factor=2)
    dataloaders = {}
    train_dataset=Radio_dataset(path=PATH, phase='train', transformer=transformer, aug=augmentator, aug_factor=2)
    dataloaders["train"] = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)

    val_dataset=Radio_dataset(path=PATH, phase='valid', transformer=transformer, aug=augmentator, aug_factor=2)
    dataloaders["val"] = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)
    print(f"trainset:{len(train_dataset)} validset:{len(val_dataset)}")
    return dataloaders

In [13]:
# data_dir = "/content/drive/MyDrive/fastCamMedicalProj/DATASET/DATASET/Detection/"
# NECK_PATH = '/home/host_data/PET_data/Neck'
# BODY_PATH = '/home/host_data/PET_data/Body'
# NECK_PATH = '/home/host_data/PET_data_IP_AUG/aug_patched_Neck'
# BODY_PATH = '/home/host_data/PET_data_IP_AUG/Body'

path='/home/host_data/radio_signal_data_organized/'
is_cuda = True

NUM_CLASSES = 2
IMAGE_SIZE = 448
BATCH_SIZE = 16
VERBOSE_FREQ = 20
LR=0.0001
AUG_FACTOR=4
PATCH_FACTOR=50
BACKBONE="YOLO_SWIN_T"
PART="RADIO"
num_epochs = 100
# DEVICE = torch.device('cuda' if torch.cuda.is_available and is_cuda else 'cpu')

dataloaders = build_dataloader(PATH=path,batch_size=BATCH_SIZE, aug_factor=AUG_FACTOR)
model = YOLO_SWIN(num_classes=NUM_CLASSES)
model = model.to(device)
criterion = YOLO_LOSS(num_classes=NUM_CLASSES, device=device)
optimizer = torch.optim.SGD(model.parameters(), lr=LR)

start making augmented images-- augmented factor:2
total length of augmented images: 3898
start making augmented images-- augmented factor:2
total length of augmented images: 1130
trainset:3898 validset:1130


In [14]:
import wandb
import random

# start a new wandb run to track this script
wandb.init(
    # set the wandb project where this run will be logged
    project="yolo_swin_RADIO",
    
    # track hyperparameters and run metadata
    config={
    "learning_rate": LR,
    "batch_size": BATCH_SIZE,
    "architecture": BACKBONE,
    "dataset": PART,
    "epochs": num_epochs,
    "patch factor":PATCH_FACTOR,
    "aug factor":AUG_FACTOR,
    }
)

[34m[1mwandb[0m: Currently logged in as: [33mgomduribo[0m ([33murp[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [15]:
best_epoch = 0
best_score = float('inf')
train_losses = []
val_losses = []

for epoch in range(num_epochs):
    train_loss, val_loss = train_one_epoch(dataloaders, model, criterion, optimizer, device)
    train_losses.append(train_loss)
    val_losses.append(val_loss)
#     train_loss["obj_loss"] += obj_loss
#     train_loss["noobj_loss"] += noobj_loss
#     train_loss["bbox_loss"] += bbox_loss
#     train_loss["cls_loss"] += cls_loss
    wandb.log({"Train Loss": train_loss['total_loss'],
               "Train obj Loss":train_loss["obj_loss"],
               "Train bbox Loss":train_loss["bbox_loss"],
               "Train class Loss":train_loss["cls_loss"],
               "Val Loss": val_loss['total_loss'],
               "Val obj Loss":val_loss["obj_loss"],
               "Val bbox Loss":val_loss["bbox_loss"],
               "Val class Loss":val_loss["cls_loss"],})
    print(f"\nepoch:{epoch+1}/{num_epochs} - Train Loss: {train_loss['total_loss']:.4f}, Val Loss: {val_loss['total_loss']:.4f}\n")
    
    if (epoch+1) % 10 == 0:
        save_model(model.state_dict(), f'model_{epoch+1}.pth', save_dir=f"./trained_model/{BACKBONE}_{PART}_LR{LR}_IP{PATCH_FACTOR}_AUG{AUG_FACTOR}")
wandb.finish()

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


<<<iteration:[20/244] - total_loss: 8.3863  obj_loss: 0.1300  noobj_loss: 4.1590  bbox_loss: 1.1092  cls_loss: 0.6306  
<<<iteration:[40/244] - total_loss: 3.1869  obj_loss: 0.0331  noobj_loss: 2.5107  bbox_loss: 0.2963  cls_loss: 0.4168  
<<<iteration:[60/244] - total_loss: 2.4855  obj_loss: 0.0258  noobj_loss: 2.1914  bbox_loss: 0.2101  cls_loss: 0.3134  
<<<iteration:[80/244] - total_loss: 2.3666  obj_loss: 0.0188  noobj_loss: 2.0186  bbox_loss: 0.2095  cls_loss: 0.2907  
<<<iteration:[100/244] - total_loss: 2.2551  obj_loss: 0.0177  noobj_loss: 1.7921  bbox_loss: 0.2086  cls_loss: 0.2983  
<<<iteration:[120/244] - total_loss: 2.0739  obj_loss: 0.0193  noobj_loss: 1.6154  bbox_loss: 0.1973  cls_loss: 0.2605  
<<<iteration:[140/244] - total_loss: 2.3663  obj_loss: 0.0208  noobj_loss: 1.8065  bbox_loss: 0.2327  cls_loss: 0.2787  
<<<iteration:[160/244] - total_loss: 1.9459  obj_loss: 0.0163  noobj_loss: 1.3542  bbox_loss: 0.1976  cls_loss: 0.2647  
<<<iteration:[180/244] - total_loss:

<<<iteration:[140/244] - total_loss: 0.3418  obj_loss: 0.0430  noobj_loss: 0.1404  bbox_loss: 0.0177  cls_loss: 0.1401  
<<<iteration:[160/244] - total_loss: 0.3682  obj_loss: 0.0439  noobj_loss: 0.1529  bbox_loss: 0.0158  cls_loss: 0.1688  
<<<iteration:[180/244] - total_loss: 0.3392  obj_loss: 0.0538  noobj_loss: 0.1357  bbox_loss: 0.0141  cls_loss: 0.1468  
<<<iteration:[200/244] - total_loss: 0.3855  obj_loss: 0.0458  noobj_loss: 0.1322  bbox_loss: 0.0179  cls_loss: 0.1840  
<<<iteration:[220/244] - total_loss: 0.3840  obj_loss: 0.0411  noobj_loss: 0.1338  bbox_loss: 0.0168  cls_loss: 0.1919  
<<<iteration:[240/244] - total_loss: 0.3417  obj_loss: 0.0490  noobj_loss: 0.1274  bbox_loss: 0.0138  cls_loss: 0.1599  

epoch:6/100 - Train Loss: 0.3673, Val Loss: 0.3350

<<<iteration:[20/244] - total_loss: 0.3196  obj_loss: 0.0405  noobj_loss: 0.1294  bbox_loss: 0.0171  cls_loss: 0.1290  
<<<iteration:[40/244] - total_loss: 0.3146  obj_loss: 0.0471  noobj_loss: 0.1178  bbox_loss: 0.0159  


epoch:11/100 - Train Loss: 0.2819, Val Loss: 0.2731

<<<iteration:[20/244] - total_loss: 0.3066  obj_loss: 0.0605  noobj_loss: 0.0739  bbox_loss: 0.0106  cls_loss: 0.1562  
<<<iteration:[40/244] - total_loss: 0.2781  obj_loss: 0.0596  noobj_loss: 0.0687  bbox_loss: 0.0102  cls_loss: 0.1332  
<<<iteration:[60/244] - total_loss: 0.2731  obj_loss: 0.0626  noobj_loss: 0.0781  bbox_loss: 0.0109  cls_loss: 0.1169  
<<<iteration:[80/244] - total_loss: 0.2516  obj_loss: 0.0478  noobj_loss: 0.0665  bbox_loss: 0.0100  cls_loss: 0.1207  
<<<iteration:[100/244] - total_loss: 0.2753  obj_loss: 0.0615  noobj_loss: 0.0710  bbox_loss: 0.0094  cls_loss: 0.1315  
<<<iteration:[120/244] - total_loss: 0.2790  obj_loss: 0.0602  noobj_loss: 0.0612  bbox_loss: 0.0095  cls_loss: 0.1405  
<<<iteration:[140/244] - total_loss: 0.2547  obj_loss: 0.0580  noobj_loss: 0.0628  bbox_loss: 0.0083  cls_loss: 0.1239  
<<<iteration:[160/244] - total_loss: 0.2758  obj_loss: 0.0570  noobj_loss: 0.0638  bbox_loss: 0.0105  c

<<<iteration:[140/244] - total_loss: 0.2288  obj_loss: 0.0639  noobj_loss: 0.0480  bbox_loss: 0.0065  cls_loss: 0.1084  
<<<iteration:[160/244] - total_loss: 0.2678  obj_loss: 0.0653  noobj_loss: 0.0531  bbox_loss: 0.0066  cls_loss: 0.1429  
<<<iteration:[180/244] - total_loss: 0.2294  obj_loss: 0.0678  noobj_loss: 0.0560  bbox_loss: 0.0072  cls_loss: 0.0978  
<<<iteration:[200/244] - total_loss: 0.2439  obj_loss: 0.0697  noobj_loss: 0.0534  bbox_loss: 0.0077  cls_loss: 0.1088  
<<<iteration:[220/244] - total_loss: 0.2557  obj_loss: 0.0586  noobj_loss: 0.0626  bbox_loss: 0.0092  cls_loss: 0.1197  
<<<iteration:[240/244] - total_loss: 0.2594  obj_loss: 0.0673  noobj_loss: 0.0513  bbox_loss: 0.0083  cls_loss: 0.1249  

epoch:17/100 - Train Loss: 0.2493, Val Loss: 0.2377

<<<iteration:[20/244] - total_loss: 0.2356  obj_loss: 0.0623  noobj_loss: 0.0552  bbox_loss: 0.0077  cls_loss: 0.1070  
<<<iteration:[40/244] - total_loss: 0.2159  obj_loss: 0.0584  noobj_loss: 0.0513  bbox_loss: 0.0073 


epoch:22/100 - Train Loss: 0.2266, Val Loss: 0.2235

<<<iteration:[20/244] - total_loss: 0.2305  obj_loss: 0.0719  noobj_loss: 0.0501  bbox_loss: 0.0062  cls_loss: 0.1027  
<<<iteration:[40/244] - total_loss: 0.2226  obj_loss: 0.0595  noobj_loss: 0.0451  bbox_loss: 0.0070  cls_loss: 0.1055  
<<<iteration:[60/244] - total_loss: 0.2249  obj_loss: 0.0679  noobj_loss: 0.0480  bbox_loss: 0.0070  cls_loss: 0.0981  
<<<iteration:[80/244] - total_loss: 0.2656  obj_loss: 0.0792  noobj_loss: 0.0450  bbox_loss: 0.0063  cls_loss: 0.1326  
<<<iteration:[100/244] - total_loss: 0.2244  obj_loss: 0.0665  noobj_loss: 0.0426  bbox_loss: 0.0059  cls_loss: 0.1070  
<<<iteration:[120/244] - total_loss: 0.2303  obj_loss: 0.0655  noobj_loss: 0.0449  bbox_loss: 0.0064  cls_loss: 0.1103  
<<<iteration:[140/244] - total_loss: 0.2436  obj_loss: 0.0641  noobj_loss: 0.0509  bbox_loss: 0.0063  cls_loss: 0.1227  
<<<iteration:[160/244] - total_loss: 0.2205  obj_loss: 0.0616  noobj_loss: 0.0479  bbox_loss: 0.0059  c

<<<iteration:[140/244] - total_loss: 0.1986  obj_loss: 0.0636  noobj_loss: 0.0413  bbox_loss: 0.0049  cls_loss: 0.0900  
<<<iteration:[160/244] - total_loss: 0.2233  obj_loss: 0.0697  noobj_loss: 0.0455  bbox_loss: 0.0063  cls_loss: 0.0995  
<<<iteration:[180/244] - total_loss: 0.1976  obj_loss: 0.0702  noobj_loss: 0.0415  bbox_loss: 0.0056  cls_loss: 0.0786  
<<<iteration:[200/244] - total_loss: 0.2155  obj_loss: 0.0673  noobj_loss: 0.0421  bbox_loss: 0.0057  cls_loss: 0.0985  
<<<iteration:[220/244] - total_loss: 0.2092  obj_loss: 0.0662  noobj_loss: 0.0423  bbox_loss: 0.0054  cls_loss: 0.0946  
<<<iteration:[240/244] - total_loss: 0.2049  obj_loss: 0.0692  noobj_loss: 0.0425  bbox_loss: 0.0059  cls_loss: 0.0848  

epoch:28/100 - Train Loss: 0.2134, Val Loss: 0.2165

<<<iteration:[20/244] - total_loss: 0.2199  obj_loss: 0.0720  noobj_loss: 0.0459  bbox_loss: 0.0059  cls_loss: 0.0954  
<<<iteration:[40/244] - total_loss: 0.2372  obj_loss: 0.0771  noobj_loss: 0.0488  bbox_loss: 0.0056 


epoch:33/100 - Train Loss: 0.2054, Val Loss: 0.2120

<<<iteration:[20/244] - total_loss: 0.2080  obj_loss: 0.0765  noobj_loss: 0.0416  bbox_loss: 0.0053  cls_loss: 0.0844  
<<<iteration:[40/244] - total_loss: 0.2062  obj_loss: 0.0698  noobj_loss: 0.0392  bbox_loss: 0.0053  cls_loss: 0.0902  
<<<iteration:[60/244] - total_loss: 0.2032  obj_loss: 0.0637  noobj_loss: 0.0415  bbox_loss: 0.0053  cls_loss: 0.0921  
<<<iteration:[80/244] - total_loss: 0.2101  obj_loss: 0.0616  noobj_loss: 0.0396  bbox_loss: 0.0059  cls_loss: 0.0993  
<<<iteration:[100/244] - total_loss: 0.2289  obj_loss: 0.0659  noobj_loss: 0.0395  bbox_loss: 0.0047  cls_loss: 0.1196  
<<<iteration:[120/244] - total_loss: 0.1944  obj_loss: 0.0685  noobj_loss: 0.0408  bbox_loss: 0.0054  cls_loss: 0.0785  
<<<iteration:[140/244] - total_loss: 0.2297  obj_loss: 0.0678  noobj_loss: 0.0389  bbox_loss: 0.0055  cls_loss: 0.1150  
<<<iteration:[160/244] - total_loss: 0.2228  obj_loss: 0.0737  noobj_loss: 0.0400  bbox_loss: 0.0051  c

<<<iteration:[140/244] - total_loss: 0.1850  obj_loss: 0.0641  noobj_loss: 0.0389  bbox_loss: 0.0055  cls_loss: 0.0740  
<<<iteration:[160/244] - total_loss: 0.2030  obj_loss: 0.0595  noobj_loss: 0.0399  bbox_loss: 0.0068  cls_loss: 0.0893  
<<<iteration:[180/244] - total_loss: 0.1850  obj_loss: 0.0716  noobj_loss: 0.0385  bbox_loss: 0.0069  cls_loss: 0.0598  
<<<iteration:[200/244] - total_loss: 0.2045  obj_loss: 0.0591  noobj_loss: 0.0396  bbox_loss: 0.0053  cls_loss: 0.0988  
<<<iteration:[220/244] - total_loss: 0.2061  obj_loss: 0.0670  noobj_loss: 0.0435  bbox_loss: 0.0070  cls_loss: 0.0825  
<<<iteration:[240/244] - total_loss: 0.2061  obj_loss: 0.0713  noobj_loss: 0.0400  bbox_loss: 0.0060  cls_loss: 0.0848  

epoch:39/100 - Train Loss: 0.2013, Val Loss: 0.2121

<<<iteration:[20/244] - total_loss: 0.2073  obj_loss: 0.0700  noobj_loss: 0.0412  bbox_loss: 0.0057  cls_loss: 0.0882  
<<<iteration:[40/244] - total_loss: 0.2244  obj_loss: 0.0805  noobj_loss: 0.0402  bbox_loss: 0.0052 


epoch:44/100 - Train Loss: 0.1957, Val Loss: 0.2076

<<<iteration:[20/244] - total_loss: 0.1832  obj_loss: 0.0672  noobj_loss: 0.0413  bbox_loss: 0.0057  cls_loss: 0.0670  
<<<iteration:[40/244] - total_loss: 0.1819  obj_loss: 0.0748  noobj_loss: 0.0375  bbox_loss: 0.0047  cls_loss: 0.0648  
<<<iteration:[60/244] - total_loss: 0.1817  obj_loss: 0.0663  noobj_loss: 0.0374  bbox_loss: 0.0051  cls_loss: 0.0712  
<<<iteration:[80/244] - total_loss: 0.1880  obj_loss: 0.0734  noobj_loss: 0.0373  bbox_loss: 0.0048  cls_loss: 0.0717  
<<<iteration:[100/244] - total_loss: 0.1961  obj_loss: 0.0762  noobj_loss: 0.0413  bbox_loss: 0.0047  cls_loss: 0.0755  
<<<iteration:[120/244] - total_loss: 0.2370  obj_loss: 0.0719  noobj_loss: 0.0443  bbox_loss: 0.0048  cls_loss: 0.1189  
<<<iteration:[140/244] - total_loss: 0.2187  obj_loss: 0.0725  noobj_loss: 0.0393  bbox_loss: 0.0044  cls_loss: 0.1044  
<<<iteration:[160/244] - total_loss: 0.1923  obj_loss: 0.0691  noobj_loss: 0.0395  bbox_loss: 0.0048  c

<<<iteration:[140/244] - total_loss: 0.2005  obj_loss: 0.0693  noobj_loss: 0.0410  bbox_loss: 0.0050  cls_loss: 0.0859  
<<<iteration:[160/244] - total_loss: 0.2111  obj_loss: 0.0687  noobj_loss: 0.0378  bbox_loss: 0.0046  cls_loss: 0.1006  
<<<iteration:[180/244] - total_loss: 0.2059  obj_loss: 0.0635  noobj_loss: 0.0425  bbox_loss: 0.0057  cls_loss: 0.0925  
<<<iteration:[200/244] - total_loss: 0.1572  obj_loss: 0.0615  noobj_loss: 0.0399  bbox_loss: 0.0054  cls_loss: 0.0489  
<<<iteration:[220/244] - total_loss: 0.1907  obj_loss: 0.0848  noobj_loss: 0.0357  bbox_loss: 0.0044  cls_loss: 0.0660  
<<<iteration:[240/244] - total_loss: 0.2111  obj_loss: 0.0756  noobj_loss: 0.0383  bbox_loss: 0.0055  cls_loss: 0.0890  

epoch:50/100 - Train Loss: 0.1997, Val Loss: 0.2152

<<<iteration:[20/244] - total_loss: 0.2009  obj_loss: 0.0699  noobj_loss: 0.0395  bbox_loss: 0.0060  cls_loss: 0.0810  
<<<iteration:[40/244] - total_loss: 0.1978  obj_loss: 0.0715  noobj_loss: 0.0361  bbox_loss: 0.0055 


epoch:55/100 - Train Loss: 0.1880, Val Loss: 0.2093

<<<iteration:[20/244] - total_loss: 0.1959  obj_loss: 0.0689  noobj_loss: 0.0397  bbox_loss: 0.0045  cls_loss: 0.0848  
<<<iteration:[40/244] - total_loss: 0.1975  obj_loss: 0.0786  noobj_loss: 0.0392  bbox_loss: 0.0047  cls_loss: 0.0757  
<<<iteration:[60/244] - total_loss: 0.2087  obj_loss: 0.0776  noobj_loss: 0.0431  bbox_loss: 0.0052  cls_loss: 0.0836  
<<<iteration:[80/244] - total_loss: 0.1690  obj_loss: 0.0646  noobj_loss: 0.0398  bbox_loss: 0.0048  cls_loss: 0.0607  
<<<iteration:[100/244] - total_loss: 0.2057  obj_loss: 0.0589  noobj_loss: 0.0378  bbox_loss: 0.0044  cls_loss: 0.1061  
<<<iteration:[120/244] - total_loss: 0.1800  obj_loss: 0.0600  noobj_loss: 0.0384  bbox_loss: 0.0044  cls_loss: 0.0790  
<<<iteration:[140/244] - total_loss: 0.1692  obj_loss: 0.0778  noobj_loss: 0.0368  bbox_loss: 0.0047  cls_loss: 0.0493  
<<<iteration:[160/244] - total_loss: 0.1762  obj_loss: 0.0696  noobj_loss: 0.0389  bbox_loss: 0.0051  c

<<<iteration:[140/244] - total_loss: 0.2046  obj_loss: 0.0788  noobj_loss: 0.0387  bbox_loss: 0.0047  cls_loss: 0.0829  
<<<iteration:[160/244] - total_loss: 0.2040  obj_loss: 0.0712  noobj_loss: 0.0385  bbox_loss: 0.0045  cls_loss: 0.0910  
<<<iteration:[180/244] - total_loss: 0.1958  obj_loss: 0.0746  noobj_loss: 0.0376  bbox_loss: 0.0042  cls_loss: 0.0813  
<<<iteration:[200/244] - total_loss: 0.1592  obj_loss: 0.0632  noobj_loss: 0.0356  bbox_loss: 0.0039  cls_loss: 0.0589  
<<<iteration:[220/244] - total_loss: 0.1866  obj_loss: 0.0698  noobj_loss: 0.0380  bbox_loss: 0.0043  cls_loss: 0.0765  
<<<iteration:[240/244] - total_loss: 0.1900  obj_loss: 0.0710  noobj_loss: 0.0416  bbox_loss: 0.0043  cls_loss: 0.0767  

epoch:61/100 - Train Loss: 0.1885, Val Loss: 0.2068

<<<iteration:[20/244] - total_loss: 0.1813  obj_loss: 0.0748  noobj_loss: 0.0397  bbox_loss: 0.0044  cls_loss: 0.0645  
<<<iteration:[40/244] - total_loss: 0.1743  obj_loss: 0.0617  noobj_loss: 0.0387  bbox_loss: 0.0046 


epoch:66/100 - Train Loss: 0.1833, Val Loss: 0.2089

<<<iteration:[20/244] - total_loss: 0.1689  obj_loss: 0.0736  noobj_loss: 0.0466  bbox_loss: 0.0045  cls_loss: 0.0496  
<<<iteration:[40/244] - total_loss: 0.1777  obj_loss: 0.0671  noobj_loss: 0.0359  bbox_loss: 0.0043  cls_loss: 0.0711  
<<<iteration:[60/244] - total_loss: 0.2016  obj_loss: 0.0770  noobj_loss: 0.0408  bbox_loss: 0.0041  cls_loss: 0.0836  
<<<iteration:[80/244] - total_loss: 0.2077  obj_loss: 0.0770  noobj_loss: 0.0394  bbox_loss: 0.0037  cls_loss: 0.0926  
<<<iteration:[100/244] - total_loss: 0.1797  obj_loss: 0.0721  noobj_loss: 0.0400  bbox_loss: 0.0040  cls_loss: 0.0679  
<<<iteration:[120/244] - total_loss: 0.1806  obj_loss: 0.0770  noobj_loss: 0.0350  bbox_loss: 0.0037  cls_loss: 0.0675  
<<<iteration:[140/244] - total_loss: 0.1743  obj_loss: 0.0688  noobj_loss: 0.0330  bbox_loss: 0.0039  cls_loss: 0.0696  
<<<iteration:[160/244] - total_loss: 0.1704  obj_loss: 0.0689  noobj_loss: 0.0414  bbox_loss: 0.0045  c

<<<iteration:[140/244] - total_loss: 0.1516  obj_loss: 0.0624  noobj_loss: 0.0397  bbox_loss: 0.0040  cls_loss: 0.0495  
<<<iteration:[160/244] - total_loss: 0.1749  obj_loss: 0.0717  noobj_loss: 0.0386  bbox_loss: 0.0041  cls_loss: 0.0633  
<<<iteration:[180/244] - total_loss: 0.1950  obj_loss: 0.0719  noobj_loss: 0.0369  bbox_loss: 0.0041  cls_loss: 0.0841  
<<<iteration:[200/244] - total_loss: 0.1884  obj_loss: 0.0744  noobj_loss: 0.0386  bbox_loss: 0.0041  cls_loss: 0.0742  
<<<iteration:[220/244] - total_loss: 0.1824  obj_loss: 0.0734  noobj_loss: 0.0366  bbox_loss: 0.0039  cls_loss: 0.0710  
<<<iteration:[240/244] - total_loss: 0.1864  obj_loss: 0.0695  noobj_loss: 0.0362  bbox_loss: 0.0043  cls_loss: 0.0774  

epoch:72/100 - Train Loss: 0.1804, Val Loss: 0.2034

<<<iteration:[20/244] - total_loss: 0.2134  obj_loss: 0.0805  noobj_loss: 0.0403  bbox_loss: 0.0048  cls_loss: 0.0888  
<<<iteration:[40/244] - total_loss: 0.1437  obj_loss: 0.0649  noobj_loss: 0.0366  bbox_loss: 0.0040 


epoch:77/100 - Train Loss: 0.1751, Val Loss: 0.2050

<<<iteration:[20/244] - total_loss: 0.2144  obj_loss: 0.0758  noobj_loss: 0.0431  bbox_loss: 0.0039  cls_loss: 0.0976  
<<<iteration:[40/244] - total_loss: 0.1704  obj_loss: 0.0729  noobj_loss: 0.0359  bbox_loss: 0.0035  cls_loss: 0.0618  
<<<iteration:[60/244] - total_loss: 0.1861  obj_loss: 0.0723  noobj_loss: 0.0412  bbox_loss: 0.0040  cls_loss: 0.0731  
<<<iteration:[80/244] - total_loss: 0.1573  obj_loss: 0.0675  noobj_loss: 0.0380  bbox_loss: 0.0034  cls_loss: 0.0536  
<<<iteration:[100/244] - total_loss: 0.1942  obj_loss: 0.0701  noobj_loss: 0.0387  bbox_loss: 0.0045  cls_loss: 0.0824  
<<<iteration:[120/244] - total_loss: 0.1862  obj_loss: 0.0593  noobj_loss: 0.0339  bbox_loss: 0.0042  cls_loss: 0.0890  
<<<iteration:[140/244] - total_loss: 0.1844  obj_loss: 0.0733  noobj_loss: 0.0365  bbox_loss: 0.0036  cls_loss: 0.0747  
<<<iteration:[160/244] - total_loss: 0.1687  obj_loss: 0.0683  noobj_loss: 0.0352  bbox_loss: 0.0035  c

<<<iteration:[140/244] - total_loss: 0.1815  obj_loss: 0.0652  noobj_loss: 0.0358  bbox_loss: 0.0038  cls_loss: 0.0795  
<<<iteration:[160/244] - total_loss: 0.1599  obj_loss: 0.0702  noobj_loss: 0.0381  bbox_loss: 0.0034  cls_loss: 0.0539  
<<<iteration:[180/244] - total_loss: 0.1871  obj_loss: 0.0796  noobj_loss: 0.0469  bbox_loss: 0.0053  cls_loss: 0.0577  
<<<iteration:[200/244] - total_loss: 0.1913  obj_loss: 0.0713  noobj_loss: 0.0395  bbox_loss: 0.0037  cls_loss: 0.0816  
<<<iteration:[220/244] - total_loss: 0.1732  obj_loss: 0.0697  noobj_loss: 0.0391  bbox_loss: 0.0036  cls_loss: 0.0661  
<<<iteration:[240/244] - total_loss: 0.1708  obj_loss: 0.0692  noobj_loss: 0.0395  bbox_loss: 0.0038  cls_loss: 0.0628  

epoch:83/100 - Train Loss: 0.1760, Val Loss: 0.1993

<<<iteration:[20/244] - total_loss: 0.1843  obj_loss: 0.0645  noobj_loss: 0.0365  bbox_loss: 0.0040  cls_loss: 0.0817  
<<<iteration:[40/244] - total_loss: 0.1640  obj_loss: 0.0638  noobj_loss: 0.0420  bbox_loss: 0.0040 


epoch:88/100 - Train Loss: 0.1735, Val Loss: 0.2013

<<<iteration:[20/244] - total_loss: 0.1508  obj_loss: 0.0705  noobj_loss: 0.0394  bbox_loss: 0.0036  cls_loss: 0.0427  
<<<iteration:[40/244] - total_loss: 0.1543  obj_loss: 0.0713  noobj_loss: 0.0365  bbox_loss: 0.0037  cls_loss: 0.0464  
<<<iteration:[60/244] - total_loss: 0.1915  obj_loss: 0.0691  noobj_loss: 0.0364  bbox_loss: 0.0034  cls_loss: 0.0873  
<<<iteration:[80/244] - total_loss: 0.1636  obj_loss: 0.0685  noobj_loss: 0.0353  bbox_loss: 0.0035  cls_loss: 0.0598  
<<<iteration:[100/244] - total_loss: 0.1784  obj_loss: 0.0645  noobj_loss: 0.0375  bbox_loss: 0.0038  cls_loss: 0.0760  
<<<iteration:[120/244] - total_loss: 0.1886  obj_loss: 0.0713  noobj_loss: 0.0458  bbox_loss: 0.0039  cls_loss: 0.0747  
<<<iteration:[140/244] - total_loss: 0.1849  obj_loss: 0.0665  noobj_loss: 0.0382  bbox_loss: 0.0038  cls_loss: 0.0804  
<<<iteration:[160/244] - total_loss: 0.1645  obj_loss: 0.0654  noobj_loss: 0.0388  bbox_loss: 0.0039  c

<<<iteration:[140/244] - total_loss: 0.1614  obj_loss: 0.0683  noobj_loss: 0.0381  bbox_loss: 0.0038  cls_loss: 0.0550  
<<<iteration:[160/244] - total_loss: 0.1697  obj_loss: 0.0710  noobj_loss: 0.0380  bbox_loss: 0.0042  cls_loss: 0.0589  
<<<iteration:[180/244] - total_loss: 0.1789  obj_loss: 0.0722  noobj_loss: 0.0393  bbox_loss: 0.0043  cls_loss: 0.0654  
<<<iteration:[200/244] - total_loss: 0.1435  obj_loss: 0.0729  noobj_loss: 0.0379  bbox_loss: 0.0038  cls_loss: 0.0324  
<<<iteration:[220/244] - total_loss: 0.1911  obj_loss: 0.0799  noobj_loss: 0.0409  bbox_loss: 0.0039  cls_loss: 0.0714  
<<<iteration:[240/244] - total_loss: 0.1933  obj_loss: 0.0710  noobj_loss: 0.0398  bbox_loss: 0.0041  cls_loss: 0.0821  

epoch:94/100 - Train Loss: 0.1789, Val Loss: 0.2020

<<<iteration:[20/244] - total_loss: 0.1868  obj_loss: 0.0703  noobj_loss: 0.0433  bbox_loss: 0.0046  cls_loss: 0.0721  
<<<iteration:[40/244] - total_loss: 0.1774  obj_loss: 0.0731  noobj_loss: 0.0354  bbox_loss: 0.0034 


epoch:99/100 - Train Loss: 0.1711, Val Loss: 0.2050

<<<iteration:[20/244] - total_loss: 0.1695  obj_loss: 0.0782  noobj_loss: 0.0404  bbox_loss: 0.0034  cls_loss: 0.0540  
<<<iteration:[40/244] - total_loss: 0.1784  obj_loss: 0.0801  noobj_loss: 0.0392  bbox_loss: 0.0034  cls_loss: 0.0620  
<<<iteration:[60/244] - total_loss: 0.1675  obj_loss: 0.0751  noobj_loss: 0.0404  bbox_loss: 0.0039  cls_loss: 0.0529  
<<<iteration:[80/244] - total_loss: 0.1961  obj_loss: 0.0648  noobj_loss: 0.0383  bbox_loss: 0.0042  cls_loss: 0.0912  
<<<iteration:[100/244] - total_loss: 0.1475  obj_loss: 0.0696  noobj_loss: 0.0367  bbox_loss: 0.0039  cls_loss: 0.0401  
<<<iteration:[120/244] - total_loss: 0.1644  obj_loss: 0.0729  noobj_loss: 0.0410  bbox_loss: 0.0036  cls_loss: 0.0528  
<<<iteration:[140/244] - total_loss: 0.1678  obj_loss: 0.0696  noobj_loss: 0.0388  bbox_loss: 0.0036  cls_loss: 0.0607  
<<<iteration:[160/244] - total_loss: 0.2026  obj_loss: 0.0721  noobj_loss: 0.0386  bbox_loss: 0.0036  c

VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train Loss,█▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Train bbox Loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Train class Loss,█▅▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Train obj Loss,▁▁▄▄▅▆▆▆▇▇▇▇▇▇▇▇██▇▇█▇▇▇███▇█▇▇██▇███▇██
Val Loss,█▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Val bbox Loss,█▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Val class Loss,█▆▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Val obj Loss,▁▄▇█▇▇▇▇██▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▆▆▇▆

0,1
Train Loss,0.17047
Train bbox Loss,0.00357
Train class Loss,0.06195
Train obj Loss,0.07121
Val Loss,0.20422
Val bbox Loss,0.00376
Val class Loss,0.10647
Val obj Loss,0.05867


# Test Dataset Inference

In [13]:
import numpy as np
import os 
import pandas as pd
import cv2
import torch
import matplotlib.pyplot as plt
from ipywidgets import interact
import albumentations as A
from albumentations.pytorch import ToTensorV2
import torchvision
from torch import nn
import torchsummary
from torch.utils.data import DataLoader
from collections import defaultdict
from torchvision.utils import make_grid

In [14]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [39]:
class YOLO_RESNET18(nn.Module):
    def __init__(self, num_classes):
        super().__init__()

        self.num_classes = num_classes
        self.num_bboxes = 2
        self.grid_size = 7

        resnet18 = torchvision.models.resnet18(pretrained = True)
#         swin=torchvision.models.swin_v2_t(weights='IMAGENET1K_V1')
        layers = [m for m in resnet18.children()] #Resnet에서 Yolo에서 가져올수 있을만한 layer만 선별적으로 가져오기 위해서

        # 기존 Resnet18의 layer들중에서 맨 뒤에 두개만 제외하고 다 가져와서 Backbone으로 사용
        self.backbone = nn.Sequential(*layers[:-2]) 
        self.head = nn.Sequential(
                nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=1, padding=0,bias=False),
                nn.BatchNorm2d(1024),
                nn.ReLU(inplace=True),
                nn.Conv2d(in_channels=1024, out_channels=1024, kernel_size=3, padding=1,bias=False),
                nn.BatchNorm2d(1024),
                nn.ReLU(inplace=True),
                nn.Conv2d(in_channels=1024, out_channels=1024, kernel_size=3, padding=1,bias=False),
                nn.BatchNorm2d(1024),
                nn.ReLU(inplace=True),
                nn.Conv2d(in_channels=1024, out_channels=1024, kernel_size=3, padding=1,bias=False),
                nn.BatchNorm2d(1024),
                nn.ReLU(inplace=True),

                nn.Conv2d(in_channels=1024, out_channels=(4+1)*self.num_bboxes+num_classes, kernel_size=1, padding=0, bias=False),
                nn.AdaptiveAvgPool2d(output_size=(self.grid_size, self.grid_size))
            )

    def forward(self, x):
        out = self.backbone(x)
        # out = self.neck(out)
        out = self.head(out) # input (batch, 3, 448, 448) -> output feature (batch, 12, 7, 7)
        return out

In [13]:
def load_model(ckpt_path, num_classes, device):
    checkpoint = torch.load(ckpt_path, map_location=device)
    model = YOLO_SWIN(num_classes=num_classes)
#     model=YOLO_RESNET18(num_classes=num_classes)
    model.load_state_dict(checkpoint)
    model = model.to(device)
    model.eval()
    return model

In [14]:
IMAGE_SIZE=448
transformer = A.Compose([
            A.Resize(height=IMAGE_SIZE, width=IMAGE_SIZE),
            A.Normalize(mean=(0.485, 0.456, 0.406),std=(0.229, 0.224, 0.225)),
            ToTensorV2(),
        ],
        bbox_params=A.BboxParams(format='yolo', label_fields=['class_ids']),
)

In [15]:
NUM_CLASSES=2
# ckpt_path="./trained_model/YOLO_SWIN_T_body_LR0.0001_AUG30/model_90.pth"
ckpt_path="/workspace/Plastic_Bottle_defect_detection/experiments/trained_model/YOLO_SWIN_T_neck_LR0.0001_IP50_AUG4_radio_pretrained/model_100.pth"
model = load_model(ckpt_path, NUM_CLASSES, device)

In [16]:
NECK_PATH = '/home/host_data/PET_data/Neck'
BODY_PATH = '/home/host_data/PET_data/Body'
test_dataset=PET_dataset("neck" ,neck_dir=NECK_PATH,body_dir=BODY_PATH,phase='test', transformer=transformer, aug=None)
test_dataloaders = DataLoader(test_dataset, batch_size=1, shuffle=False, collate_fn=collate_fn)

NameError: name 'PET_dataset' is not defined

In [45]:
len(test_dataset)

25

In [46]:
@torch.no_grad()
def model_predict(image, model, conf_thres=0.2, iou_threshold=0.1):
    predictions = model(image)
    prediction = predictions.detach().cpu().squeeze(dim=0)
    f_map=prediction

#     print(prediction.shape)
    
    grid_size = prediction.shape[-1]
    y_grid, x_grid = torch.meshgrid(torch.arange(grid_size), torch.arange(grid_size))
    stride_size = IMAGE_SIZE/grid_size

    conf = prediction[[0,5], ...].reshape(1, -1)
    xc = (prediction[[1,6], ...] * IMAGE_SIZE + x_grid*stride_size).reshape(1,-1)
    yc = (prediction[[2,7], ...] * IMAGE_SIZE + y_grid*stride_size).reshape(1,-1)
    w = (prediction[[3,8], ...] * IMAGE_SIZE).reshape(1,-1)
    h = (prediction[[4,9], ...] * IMAGE_SIZE).reshape(1,-1)
    cls = torch.max(prediction[10:, ...].reshape(NUM_CLASSES, -1), dim=0).indices.tile(1,2)
    
    x_min = xc - w/2
    y_min = yc - h/2
    x_max = xc + w/2
    y_max = yc + h/2

    prediction_res = torch.cat([x_min, y_min, x_max, y_max, conf, cls], dim=0)
    prediction_res = prediction_res.transpose(0,1)

    # x_min과 y_min이 음수가 되지않고, x_max와 y_max가 이미지 크기를 넘지 않게 제한
    prediction_res[:, 2].clip(min=0, max=image.shape[1]) 
    prediction_res[:, 3].clip(min=0, max=image.shape[0])
        
    pred_res = prediction_res[prediction_res[:, 4] > conf_thres]
    nms_index = torchvision.ops.nms(boxes=pred_res[:, 0:4], scores=pred_res[:, 4], iou_threshold=iou_threshold)
    pred_res_ = pred_res[nms_index].numpy()
    
    n_obj = pred_res_.shape[0]
    bboxes = np.zeros(shape=(n_obj, 4), dtype=np.float32)
    bboxes[:, 0:2] = (pred_res_[:, 0:2] + pred_res_[:, 2:4]) / 2
    bboxes[:, 2:4] = pred_res_[:, 2:4] - pred_res_[:, 0:2]
    scores = pred_res_[:, 4]
    class_ids = pred_res_[:, 5]
    
    # 이미지 값이 들어가면 모델을 통해서, 후처리까지 포함된 yolo 포멧의 box좌표, 그 좌표에 대한 confidence score
    # 그리고 class id를 반환
    return bboxes, scores, class_ids,f_map

In [47]:
pred_images = []
pred_labels =[]
feature_maps=[]

for index, batch in enumerate(test_dataloaders):
    images = batch[0].to(device)
    bboxes, scores, class_ids, fmap = model_predict(images, model, conf_thres=0.1, iou_threshold=0.1)
    
    if len(bboxes) > 0:
        prediction_yolo = np.concatenate([bboxes, scores[:, np.newaxis], class_ids[:, np.newaxis]], axis=1)
    else:
        prediction_yolo = np.array([])
    
    # 텐서형의 이미지를 다시 unnormalize를 시키고, 다시 chw를 hwc로 바꾸고 넘파이로 바꾼다.
    np_image = make_grid(images[0], normalize=True).cpu().permute(1,2,0).numpy()
    pred_images.append(np_image)
    pred_labels.append(prediction_yolo)
    feature_maps.append(fmap)

    

In [48]:
from ipywidgets import interact

@interact(index=(0,len(pred_images)-1))
def show_result(index=0):
    print(pred_labels[index])
    if len(pred_labels[index]) > 0:
        result = visualize(pred_images[index], pred_labels[index][:, 0:4], pred_labels[index][:, 5])
    else:
        result = pred_images[index]
        
    plt.figure(figsize=(6,6))
    plt.imshow(result)
    plt.show()

interactive(children=(IntSlider(value=0, description='index', max=24), Output()), _dom_classes=('widget-intera…

In [49]:
#feature map에서 0,5번쨰에 해당하는 objectness 투사
from ipywidgets import interact

@interact(index=(0,len(pred_images)-1))
def show_result(index=0):
    print(pred_labels[index])
    if len(pred_labels[index]) > 0:
        result = visualize(pred_images[index], pred_labels[index][:, 0:4], pred_labels[index][:, 5])
    else:
        result = pred_images[index]
    
    f_map=feature_maps[index]
    zero_canvas=np.zeros((448,448))

    cv_re1=cv2.resize(f_map[0,:,:].numpy(),(448,448))
    cv_re2=cv2.resize(f_map[5,:,:].numpy(),(448,448))
    zero_canvas=zero_canvas+cv_re1+cv_re2

    
    fig = plt.figure()
    rows = 1
    cols = 2
    ax1 = fig.add_subplot(rows, cols, 1)
    ax1.imshow(result)
    ax1.set_title('Detection')
    ax1.axis("off")
    
    ax2 = fig.add_subplot(rows, cols, 2)
    ax2.imshow(zero_canvas)
    ax2.set_title('feature map-objectness')
    ax2.axis("off")

    plt.show()


interactive(children=(IntSlider(value=0, description='index', max=24), Output()), _dom_classes=('widget-intera…