In [105]:
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import os
import cv2
## pytorch libraries
import torch
from torch.utils.data import DataLoader
import torch.nn as nn
import torchvision.transforms as transforms

## 0. Fix Dataset

In [None]:
# Sanity check: print every file name found in the renamed image folder.
# NOTE: this reads "new_archive/new_images", which is created by the
# dataset-fixing cell (os.mkdir + cv2.imwrite); on a fresh kernel that cell
# has to run before this one.
images_names = os.listdir("new_archive/new_images")
for im in images_names:
    print(im)

In [None]:
import pandas as pd

# Re-export every image of the raw archive as "<index>.png" and rewrite the
# matching annotation rows so their "image" column reads "image-<index>.png".
annotations = pd.read_csv("archive/annotations.csv")

# Sorted so that the image -> index assignment is reproducible across runs
# (os.listdir returns entries in arbitrary order).
images_names = sorted(os.listdir("archive/images"))

# makedirs(exist_ok=True) also creates the "new_archive" parent and lets the
# cell be re-run without raising FileExistsError.
os.makedirs("new_archive/new_images", exist_ok=True)

renamed_annotations = []
index = 0
for im in images_names:
    image = cv2.imread(os.path.join("archive/images", im))
    if image is None:
        # cv2.imread returns None for files it cannot decode (hidden files,
        # non-images); skip them instead of crashing in cv2.imwrite.
        continue
    path = "new_archive/new_images/" + str(index) + ".png"
    cv2.imwrite(path, image)

    # .copy() so the rename below edits an independent frame, not a view of
    # `annotations` (avoids SettingWithCopyWarning / silent no-op writes).
    image_ann = annotations[annotations["image"] == im].copy()
    image_ann["image"] = "image-" + str(index) + ".png"
    renamed_annotations.append(image_ann)
    index += 1

# DataFrame.append was removed in pandas 2.0; collect the pieces and
# concatenate once (also avoids quadratic re-copying inside the loop).
if renamed_annotations:
    new_annotations = pd.concat(renamed_annotations)
else:
    new_annotations = pd.DataFrame(columns=["image", "xmin", "ymin", "xmax", "ymax", "label"])

# The row index is written on purpose: it keeps the CSV layout unchanged for
# code that reads the columns by position.
new_annotations.to_csv('new_archive/new_annotation.csv')

In [None]:
# Visual check of one renamed sample: draw each annotated box as a closed
# rectangle on top of the image.
img_file = "{}.png".format(2)
img_file_csv = "image-{}.png".format(2)
image_path = os.path.join("new_archive/new_images", img_file)

annotations_data = pd.read_csv("new_archive/new_annotation.csv")
sample_rows = annotations_data[annotations_data["image"] == img_file_csv]
image_boxes = sample_rows.drop(columns=["image"]).to_numpy()
im = cv2.imread(image_path)

for box in image_boxes:
    # After dropping "image", positions 1..4 hold xmin, ymin, xmax, ymax.
    xmin, ymin, xmax, ymax = box[1], box[2], box[3], box[4]
    outline_x = [xmin, xmin, xmax, xmax, xmin]
    outline_y = [ymin, ymax, ymax, ymin, ymin]
    plt.plot(outline_x, outline_y)
plt.imshow(im)
plt.show()

## 1. Build Custom Dataset class

In [96]:
class BloodCellDataset(torch.utils.data.Dataset):
    """Blood-cell detection dataset that yields YOLO-style training pairs.

    Images are read from ``<img_dir>/<index>.png``. Their annotations are the
    rows of ``csv_file`` whose ``image`` column equals ``image-<index>.png``;
    each row carries pixel-space ``xmin``/``ymin``/``xmax``/``ymax`` columns
    and a ``label`` column ("rbc" or "wbc").

    Args:
        img_dir: Directory holding the ``<index>.png`` images.
        csv_file: Path of the annotation CSV.
        split: Grid size S (the image is divided into S x S cells).
        num_classes: Number of classes C.
        num_bboxes: Number of boxes per cell B (only the first slot is filled).
        transform: Optional callable invoked as ``transform(img, bboxes)`` and
            returning ``(img, bboxes)``. When omitted, the image is resized to
            448x448 and converted to a tensor.
    """

    # Columns read by name so the result does not depend on whether the CSV
    # was written with or without a leading index column.
    BOX_COLUMNS = ["xmin", "ymin", "xmax", "ymax", "label"]

    def __init__(self, img_dir, csv_file, split, num_classes, num_bboxes, transform=None):
        self.annotations = pd.read_csv(csv_file)
        self.dict_ = {"rbc":0, "wbc":1}
        self.annotations['label'] = self.annotations.label.map(self.dict_)
        self.img_dir = img_dir
        self.S = split
        self.C = num_classes
        self.B = num_bboxes
        self.transform = transform
        # Built once instead of on every __getitem__ call.
        self._default_transform = transforms.Compose(
            [transforms.Resize((448, 448)), transforms.ToTensor()]
        )

    def __len__(self):
        """Number of files in ``img_dir`` (one image per index is expected)."""
        return len(os.listdir(self.img_dir))

    def _image_path(self, index):
        """Path of the image stored for ``index``."""
        return os.path.join(self.img_dir, str(index) + ".png")

    def _annotation_rows(self, index):
        """Rows ``[xmin, ymin, xmax, ymax, label]`` annotated for ``index``."""
        image_name_csv = "image-" + str(index) + ".png"
        rows = self.annotations[self.annotations["image"] == image_name_csv]
        return rows[self.BOX_COLUMNS].to_numpy()

    def _to_yolo_boxes(self, rows, img_width, img_height):
        """Convert pixel corner boxes to ``[label, x, y, width, height]``.

        ``x``/``width`` are normalised by the image width and ``y``/``height``
        by the image height, so all four values are fractions of the image.
        """
        bboxes = []
        for xmin, ymin, xmax, ymax, class_label in rows:
            xmin, ymin, xmax, ymax = float(xmin), float(ymin), float(xmax), float(ymax)
            x = ((xmin + xmax) / 2) / img_width
            y = ((ymin + ymax) / 2) / img_height
            width = (xmax - xmin) / img_width
            height = (ymax - ymin) / img_height
            bboxes.append([class_label, x, y, width, height])
        return bboxes

    def _build_label_matrix(self, bboxes):
        """Encode normalised boxes into an ``(S, S, C + 5 * B)`` target.

        Per-cell layout: ``[0:C]`` one-hot class, ``[C]`` objectness,
        ``[C+1:C+5]`` box as (x, y, width, height) relative to the cell.
        """
        label_matrix = torch.zeros((self.S, self.S, self.C + self.B * 5))
        boxes = torch.as_tensor(bboxes, dtype=torch.float32).reshape(-1, 5)

        for class_label, x, y, width, height in boxes.tolist():
            # Clamp so a centre lying exactly on the right/bottom border
            # (x or y == 1.0) maps to the last cell instead of index S.
            i = min(max(int(self.S * y), 0), self.S - 1)
            j = min(max(int(self.S * x), 0), self.S - 1)
            x_cell, y_cell = self.S * x - j, self.S * y - i
            width_cell, height_cell = width * self.S, height * self.S

            # One object per cell: the first box claiming a cell wins, which
            # keeps the class one-hot and the box coordinates consistent.
            if label_matrix[i, j, self.C] == 0:
                label_matrix[i, j, self.C] = 1
                label_matrix[i, j, self.C + 1:self.C + 5] = torch.tensor(
                    [x_cell, y_cell, width_cell, height_cell]
                )
                label_matrix[i, j, int(class_label)] = 1
        return label_matrix

    def __getitem__(self, index):
        """Return ``(image_tensor, label_matrix)`` for the image ``index``."""
        # Context manager closes the file handle; convert() yields a loaded,
        # 3-channel copy that stays valid afterwards.
        with Image.open(self._image_path(index)) as raw:
            img = raw.convert("RGB")
        img_width, img_height = img.size

        rows = self._annotation_rows(index)
        bboxes = torch.tensor(
            self._to_yolo_boxes(rows, img_width, img_height), dtype=torch.float32
        ).reshape(-1, 5)

        if self.transform is not None:
            img, bboxes = self.transform(img, bboxes)
        else:
            img = self._default_transform(img)

        return img, self._build_label_matrix(bboxes)

    def __show_img_annotated__(self, index):
        """Plot image ``index`` with its annotated boxes drawn on top."""
        with Image.open(self._image_path(index)) as raw:
            im = raw.convert("RGB")

        for xmin, ymin, xmax, ymax, _ in self._annotation_rows(index):
            plt.plot([xmin, xmin, xmax, xmax, xmin], [ymin, ymax, ymax, ymin, ymin])
        plt.imshow(im)
        plt.show()

In [97]:
# Positional arguments: img_dir, csv_file, split (S=7), num_classes (C=2), num_bboxes (B=1).
bloodcelldataset = BloodCellDataset("new_archive/new_images", "new_archive/new_annotation.csv", 7, 2, 1)

In [98]:
bloodcelldataset.__getitem__(0)

(tensor([[[0.7961, 0.7961, 0.7961,  ..., 0.7294, 0.7333, 0.7373],
          [0.7961, 0.7961, 0.7961,  ..., 0.7255, 0.7294, 0.7333],
          [0.8000, 0.7961, 0.7922,  ..., 0.7137, 0.7216, 0.7255],
          ...,
          [0.7059, 0.7020, 0.6980,  ..., 0.7098, 0.7059, 0.7020],
          [0.7098, 0.7098, 0.7098,  ..., 0.7059, 0.7020, 0.7020],
          [0.7098, 0.7137, 0.7176,  ..., 0.7020, 0.7020, 0.7020]],
 
         [[0.7137, 0.7216, 0.7373,  ..., 0.6235, 0.6275, 0.6314],
          [0.7137, 0.7216, 0.7373,  ..., 0.6235, 0.6235, 0.6275],
          [0.7137, 0.7216, 0.7373,  ..., 0.6235, 0.6196, 0.6196],
          ...,
          [0.4314, 0.4235, 0.4157,  ..., 0.8784, 0.8784, 0.8784],
          [0.4353, 0.4353, 0.4314,  ..., 0.8784, 0.8784, 0.8784],
          [0.4353, 0.4392, 0.4431,  ..., 0.8784, 0.8784, 0.8784]],
 
         [[0.6588, 0.6667, 0.6824,  ..., 0.6549, 0.6588, 0.6627],
          [0.6588, 0.6667, 0.6824,  ..., 0.6549, 0.6627, 0.6667],
          [0.6627, 0.6706, 0.6863,  ...,

In [99]:
# bloodcelldataset.__show_img_annotated__(0)

In [100]:
# Batch the dataset in groups of 32, reshuffled on every pass; the final
# batch may be smaller because drop_last is left at its default.
train_loader = DataLoader(
    dataset=bloodcelldataset,
    batch_size=32,
    shuffle=True
)

In [101]:
train_loader

<torch.utils.data.dataloader.DataLoader at 0x1850791dd60>

In [104]:
# Shape check: iterate once over the loader and print the image batch shape
# (X) and the label-matrix batch shape (Y) for every batch.
for idx, (X, Y) in enumerate(train_loader):
    print(X.shape)
    print(Y.shape)
    print("++++++++++++++++++++++++++++++")

torch.Size([32, 3, 448, 448])
torch.Size([32, 7, 7, 7])
++++++++++++++++++++++++++++++
torch.Size([32, 3, 448, 448])
torch.Size([32, 7, 7, 7])
++++++++++++++++++++++++++++++
torch.Size([32, 3, 448, 448])
torch.Size([32, 7, 7, 7])
++++++++++++++++++++++++++++++
torch.Size([4, 3, 448, 448])
torch.Size([4, 7, 7, 7])
++++++++++++++++++++++++++++++


In [None]:
# bloodcelldataset.__show_img_annotated__(5)

In [None]:
# img, matrix = bloodcelldataset.__getitem__("image-100.png")

In [None]:
112/4

## 2. Build Neural Network

In [None]:
# Layer specification consumed by Net.convLayers_, in order:
#   tuple (kernel_size, out_channels, stride, padding) -> one conv Block
#   "M"                                                -> MaxPool2d(2, stride=2)
#   list [tuple_a, tuple_b, n]                         -> the pair (tuple_a, tuple_b) repeated n times
# Input channels are not listed; each layer takes the previous layer's output channels.
Model_architecture = [
    (7, 64, 2, 3),
    "M",
    (3, 192, 1, 1),
    "M",
    (1, 128, 1, 0),
    (3, 256, 1, 1),
    (1, 256, 1, 0),
    (3, 512, 1, 1),
    "M",
    [(1, 256, 1, 0), (3, 512, 1, 1), 4],
    (1, 512, 1, 0),
    (3, 1024, 1, 1),
    "M",
    [(1, 512, 1, 0), (3, 1024, 1, 1), 2],
    (3, 1024, 1, 1),
    (3, 1024, 2, 1),
    (3, 1024, 1, 1),
    (3, 1024, 1, 1)
]

In [None]:
class Block(nn.Module):
    """A 2-D convolution immediately followed by a LeakyReLU activation."""

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding):
        super().__init__()
        conv_options = dict(kernel_size=kernel_size, stride=stride, padding=padding)
        self.conv = nn.Conv2d(in_channels, out_channels, **conv_options)
        self.Leakyrelu = nn.LeakyReLU(0.001)

    def forward(self, x):
        """Apply the convolution, then the activation."""
        features = self.conv(x)
        activated = self.Leakyrelu(features)
        return activated

In [None]:
# b = Block(1, 64, 3, 1, 1)
# b(torch.randn(1,28,28).view(-1, 1, 28,28)).shape

In [None]:
class Net(nn.Module):
    """YOLO-style detector: a convolutional backbone plus a fully connected head.

    Args:
        network_architecture: Layer specification. A ``str`` entry adds a 2x2
            max-pool; a ``tuple`` ``(kernel_size, out_channels, stride,
            padding)`` adds one conv ``Block``; a ``list`` ``[tuple_a,
            tuple_b, n]`` adds the pair ``tuple_a, tuple_b`` ``n`` times.
        in_channels: Number of channels of the input image.
        split_size: Grid size S of the prediction.
        num_boxes: Boxes predicted per cell, B.
        num_classes: Number of classes, C.

    The head emits ``S * S * (C + 5 * B)`` values per image, the per-image size
    the loss reshapes predictions to. Its input size is fixed at
    ``7 * 7 * 1024``, so the backbone must end in a 7x7x1024 feature map.
    """

    def __init__(self, network_architecture, in_channels, split_size=7, num_boxes=1, num_classes=2):
        super(Net, self).__init__()
        self.network_architecture = network_architecture
        self.in_channels = in_channels
        self.S = split_size
        self.B = num_boxes
        self.C = num_classes
        self.convLayers = self.convLayers_()
        self.fcLayers = self.fcLayers_()

    def convLayers_(self):
        """Build the convolutional backbone from ``network_architecture``."""
        layers = []
        # Tracked locally so building the backbone does not overwrite
        # self.in_channels (and can therefore be repeated safely).
        channels = self.in_channels
        for spec in self.network_architecture:
            if isinstance(spec, str):
                layers.append(nn.MaxPool2d(2, stride=2))

            elif isinstance(spec, tuple):
                layers.append(
                    Block(channels, out_channels=spec[1], kernel_size=spec[0], stride=spec[2], padding=spec[3])
                )
                channels = spec[1]

            elif isinstance(spec, list):
                first, second, repeats = spec[0], spec[1], spec[-1]
                for _ in range(repeats):
                    layers.append(
                        Block(channels, out_channels=first[1], kernel_size=first[0], stride=first[2], padding=first[3])
                    )
                    layers.append(
                        Block(in_channels=first[1], out_channels=second[1], kernel_size=second[0], stride=second[2], padding=second[3])
                    )
                    channels = second[1]

            else:
                raise TypeError(f"Unsupported architecture entry: {spec!r}")
        return nn.Sequential(*layers)

    def fcLayers_(self):
        """Build the fully connected head mapping features to grid predictions."""
        return nn.Sequential(
            nn.Linear(7*7*1024, 4096),
            nn.LeakyReLU(0.001),
            # One (C + 5 * B)-vector per grid cell.
            nn.Linear(4096, self.S * self.S * (self.C + self.B * 5))
        )

    def forward(self, x):
        """Return predictions of shape ``(batch, S * S * (C + 5 * B))``."""
        x = self.convLayers(x)
        # Keep the batch dimension: flatten only the per-sample feature map.
        x = torch.flatten(x, start_dim=1)
        x = self.fcLayers(x)
        return x

In [None]:
# Run on the GPU when CUDA is available, otherwise on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [None]:
# Build the network for 3-channel input and move it to the selected device;
# the print only signals that construction finished.
net = Net(network_architecture=Model_architecture, in_channels=3).to(device)
print("hey")

In [None]:
# Random stand-in for one 448x448 3-channel image, reshaped to a batch of one.
random_image = torch.randn(3, 448, 448).view(-1, 3, 448, 448).to(device)

In [None]:
# Forward pass smoke test on the random image.
o = net(random_image)

In [None]:
o.shape

In [None]:
30*7*7

## Utils Functions

In [None]:
def intersection_over_union(boxes_pred, boxes_gt, box_format="midpoint"):
    """Intersection over union of two sets of boxes.

    Args:
        boxes_pred: Tensor whose last dimension holds 4 box values.
        boxes_gt: Tensor of matching shape holding the reference boxes.
        box_format: ``"midpoint"`` for (x_center, y_center, width, height) or
            ``"corners"`` for (x1, y1, x2, y2).

    Returns:
        Tensor with the same leading dimensions and a last dimension of 1
        holding the IoU of each box pair.

    Raises:
        ValueError: If ``box_format`` is not one of the two supported values.
    """
    if box_format == "midpoint":
        box1_x1 = boxes_pred[..., 0:1] - boxes_pred[..., 2:3] / 2
        box1_y1 = boxes_pred[..., 1:2] - boxes_pred[..., 3:4] / 2
        box1_x2 = boxes_pred[..., 0:1] + boxes_pred[..., 2:3] / 2
        box1_y2 = boxes_pred[..., 1:2] + boxes_pred[..., 3:4] / 2

        box2_x1 = boxes_gt[..., 0:1] - boxes_gt[..., 2:3] / 2
        box2_y1 = boxes_gt[..., 1:2] - boxes_gt[..., 3:4] / 2
        box2_x2 = boxes_gt[..., 0:1] + boxes_gt[..., 2:3] / 2
        box2_y2 = boxes_gt[..., 1:2] + boxes_gt[..., 3:4] / 2
    elif box_format == "corners":
        box1_x1, box1_y1 = boxes_pred[..., 0:1], boxes_pred[..., 1:2]
        box1_x2, box1_y2 = boxes_pred[..., 2:3], boxes_pred[..., 3:4]

        box2_x1, box2_y1 = boxes_gt[..., 0:1], boxes_gt[..., 1:2]
        box2_x2, box2_y2 = boxes_gt[..., 2:3], boxes_gt[..., 3:4]
    else:
        raise ValueError(f"box_format must be 'midpoint' or 'corners', got {box_format!r}")

    # Corners of the overlap rectangle.
    x1 = torch.max(box1_x1, box2_x1)
    y1 = torch.max(box1_y1, box2_y1)
    x2 = torch.min(box1_x2, box2_x2)
    y2 = torch.min(box1_y2, box2_y2)

    # clamp(0) turns a negative extent (boxes that do not overlap) into 0.
    intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)

    box1_area = abs((box1_x2 - box1_x1) * (box1_y2 - box1_y1))
    box2_area = abs((box2_x2 - box2_x1) * (box2_y2 - box2_y1))

    # 1e-6 avoids a division by zero when both boxes are empty.
    return intersection / (box1_area + box2_area - intersection + 1e-6)


# Original (capitalised) name kept as an alias for backward compatibility.
Intersection_over_union = intersection_over_union

In [None]:
torch.max(torch.tensor([[1],[2],[3]]),torch.tensor([[4],[0],[1]]))

In [None]:
def non_max_supression(bboxes, iou_threshold, threshold, box_format="corners"):
    """Greedy per-class non-maximum suppression.

    Args:
        bboxes: List of boxes ``[class_id, probability, b0, b1, b2, b3]``.
        iou_threshold: A box is dropped when it has the same class as an
            already kept box and their IoU is at least this value.
        threshold: Boxes whose probability is not above this value are
            discarded before suppression.
        box_format: Meaning of the four box values: ``"corners"`` for
            (x1, y1, x2, y2) or ``"midpoint"`` for (x, y, width, height).

    Returns:
        The kept boxes, ordered by decreasing probability.

    Raises:
        ValueError: If ``box_format`` is not one of the two supported values.
    """
    assert type(bboxes) == list
    if box_format not in ("corners", "midpoint"):
        raise ValueError(f"box_format must be 'corners' or 'midpoint', got {box_format!r}")

    def _as_midpoint(box):
        # The IoU helper works on (x_center, y_center, width, height) boxes.
        coords = torch.tensor(box[2:], dtype=torch.float32)
        if box_format == "midpoint":
            return coords
        x1, y1, x2, y2 = coords.unbind(-1)
        return torch.stack([(x1 + x2) / 2, (y1 + y2) / 2, x2 - x1, y2 - y1], dim=-1)

    def _is_suppressed_by(chosen, other):
        # Boxes of different classes never suppress each other.
        if other[0] != chosen[0]:
            return False
        iou = Intersection_over_union(_as_midpoint(chosen), _as_midpoint(other))
        return iou.item() >= iou_threshold

    candidates = sorted(
        (box for box in bboxes if box[1] > threshold),
        key=lambda box: box[1],
        reverse=True,
    )
    bboxes_after_nms = []

    while candidates:
        chosen_bbox = candidates.pop(0)
        candidates = [box for box in candidates if not _is_suppressed_by(chosen_bbox, box)]
        bboxes_after_nms.append(chosen_bbox)
    return bboxes_after_nms

In [None]:
class YoloLoss(nn.Module):
    """Sum-of-squares YOLO loss for a grid with one scored box per cell.

    Per-cell channel layout expected in both predictions and targets:
    ``[0:C]`` class scores, ``[C]`` objectness/confidence, ``[C+1:C+5]`` box
    as (x, y, width, height). Only this first box slot is scored.

    Args:
        S: Grid size.
        B: Boxes per cell (fixes the size of the last dimension).
        C: Number of classes.
    """

    def __init__(self, S=7, B=1, C=2):
        super(YoloLoss, self).__init__()
        self.mse = nn.MSELoss(reduction="sum")
        self.S = S
        self.B = B
        self.C = C

        # These are from Yolo paper, signifying how much we should
        # pay loss for no object (noobj) and the box coordinates (coord)
        self.lambda_noobj = 0.5
        self.lambda_coord = 5

    def forward(self, predictions, target):
        """Return the scalar loss summed over the whole batch.

        Args:
            predictions: Tensor reshapeable to ``(N, S, S, C + 5 * B)``.
            target: Tensor of shape ``(N, S, S, C + 5 * B)``.
        """
        # predictions arrive flattened as (BATCH_SIZE, S*S*(C+B*5))
        predictions = predictions.reshape(-1, self.S, self.S, self.C + self.B * 5)

        classes = slice(0, self.C)
        confidence = slice(self.C, self.C + 1)
        box = slice(self.C + 1, self.C + 5)

        # 1 where the target places an object in the cell (Iobj_i in the paper).
        exists_box = target[..., confidence]

        # ======================== #
        #   FOR BOX COORDINATES    #
        # ======================== #
        # Cells without an object are zeroed so they do not contribute.
        box_predictions = exists_box * predictions[..., box]
        box_targets = exists_box * target[..., box]

        # Width/height are compared on a square-root scale. Predictions may be
        # negative, hence sign * sqrt(|.|); the 1e-6 keeps the gradient of
        # sqrt finite at 0.
        pred_wh = torch.sign(box_predictions[..., 2:4]) * torch.sqrt(
            torch.abs(box_predictions[..., 2:4] + 1e-6)
        )
        box_predictions = torch.cat([box_predictions[..., 0:2], pred_wh], dim=-1)
        box_targets = torch.cat(
            [box_targets[..., 0:2], torch.sqrt(box_targets[..., 2:4])], dim=-1
        )

        box_loss = self.mse(
            torch.flatten(box_predictions, end_dim=-2),
            torch.flatten(box_targets, end_dim=-2),
        )

        # ==================== #
        #   FOR OBJECT LOSS    #
        # ==================== #
        # Confidence error in cells that contain an object.
        object_loss = self.mse(
            torch.flatten(exists_box * predictions[..., confidence]),
            torch.flatten(exists_box * target[..., confidence]),
        )

        # ======================= #
        #   FOR NO OBJECT LOSS    #
        # ======================= #
        # Confidence error in cells that contain no object.
        no_object_loss = self.mse(
            torch.flatten((1 - exists_box) * predictions[..., confidence], start_dim=1),
            torch.flatten((1 - exists_box) * target[..., confidence], start_dim=1),
        )

        # ================== #
        #   FOR CLASS LOSS   #
        # ================== #
        class_loss = self.mse(
            torch.flatten(exists_box * predictions[..., classes], end_dim=-2,),
            torch.flatten(exists_box * target[..., classes], end_dim=-2,),
        )

        loss = (
            self.lambda_coord * box_loss  # first two rows in paper
            + object_loss  # third row in paper
            + self.lambda_noobj * no_object_loss  # forth row
            + class_loss  # fifth row
        )

        return loss

## Training

In [None]:
import torchvision.transforms as transforms
import torch.optim as optim
import torchvision.transforms.functional as FT
from tqdm import tqdm
from torch.utils.data import DataLoader

In [None]:
# Fix the RNG seed so weight initialisation and shuffling are reproducible.
seed = 123
torch.manual_seed(seed)

# Hyperparameters etc. 
LEARNING_RATE = 2e-5
# is_available must be *called*: the bare function object is always truthy,
# which selected "cuda" even on machines without a GPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
BATCH_SIZE = 16 # 64 in original paper but I don't have that much vram, grad accum?
WEIGHT_DECAY = 0
EPOCHS = 1000
LOAD_MODEL = False
# LOAD_MODEL_FILE = "overfit.pth.tar"
# IMG_DIR = "data/images"
# LABEL_DIR = "data/labels"

In [None]:
class Compose(object):
    """Chain image transforms while passing the bounding boxes through untouched."""

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, img, bboxes):
        """Apply every transform to ``img`` in order; return ``(img, bboxes)``."""
        result = img
        for step in self.transforms:
            result = step(result)
        return result, bboxes


# Image pipeline: resize to 448x448, then convert to a tensor; boxes are passed through unchanged.
transform = Compose([transforms.Resize((448, 448)), transforms.ToTensor(),])

In [None]:
def train_fn(train_loader, model, optimizer, loss_fn):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        train_loader: Iterable yielding ``(inputs, targets)`` tensor batches.
        model: Network being optimised.
        optimizer: Optimiser built over ``model``'s parameters.
        loss_fn: Callable ``loss_fn(predictions, targets)`` returning a scalar tensor.

    Returns:
        The mean of the per-batch losses as a float, or ``None`` when the
        loader produced no batch.
    """
    # Move each batch to wherever the model lives, so model and data can never
    # end up on different devices.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    batch_losses = []
    for x, y in train_loader:
        x, y = x.to(device), y.to(device)
        out = model(x)
        loss = loss_fn(out, y)
        batch_losses.append(loss.item())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if not batch_losses:
        # e.g. drop_last=True with fewer samples than one batch.
        print("No batches were produced by train_loader; nothing to train on.")
        return None

    mean_loss = sum(batch_losses) / len(batch_losses)
    print(f"Mean loss was {mean_loss}")
    return mean_loss


In [None]:
model = Net(network_architecture=Model_architecture, in_channels=3).to(device)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
loss_fn = YoloLoss()

# Arguments are (img_dir, csv_file, S, C, B). The dataset looks images up as
# "<index>.png" and annotations as "image-<index>.png", i.e. the renamed copy
# under new_archive, and it resizes images to 448x448 tensors by itself.
train_dataset = BloodCellDataset("new_archive/new_images", "new_archive/new_annotation.csv", 7, 2, 1)

# drop_last=True discards a final incomplete batch.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    drop_last=True,
)

In [None]:
train_loader

In [None]:
# Smoke test: iterate the loader once and print a marker per batch.
for batch_idx, (x, y) in enumerate(train_loader):
        print("loop")

In [None]:
# Training loop over EPOCHS epochs.
for epoch in range(EPOCHS):
    print("im here")
    train_fn(train_loader, model, optimizer, loss_fn)
    # NOTE(review): this unconditional break ends the loop after the first
    # epoch, so EPOCHS currently has no effect — remove it to train fully.
    break