**Preparing libraries:**

In [None]:
# Pinned to the release this notebook was executed with (see the install log below);
# %pip installs into the environment of the running kernel.
%pip install -q fiftyone==0.23.2

Collecting fiftyone
  Downloading fiftyone-0.23.2-py3-none-any.whl (7.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.9/7.9 MB[0m [31m24.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting aiofiles (from fiftyone)
  Downloading aiofiles-23.2.1-py3-none-any.whl (15 kB)
Collecting argcomplete (from fiftyone)
  Downloading argcomplete-3.2.1-py3-none-any.whl (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.3/42.3 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
Collecting boto3 (from fiftyone)
  Downloading boto3-1.34.7-py3-none-any.whl (139 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.3/139.3 kB[0m [31m21.2 MB/s[0m eta [36m0:00:00[0m
Collecting dacite<1.8.0,>=1.6.0 (from fiftyone)
  Downloading dacite-1.7.0-py3-none-any.whl (12 kB)
Collecting Deprecated (from fiftyone)
  Downloading Deprecated-1.2.14-py2.py3-none-any.whl (9.6 kB)
Collecting ftfy (from fiftyone)
  Downloading ftfy-6.1.3-py3-none-any.whl (53 kB)


In [None]:
import torch.nn as nn
from torch.utils.data import Dataset
import numpy as np
import torch
from PIL import Image

# Define an example CNN network
class CNN_Model(nn.Module):
    """Minimal CNN baseline: 3x3 convolution -> ReLU -> 16x16 max-pool -> linear head.

    Args:
        in_channels: Number of channels of the input images.
        out_channels: Number of feature maps produced by the convolution.
        class_num: Number of output logits (one per class).
        input_size: Height/width of the square input images. It is only used
            to size the linear head. The default of 224 together with
            ``out_channels=1`` yields the 196 input features that were
            previously hard-coded.
    """

    def __init__(self, in_channels=3, out_channels=1, class_num=5, input_size=224):
        super(CNN_Model, self).__init__()
        pool = 16
        self.path = nn.Sequential(
            # kernel 3 / stride 1 / padding 1 keeps the spatial size unchanged
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=pool, stride=pool)
        )
        # Non-overlapping pooling shrinks each spatial side by a factor of `pool`,
        # so the flattened feature count is out_channels * (input_size // pool) ** 2.
        pooled_side = input_size // pool
        self.fc = nn.Linear(out_channels * pooled_side * pooled_side, class_num)

    def forward(self, x):
        """Return raw (pre-sigmoid) class logits of shape ``(batch, class_num)``."""
        conv_out = self.path(x)
        # Collapse every non-batch dimension into a single feature axis
        flt = conv_out.flatten(1)
        fc_out = self.fc(flt)
        return fc_out

# Create my own custom dataset
class CustomDataset(Dataset):
    """Multi-label image classification dataset built from detection-annotated samples.

    Every item is an ``(image, label)`` pair. ``label`` is a float32 multi-hot
    vector with 1.0 at the index of each entry of ``classes`` that appears as
    the label of at least one detection in the sample.

    Args:
        ds: Indexable collection of samples. Each sample is accessed through
            ``sample.filepath`` and ``sample.ground_truth.detections``.
        classes: Ordered list of class names; a name's position is its label index.
        trnsforms: Callable applied to the RGB PIL image before it is returned.
    """

    def __init__(self, ds, classes, trnsforms):
        self.dataset = ds
        self.classes = classes
        self.transformer = trnsforms
        # Name -> index lookup so label encoding does not scan the class list
        # for every detection. setdefault keeps the first index of a repeated
        # name, which is what list.index() would return.
        self._class_to_idx = {}
        for position, name in enumerate(classes):
            self._class_to_idx.setdefault(name, position)

    def _encode_label(self, sample):
        """Return the float32 multi-hot label tensor for ``sample``."""
        label = np.zeros(len(self.classes), dtype=np.float32)
        # A missing (None) ground_truth is treated as "no detections"
        detections = getattr(sample.ground_truth, "detections", None) or []
        for detection in detections:
            position = self._class_to_idx.get(detection.label)
            if position is not None:
                label[position] = 1.0
        # Stay in float32 so the targets have the same dtype as the model logits
        return torch.from_numpy(label)

    # The main function
    def __getitem__(self, idx):
        """Load, convert and transform the image at ``idx`` and build its label."""
        sample = self.dataset[idx]
        # convert() produces an independent, fully decoded image, so the file
        # handle can be released as soon as the conversion is done.
        with Image.open(sample.filepath) as raw_image:
            image = raw_image.convert('RGB')
        image = self.transformer(image)

        return image, self._encode_label(sample)

    def __len__(self):
        """Number of samples in the wrapped collection."""
        return len(self.dataset)


In [None]:
import fiftyone as fo
import fiftyone.zoo as foz
from torch.utils.data import DataLoader
from torchvision import transforms

# Target classes for the multi-label task; list order fixes the label index
classes = ["person", "car", "dog", "cat", "bird"]

# (split name, sample cap) for each COCO-2017 zoo split used below
_split_caps = (("train", 10000), ("validation", 1000))

# Load every split and materialise it as a plain list so that samples can be
# fetched by integer position (CustomDataset indexes its data with `ds[idx]`)
_loaded = {
    split_name: list(
        foz.load_zoo_dataset(
            "coco-2017",
            split=split_name,
            label_types=["segmentations"],
            classes=classes,
            max_samples=cap,
        )
    )
    for split_name, cap in _split_caps
}

# Training samples from the COCO zoo dataset
dataset_train = _loaded["train"]

# Held-out samples (COCO validation split) used for testing
dataset_test = _loaded["validation"]



Migrating database to v0.23.2


INFO:fiftyone.migrations.runner:Migrating database to v0.23.2


Downloading split 'train' to '/root/fiftyone/coco-2017/train' if necessary


INFO:fiftyone.zoo.datasets:Downloading split 'train' to '/root/fiftyone/coco-2017/train' if necessary


Downloading annotations to '/root/fiftyone/coco-2017/tmp-download/annotations_trainval2017.zip'


INFO:fiftyone.utils.coco:Downloading annotations to '/root/fiftyone/coco-2017/tmp-download/annotations_trainval2017.zip'


 100% |██████|    1.9Gb/1.9Gb [5.5s elapsed, 0s remaining, 377.2Mb/s]       


INFO:eta.core.utils: 100% |██████|    1.9Gb/1.9Gb [5.5s elapsed, 0s remaining, 377.2Mb/s]       


Extracting annotations to '/root/fiftyone/coco-2017/raw/instances_train2017.json'


INFO:fiftyone.utils.coco:Extracting annotations to '/root/fiftyone/coco-2017/raw/instances_train2017.json'


Downloading 10000 images


INFO:fiftyone.utils.coco:Downloading 10000 images


 100% |██████████████| 10000/10000 [32.7m elapsed, 0s remaining, 5.0 images/s]      


INFO:eta.core.utils: 100% |██████████████| 10000/10000 [32.7m elapsed, 0s remaining, 5.0 images/s]      


Writing annotations for 10000 downloaded samples to '/root/fiftyone/coco-2017/train/labels.json'


INFO:fiftyone.utils.coco:Writing annotations for 10000 downloaded samples to '/root/fiftyone/coco-2017/train/labels.json'


Dataset info written to '/root/fiftyone/coco-2017/info.json'


INFO:fiftyone.zoo.datasets:Dataset info written to '/root/fiftyone/coco-2017/info.json'


Loading 'coco-2017' split 'train'


INFO:fiftyone.zoo.datasets:Loading 'coco-2017' split 'train'


 100% |█████████████| 10000/10000 [2.7m elapsed, 0s remaining, 63.4 samples/s]      


INFO:eta.core.utils: 100% |█████████████| 10000/10000 [2.7m elapsed, 0s remaining, 63.4 samples/s]      


Dataset 'coco-2017-train-10000' created


INFO:fiftyone.zoo.datasets:Dataset 'coco-2017-train-10000' created


Downloading split 'validation' to '/root/fiftyone/coco-2017/validation' if necessary


INFO:fiftyone.zoo.datasets:Downloading split 'validation' to '/root/fiftyone/coco-2017/validation' if necessary


Found annotations at '/root/fiftyone/coco-2017/raw/instances_val2017.json'


INFO:fiftyone.utils.coco:Found annotations at '/root/fiftyone/coco-2017/raw/instances_val2017.json'


Downloading 1000 images


INFO:fiftyone.utils.coco:Downloading 1000 images


 100% |████████████████| 1000/1000 [3.3m elapsed, 0s remaining, 4.6 images/s]      


INFO:eta.core.utils: 100% |████████████████| 1000/1000 [3.3m elapsed, 0s remaining, 4.6 images/s]      


Writing annotations for 1000 downloaded samples to '/root/fiftyone/coco-2017/validation/labels.json'


INFO:fiftyone.utils.coco:Writing annotations for 1000 downloaded samples to '/root/fiftyone/coco-2017/validation/labels.json'


Dataset info written to '/root/fiftyone/coco-2017/info.json'


INFO:fiftyone.zoo.datasets:Dataset info written to '/root/fiftyone/coco-2017/info.json'


Loading 'coco-2017' split 'validation'


INFO:fiftyone.zoo.datasets:Loading 'coco-2017' split 'validation'


 100% |███████████████| 1000/1000 [17.4s elapsed, 0s remaining, 61.9 samples/s]      


INFO:eta.core.utils: 100% |███████████████| 1000/1000 [17.4s elapsed, 0s remaining, 61.9 samples/s]      


Dataset 'coco-2017-validation-1000' created


INFO:fiftyone.zoo.datasets:Dataset 'coco-2017-validation-1000' created


In [None]:
from torchvision import transforms
# Preprocessing pipeline. It gets its own name so that the `torchvision.transforms`
# module imported above is not overwritten by the Compose instance.
image_transforms = transforms.Compose([
    # 224x224 is the input size the models' linear heads are sized for
    # (224 / 16 = 14 per side after pooling, i.e. 196 features per channel)
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    # Per-channel mean / std; presumably the usual ImageNet statistics
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])

# Shuffle only the training data; keep evaluation order deterministic
trainset = CustomDataset(ds=dataset_train, classes=classes, trnsforms=image_transforms)
trainloader = DataLoader(trainset, batch_size=32, shuffle=True)

testset = CustomDataset(ds=dataset_test, classes=classes, trnsforms=image_transforms)
testloader = DataLoader(testset, batch_size=32, shuffle=False)

In [None]:
from torch.optim import Adam
from tqdm import tqdm

# Run on the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Create the model
model = CNN_Model(class_num=len(classes)).to(device)
# Define optimizer and loss (BCE on logits: one independent sigmoid per class)
criterion = nn.BCEWithLogitsLoss()
optimizer = Adam(model.parameters(), lr=0.001)
# Multiply the learning rate by 0.1 every 3 epochs
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

epochs = 6


# Training loop
for epoch in range(epochs):
    model.train()

    # Metrics are accumulated as per-sample sums and divided by the number of
    # samples at the end. Averaging per-batch ratios instead would over-weight
    # the samples of a smaller final batch.
    loss_sum = 0.0
    num_samples = 0
    correct_per_label = torch.zeros(len(classes), device=device)

    # tqdm added for progress visualization
    for inputs, labels in tqdm(trainloader, total=len(trainloader), desc=f'Epoch {epoch + 1}/{epochs} - Training'):
        inputs, labels = inputs.to(device), labels.to(device)
        batch_size = labels.size(0)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean, so scale it back to a per-sample sum
        loss_sum += loss.item() * batch_size
        num_samples += batch_size

        # A class is predicted present when its sigmoid probability exceeds 0.5
        preds = torch.sigmoid(outputs) > 0.5
        correct_per_label += (preds == labels).float().sum(0)

    running_loss = loss_sum / num_samples
    accuracy_per_label = correct_per_label / num_samples
    # Every class has the same number of samples, so the mean over classes
    # equals (all correct predictions) / (samples * classes)
    overall_accuracy = accuracy_per_label.mean().item()

    print(f'Train Loss: {running_loss:.4f} Total Acc: {overall_accuracy:.4f}')
    print('Per Class Acc:', accuracy_per_label.tolist())

    # Advance the LR schedule once per epoch; without this call StepLR never
    # changes the learning rate.
    scheduler.step()

# Testing loop
model.eval()
with torch.no_grad():
    num_samples = 0
    correct_per_label = torch.zeros(len(classes), device=device)

    # tqdm added for progress visualization
    for images, labels in tqdm(testloader, total=len(testloader), desc='Testing'):
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        preds = torch.sigmoid(outputs) > 0.5

        correct_per_label += (preds == labels).float().sum(0)
        num_samples += labels.size(0)

    accuracy_per_label = correct_per_label / num_samples
    overall_accuracy = accuracy_per_label.mean().item()

    print(f'Total Acc: {overall_accuracy:.4f}')
    print('Per Class Acc:', accuracy_per_label.tolist())


Epoch 1/6 - Training: 100%|██████████| 313/313 [01:22<00:00,  3.78it/s]


Train Loss: 0.3272 Total Acc: 0.9010
Per Class Acc: [0.8458466529846191, 0.8292731642723083, 0.9356030225753784, 0.9355031847953796, 0.9589656591415405]


Epoch 2/6 - Training: 100%|██████████| 313/313 [01:15<00:00,  4.14it/s]


Train Loss: 0.2967 Total Acc: 0.9061
Per Class Acc: [0.84944087266922, 0.8335663080215454, 0.9423921704292297, 0.9458865523338318, 0.9590654969215393]


Epoch 3/6 - Training: 100%|██████████| 313/313 [01:16<00:00,  4.07it/s]


Train Loss: 0.2912 Total Acc: 0.9060
Per Class Acc: [0.8492411971092224, 0.8332667350769043, 0.9425918459892273, 0.9458865523338318, 0.9590654969215393]


Epoch 4/6 - Training: 100%|██████████| 313/313 [01:19<00:00,  3.93it/s]


Train Loss: 0.2886 Total Acc: 0.9057
Per Class Acc: [0.8491413593292236, 0.8318690061569214, 0.9425918459892273, 0.945786714553833, 0.9590654969215393]


Epoch 5/6 - Training: 100%|██████████| 313/313 [01:15<00:00,  4.15it/s]


Train Loss: 0.2869 Total Acc: 0.9055
Per Class Acc: [0.8489416837692261, 0.831569492816925, 0.942292332649231, 0.945786714553833, 0.9588658213615417]


Epoch 6/6 - Training: 100%|██████████| 313/313 [01:14<00:00,  4.19it/s]


Train Loss: 0.2859 Total Acc: 0.9056
Per Class Acc: [0.8493410348892212, 0.831569492816925, 0.9423921704292297, 0.945786714553833, 0.9590654969215393]


Testing: 100%|██████████| 32/32 [00:07<00:00,  4.09it/s]

Total Acc: 0.9029
Per Class Acc: [0.8427734375, 0.83203125, 0.947265625, 0.93359375, 0.958984375]





In [None]:
import torchvision.ops
from torch import nn


class DeformableConv2d(nn.Module):
    """Modulated deformable 2D convolution built on ``torchvision.ops.deform_conv2d``.

    Two auxiliary convolutions predict, from the input itself, a 2D sampling
    offset and a modulation scalar for every kernel tap at every output
    position. ``regular_conv`` is used only as a container for the weight
    (and optional bias) of the main convolution; its own ``forward`` is never
    called.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        kernel_size: Side length of the square kernel (int).
        stride: Stride shared by the auxiliary convolutions and the deformable
            convolution, so their output grids line up.
        padding: Zero padding shared by all three convolutions.
        bias: Whether the main convolution has a bias term.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size=3,
                 stride=1,
                 padding=1,
                 bias=False):

        super(DeformableConv2d, self).__init__()

        # Kept so forward() can hand the same stride/padding to deform_conv2d
        self.stride = stride
        self.padding = padding

        # Two values (one per spatial axis) per kernel tap
        self.offset_conv = nn.Conv2d(in_channels,
                                     2 * kernel_size * kernel_size,
                                     kernel_size=kernel_size,
                                     stride=stride,
                                     padding=self.padding,
                                     bias=True)

        # Zero init: every predicted offset starts at exactly 0
        nn.init.constant_(self.offset_conv.weight, 0.)
        nn.init.constant_(self.offset_conv.bias, 0.)

        # One modulation scalar per kernel tap
        self.modulator_conv = nn.Conv2d(in_channels,
                                     1 * kernel_size * kernel_size,
                                     kernel_size=kernel_size,
                                     stride=stride,
                                     padding=self.padding,
                                     bias=True)

        # Zero init: 2 * sigmoid(0) == 1, so every modulation scalar starts at 1
        nn.init.constant_(self.modulator_conv.weight, 0.)
        nn.init.constant_(self.modulator_conv.bias, 0.)

        self.regular_conv = nn.Conv2d(in_channels=in_channels,
                                      out_channels=out_channels,
                                      kernel_size=kernel_size,
                                      stride=stride,
                                      padding=self.padding,
                                      bias=bias)

    def forward(self, x):
        """Apply the deformable convolution to ``x`` (indexed as N, C, H, W)."""
        h, w = x.shape[2:]
        # Limit offsets to a quarter of the larger spatial side
        max_offset = max(h, w)/4.

        offset = self.offset_conv(x).clamp(-max_offset, max_offset)
        # Modulation values lie in (0, 2)
        modulator = 2. * torch.sigmoid(self.modulator_conv(x))

        # stride must match the one used by offset_conv / modulator_conv;
        # otherwise the offset and mask grids do not match the output grid
        # whenever stride != 1.
        x = torchvision.ops.deform_conv2d(input=x,
                                          offset=offset,
                                          weight=self.regular_conv.weight,
                                          bias=self.regular_conv.bias,
                                          stride=self.stride,
                                          padding=self.padding,
                                          mask=modulator
                                          )
        return x


In [None]:
# Define an example CNN network, deformable version
class CNN_Model_DV(nn.Module):
    """Same layout as the plain example CNN, with the 3x3 convolution replaced
    by a ``DeformableConv2d`` layer: deformable conv -> ReLU -> 16x16 max-pool
    -> linear head.

    Args:
        in_channels: Number of channels of the input images.
        out_channels: Number of feature maps produced by the deformable convolution.
        class_num: Number of output logits (one per class).
        input_size: Height/width of the square input images. It is only used
            to size the linear head. The default of 224 together with
            ``out_channels=1`` yields the 196 input features that were
            previously hard-coded.
    """

    def __init__(self, in_channels=3, out_channels=1, class_num=5, input_size=224):
        super(CNN_Model_DV, self).__init__()
        pool = 16
        self.path = nn.Sequential(
            # kernel 3 / stride 1 / padding 1 keeps the spatial size unchanged
            DeformableConv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=pool, stride=pool)
        )
        # Non-overlapping pooling shrinks each spatial side by a factor of `pool`,
        # so the flattened feature count is out_channels * (input_size // pool) ** 2.
        pooled_side = input_size // pool
        self.fc = nn.Linear(out_channels * pooled_side * pooled_side, class_num)

    def forward(self, x):
        """Return raw (pre-sigmoid) class logits of shape ``(batch, class_num)``."""
        conv_out = self.path(x)
        # Collapse every non-batch dimension into a single feature axis
        flt = conv_out.flatten(1)
        fc_out = self.fc(flt)
        return fc_out

In [None]:
from tqdm import tqdm

# Same optimisation setup as the baseline run, applied to the deformable model
model = CNN_Model_DV(class_num=len(classes)).to(device)
criterion = nn.BCEWithLogitsLoss()
optimizer = Adam(model.parameters(), lr=0.001)
# Multiply the learning rate by 0.1 every 3 epochs
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

epochs = 6

for epoch in range(epochs):
    model.train()

    # Metrics are accumulated as per-sample sums and divided by the number of
    # samples at the end. Averaging per-batch ratios instead would over-weight
    # the samples of a smaller final batch.
    loss_sum = 0.0
    num_samples = 0
    correct_per_label = torch.zeros(len(classes), device=device)

    # Wrap the trainloader with tqdm for the progress bar
    for inputs, labels in tqdm(trainloader, desc=f'Epoch {epoch+1}/{epochs}'):
        inputs, labels = inputs.to(device), labels.to(device)
        batch_size = labels.size(0)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean, so scale it back to a per-sample sum
        loss_sum += loss.item() * batch_size
        num_samples += batch_size

        # A class is predicted present when its sigmoid probability exceeds 0.5
        preds = torch.sigmoid(outputs) > 0.5
        correct_per_label += (preds == labels).float().sum(0)

    running_loss = loss_sum / num_samples
    accuracy_per_label = correct_per_label / num_samples
    # Every class has the same number of samples, so the mean over classes
    # equals (all correct predictions) / (samples * classes)
    overall_accuracy = accuracy_per_label.mean().item()

    print(f'Train Loss: {running_loss:.4f} Total Acc: {overall_accuracy:.4f}')
    print('Per Class Acc:', accuracy_per_label.tolist())

    # Scheduler step at the end of each epoch
    scheduler.step()


model.eval()
with torch.no_grad():

    num_samples = 0
    correct_per_label = torch.zeros(len(classes), device=device)

    for images, labels in testloader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        preds = torch.sigmoid(outputs) > 0.5

        correct_per_label += (preds == labels).float().sum(0)
        num_samples += labels.size(0)

    accuracy_per_label = correct_per_label / num_samples
    overall_accuracy = accuracy_per_label.mean().item()

    print(f'Total Acc: {overall_accuracy:.4f}')
    print('Per Class Acc:', accuracy_per_label.tolist())


Epoch 1/6: 100%|██████████| 313/313 [01:51<00:00,  2.80it/s]


Train Loss: 0.3252 Total Acc: 0.9004
Per Class Acc: [0.8395566940307617, 0.8245806694030762, 0.9405950307846069, 0.9449880123138428, 0.952276349067688]


Epoch 2/6: 100%|██████████| 313/313 [01:40<00:00,  3.12it/s]


Train Loss: 0.2951 Total Acc: 0.9062
Per Class Acc: [0.8495407104492188, 0.8338658213615417, 0.9425918459892273, 0.945786714553833, 0.9590654969215393]


Epoch 3/6: 100%|██████████| 313/313 [01:37<00:00,  3.21it/s]


Train Loss: 0.2894 Total Acc: 0.9059
Per Class Acc: [0.8493410348892212, 0.8328673839569092, 0.9425918459892273, 0.945786714553833, 0.9589656591415405]


Epoch 4/6: 100%|██████████| 313/313 [01:38<00:00,  3.17it/s]


Train Loss: 0.2851 Total Acc: 0.9058
Per Class Acc: [0.8493410348892212, 0.8322683572769165, 0.9424920082092285, 0.9458865523338318, 0.9590654969215393]


Epoch 5/6: 100%|██████████| 313/313 [01:38<00:00,  3.18it/s]


Train Loss: 0.2847 Total Acc: 0.9057
Per Class Acc: [0.84944087266922, 0.8322683572769165, 0.9423921704292297, 0.9456868767738342, 0.9588658213615417]


Epoch 6/6: 100%|██████████| 313/313 [01:40<00:00,  3.13it/s]


Train Loss: 0.2843 Total Acc: 0.9057
Per Class Acc: [0.8492411971092224, 0.8318690061569214, 0.9423921704292297, 0.9458865523338318, 0.9589656591415405]
Total Acc: 0.9021
Per Class Acc: [0.8427734375, 0.828125, 0.947265625, 0.93359375, 0.958984375]


The results did not change significantly. This is not a matter of the number of epochs, because the loss has already stopped decreasing. We should increase the model complexity; that would show the effect of deformable convolution much more clearly. We could not do that here because the deadline is only a few hours away.