<a href="https://colab.research.google.com/github/pransverse/ARK-selections/blob/main/TASK5/TASK5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader

import torchvision
from torchvision import transforms
from torchvision import datasets
from torchvision.transforms import ToTensor

import xml.etree.ElementTree as et

import matplotlib.pyplot as plt
# Display the installed torch / torchvision versions as the cell output.
torch.__version__, torchvision.__version__

('2.0.1+cu118', '0.15.2+cu118')

In [2]:
# Mount Google Drive into the Colab filesystem at /content/drive; later cells
# read the images and XML annotations from /content/drive/MyDrive/images.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import os
import glob
from PIL import Image

In [4]:
class Dataset(torch.utils.data.Dataset):
  """Single-box detection dataset read from a folder of XML annotation files.

  Each ``*.xml`` file in ``dataset_path`` is expected to contain a
  ``filename`` element (image file name, relative to ``dataset_path``) and an
  ``object/bndbox`` element with ``xmin``, ``ymin``, ``xmax`` and ``ymax``
  children. Only the first ``object`` of each file is used.

  Args:
    dataset_path: Directory holding both the XML files and the images.
    transforms: Optional callable applied to the opened PIL image. When it is
      ``None`` the PIL image is returned unchanged.
  """
  def __init__(self, dataset_path, transforms=None):
    self.root_path = dataset_path
    # glob returns paths that already include root_path. Sorting gives a
    # deterministic index -> file mapping (glob order is filesystem-dependent).
    self.files = sorted(glob.glob(os.path.join(self.root_path, "*.xml")))
    self.transforms = transforms

  def __len__(self):
    """Return the number of XML annotation files found."""
    return len(self.files)

  def __getitem__(self, idx):
    """Return ``(image, annotation)`` for the ``idx``-th annotation file.

    ``annotation`` is a dict with integer keys ``xmin``, ``ymin``, ``xmax``,
    ``ymax`` plus ``file_path`` (path of the image file).
    """
    # self.files entries are already full paths; do not join root_path again.
    xml_path = self.files[idx]
    root = et.parse(xml_path).getroot()
    bndbox = root.find("object").find("bndbox")
    # x coordinates are divided by 4 and y coordinates by 2, then truncated.
    # NOTE(review): presumably this maps the source resolution onto the
    # resized image used for training -- confirm against the raw image size.
    xmin = int(int(bndbox.find("xmin").text)/4)
    ymin = int(int(bndbox.find("ymin").text)/2)
    xmax = int(int(bndbox.find("xmax").text)/4)
    ymax = int(int(bndbox.find("ymax").text)/2)
    img_path = os.path.join(self.root_path, root.find("filename").text)
    # NOTE(review): the image is opened in its native mode; if any file is
    # not 3-channel, a .convert("RGB") may be needed here -- confirm.
    img = Image.open(img_path)
    # Check the instance attribute, not the module-level `transforms` name
    # (which is always truthy and made transforms=None crash).
    if self.transforms is not None:
      img = self.transforms(img)
    annotation = {"xmin":xmin,"ymin":ymin,"xmax":xmax,"ymax":ymax, "file_path":img_path}
    return img, annotation

In [5]:
# Preprocessing applied to every image: resize to 416x416, convert to a
# tensor, then normalise with a scalar mean of 0.5 and std of 0.2.
img_transforms = transforms.Compose(
    [
        transforms.Resize(size=(416, 416)),
        transforms.ToTensor(),
        transforms.Normalize(mean=0.5, std=0.2),
    ]
)

In [6]:
# Build the dataset from the Drive image folder, applying img_transforms to each image.
dataset = Dataset(
    dataset_path="/content/drive/MyDrive/images",
    transforms=img_transforms,
)

In [7]:
# Split the dataset 70% train / ~10% test / remainder validation.
# A seeded generator makes the split repeatable across kernel restarts
# (for the same file ordering), so evaluation numbers stay comparable.
SPLIT_SEED = 42
dataset_len = len(dataset)
train_len = int(dataset_len*0.7)
test_len = int((dataset_len - train_len)*1/3)
# Validation takes whatever is left so the three lengths always sum to dataset_len.
val_len = dataset_len - train_len - test_len
train_dataset, test_dataset, val_dataset = torch.utils.data.random_split(
    dataset,
    [train_len, test_len, val_len],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

In [8]:
# Batch loaders for the three splits. Only the training loader shuffles, so
# each epoch sees the samples in a new order; test/validation order is fixed.
BATCH_SIZE = 100
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, pin_memory=True)

In [12]:
class Model(nn.Module):
    """Small CNN that regresses one bounding box per image.

    Three conv stages (each ending in a 2x2 max-pool) reduce the image to a
    single-channel map at 1/8 resolution, which is flattened and fed to a
    shared fully connected layer followed by four independent sigmoid heads.

    Args:
        input: ``(channels, height, width)`` of the images the model will
            receive. ``(3, 416, 416)`` yields the original layer sizes
            (2704 -> 1352 in the shared dense layer).

    Raises:
        ValueError: if height or width is smaller than 8, so nothing is left
            after the three pooling stages.

    NOTE(review): every head ends in a sigmoid, so predictions lie in [0, 1].
    The dataset in this notebook yields integer pixel coordinates, which would
    need scaling into that range before being compared with these outputs.
    """
    def __init__(self, input):
        super().__init__()
        in_channels, height, width = input
        # Three MaxPool2d(2, stride=2) layers floor-divide each spatial dim by 8.
        flat_features = (height // 8) * (width // 8)
        if flat_features == 0:
            raise ValueError(
                f"input spatial size must be at least 8x8, got {height}x{width}"
            )
        # The hidden layer is half the flattened size (1352 for 2704 inputs).
        hidden_features = max(1, flat_features // 2)

        # Feature Extraction
        self.feature_extractor = nn.Sequential(
            nn.Conv2d(in_channels, 3, 3, stride=1, padding=1),  # b, 3, H, W
            nn.ReLU(True),
            nn.MaxPool2d(2, stride=2),  # b, 3, H/2, W/2
            nn.Conv2d(3, 8, 3, stride=1, padding=1),  # b, 8, H/2, W/2
            nn.ReLU(True),
            nn.Conv2d(8, 8, 3, stride=1, padding=1),  # b, 8, H/2, W/2
            nn.ReLU(True),
            nn.MaxPool2d(2, stride=2),  # b, 8, H/4, W/4
        )
        # Inner Representation
        self.inner = nn.Sequential(
            nn.Conv2d(8, 16, 3, stride=1, padding=1),  # b, 16, H/4, W/4
            nn.ReLU(True),
            nn.Conv2d(16, 1, 3, stride=1, padding=1),  # b, 1, H/4, W/4
            nn.ReLU(True),
            nn.MaxPool2d(2, stride=2),  # b, 1, H/8, W/8
        )
        # Bbox Extraction
        self.bbox = nn.Sequential(
            nn.Flatten(),  # b, (H/8)*(W/8)
            nn.Linear(flat_features, hidden_features),
            nn.ReLU()
        )
        # One scalar head per box coordinate, each squashed into [0, 1].
        self.bbox_xmin =  nn.Sequential(
            nn.Linear(hidden_features, 1),
            nn.Sigmoid()
        )
        self.bbox_ymin =  nn.Sequential(
            nn.Linear(hidden_features, 1),
            nn.Sigmoid()
        )
        self.bbox_xmax =  nn.Sequential(
            nn.Linear(hidden_features, 1),
            nn.Sigmoid()
        )
        self.bbox_ymax =  nn.Sequential(
            nn.Linear(hidden_features, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Run the network on a float batch ``x`` of shape (b, C, H, W).

        Returns:
            Tuple ``(xmin, ymin, xmax, ymax)`` of tensors, each of shape (b, 1).
        """
        ft_ex = self.feature_extractor(x)
        in_rep = self.inner(ft_ex)
        bbox_part = self.bbox(in_rep)
        bbox_xmin = self.bbox_xmin(bbox_part)
        bbox_ymin = self.bbox_ymin(bbox_part)
        bbox_xmax = self.bbox_xmax(bbox_part)
        bbox_ymax = self.bbox_ymax(bbox_part)
        return (bbox_xmin, bbox_ymin, bbox_xmax, bbox_ymax)

In [13]:
# Use the GPU when one is available; otherwise fall back to the CPU so the
# notebook still runs on a CPU-only runtime instead of raising in .cuda().
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Model((3,416,416)).to(device)

In [14]:
import cv2
# cv2.imread returns None (it does not raise) when the file is missing or
# unreadable, so fail here with a clear message rather than further down.
TEST_IMAGE_PATH = "/content/drive/MyDrive/images/53.png"
test_image = cv2.imread(TEST_IMAGE_PATH)
if test_image is None:
    raise FileNotFoundError(f"Could not read image: {TEST_IMAGE_PATH}")

In [None]:
# The model needs a normalised float tensor of shape (1, 3, 416, 416) on its
# own device, not the raw HxWx3 uint8 BGR array that cv2.imread returns.
with torch.no_grad():  # inference only; no autograd graph needed
    test_rgb = cv2.cvtColor(test_image, cv2.COLOR_BGR2RGB)
    # Reuse the training preprocessing so the input matches what the model sees.
    test_batch = img_transforms(Image.fromarray(test_rgb)).unsqueeze(0)
    test_batch = test_batch.to(next(model.parameters()).device)
    # Call the module itself rather than .forward() so hooks are honoured.
    test_prediction = model(test_batch)
test_prediction