In [1]:
try:
    from pycocotools.coco import COCO
except:
    !pip install pycocotools
    from pycocotools.coco import COCO
import random
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import os
import torch
from torch.utils import data
from PIL import Image
from torchvision import transforms
import torchvision

In [2]:
# Parse the train2017 instance annotations and build the lookup index.
# The annotation file is large, so this cell takes several seconds to run.
train_annotation_path = "/scratch/lt2316-h18-resources/coco/annotations/instances_train2017.json"
coco = COCO(annotation_file=train_annotation_path)

The history saving thread hit an unexpected error (OperationalError('database is locked')).History will not be written to the database.loading annotations into memory...

Done (t=19.17s)
creating index...
index created!


In [3]:
# Pass the category names as lists. A bare string is itself iterable, so
# pycocotools would check whether each category name occurs *inside* the string
# (a substring match) instead of comparing the names for equality.
cat_cat = coco.getCatIds(catNms=["cat"])
horse_cat = coco.getCatIds(catNms=["horse"])

In [4]:
# Show the category-id lists returned for "cat" and "horse".
cat_cat, horse_cat

([17], [19])

In [5]:
class myOwnDataset(torch.utils.data.Dataset):
    """Detection-style dataset over the images listed in a loaded COCO index.

    Each item is an ``(image, target)`` pair where ``target`` is a dict with
    the keys ``boxes``, ``labels``, ``image_id``, ``area`` and ``iscrowd``.

    Args:
        root: Directory containing the image files named in the annotations.
        coco: Loaded annotation index; must provide ``imgs``, ``getAnnIds``,
            ``loadAnns`` and ``loadImgs``.
        transforms: Optional callable applied to the image only; the target
            dict is returned unchanged.
    """

    def __init__(self, root, coco, transforms=None):
        self.root = root
        self.transforms = transforms
        self.coco = coco
        # Sorted so that a given index always maps to the same image id.
        self.ids = sorted(self.coco.imgs.keys())

    def __getitem__(self, index):
        """Return the image at position ``index`` and its annotation dict.

        Returns:
            tuple: ``(img, target)``. ``img`` is the RGB image, passed through
            ``self.transforms`` when one was given. ``target`` contains
            ``boxes`` (float32, shape ``(N, 4)`` as xmin, ymin, xmax, ymax),
            ``labels`` (int64, all ones), ``image_id`` (one-element tensor),
            ``area`` (float32, shape ``(N,)``) and ``iscrowd`` (int64, zeros).
        """
        coco = self.coco
        img_id = self.ids[index]
        # All annotation records belonging to this image.
        annotations = coco.loadAnns(coco.getAnnIds(imgIds=img_id))
        file_name = coco.loadImgs(img_id)[0]['file_name']
        # Image.open keeps the file's native mode; convert so that every sample
        # has three channels regardless of how the file was stored.
        img = Image.open(os.path.join(self.root, file_name)).convert("RGB")

        num_objs = len(annotations)

        # COCO stores boxes as [xmin, ymin, width, height];
        # the target uses [xmin, ymin, xmax, ymax].
        boxes = []
        for ann in annotations:
            xmin, ymin, width, height = ann['bbox'][:4]
            boxes.append([xmin, ymin, xmin + width, ymin + height])
        # reshape keeps the (N, 4) layout even when the image has no
        # annotations; an empty list would otherwise become shape (0,).
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)

        # Single foreground class: every annotated object gets label 1.
        labels = torch.ones((num_objs,), dtype=torch.int64)
        image_id = torch.tensor([img_id])
        areas = torch.as_tensor([ann['area'] for ann in annotations],
                                dtype=torch.float32)
        # Every object is marked as non-crowd; the annotations' own
        # ``iscrowd`` field is not consulted.
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        my_annotation = {
            "boxes": boxes,
            "labels": labels,
            "image_id": image_id,
            "area": areas,
            "iscrowd": iscrowd,
        }

        if self.transforms is not None:
            img = self.transforms(img)

        return img, my_annotation

    def __len__(self):
        """Number of images in the annotation index."""
        return len(self.ids)

In [None]:
def get_transform():
    """Build the image preprocessing pipeline.

    Returns:
        A ``Compose`` wrapping a single ``ToTensor`` step.
    """
    to_tensor = torchvision.transforms.ToTensor()
    return torchvision.transforms.Compose([to_tensor])

In [None]:
# Image directories for the two COCO splits
train_data_dir = '/scratch/lt2316-h18-resources/coco/train2017'
val_data_dir = '/scratch/lt2316-h18-resources/coco/val2017'

# Each split needs its own annotation index: the image ids and file names in
# the train2017 annotations do not point at files in the val2017 directory.
val_annotation_file = '/scratch/lt2316-h18-resources/coco/annotations/instances_val2017.json'
coco_val_annotations = COCO(annotation_file=val_annotation_file)

# create own Dataset (one per split, each paired with its matching annotations)
my_train_dataset = myOwnDataset(root=train_data_dir,
                          coco = coco,
                          transforms=get_transform()
                          )
my_val_dataset = myOwnDataset(root=val_data_dir,
                          coco = coco_val_annotations,
                          transforms=get_transform()
                          )

# Custom collate function handed to the DataLoaders below
def collate_fn(batch):
    """Regroup a batch of samples into one tuple per sample field.

    A list such as ``[(img0, tgt0), (img1, tgt1)]`` becomes
    ``((img0, img1), (tgt0, tgt1))``. Nothing is stacked or converted.
    """
    per_field = zip(*batch)
    return tuple(per_field)

# Batch size (used by both loaders)
train_batch_size = 1

# Training loader: samples are reshuffled every epoch.
dataloader_train = torch.utils.data.DataLoader(my_train_dataset,
                                          batch_size=train_batch_size,
                                          shuffle=True,
                                          num_workers=4,
                                          collate_fn=collate_fn)

# Validation loader: keep a fixed order so evaluation runs are comparable
# (consistent with the unshuffled ``val_loader`` defined further down).
dataloader_val = torch.utils.data.DataLoader(my_val_dataset,
                                          batch_size=train_batch_size,
                                          shuffle=False,
                                          num_workers=4,
                                          collate_fn=collate_fn)

In [None]:
# Report whether PyTorch can see a CUDA device in this kernel.
torch.cuda.is_available()

In [None]:
# select device (whether GPU or CPU)
preferred_gpu_index = 2
if torch.cuda.is_available():
    # Fall back to the first GPU when the preferred index does not exist on
    # this machine; a device with an out-of-range index fails on first use.
    if preferred_gpu_index < torch.cuda.device_count():
        device = torch.device('cuda', preferred_gpu_index)
    else:
        device = torch.device('cuda', 0)
else:
    device = torch.device('cpu')

# Example of moving one batch onto the device (DataLoader is iterable over Dataset):
#for imgs, annotations in dataloader_train:
#    imgs = list(img.to(device) for img in imgs)
#    annotations = [{k: v.to(device) for k, v in t.items()} for t in annotations]

In [None]:
try:
    from timm import create_model
except:
    !pip install timm
    from timm import create_model

# ViT model definition
def create_vit_model(model_name='vit_base_patch16_224', pretrained=True, num_classes=None):
    """Create a Vision Transformer through ``timm.create_model``.

    Args:
        model_name: timm architecture name.
        pretrained: Whether to load pretrained weights.
        num_classes: Size of the classification head. ``None`` (the default)
            leaves the architecture's own head size untouched.

    Returns:
        The model object returned by ``create_model``.
    """
    extra_kwargs = {}
    if num_classes is not None:
        # Only forwarded when requested, so the default call is unchanged.
        extra_kwargs['num_classes'] = num_classes
    return create_model(model_name, pretrained=pretrained, **extra_kwargs)

# Build the ViT model with the function's default settings.
vit = create_vit_model()

In [None]:
from torchvision.datasets import CocoDetection

# Side length in pixels of the square input, taken from the model name
# 'vit_base_patch16_224'.
vit_input_size = 224

# Without a transform CocoDetection yields PIL images of varying size, which
# the default DataLoader collate cannot stack into a batch. Resize every image
# to one fixed size and convert it to a tensor.
vit_image_transform = torchvision.transforms.Compose([
    torchvision.transforms.Resize((vit_input_size, vit_input_size)),
    torchvision.transforms.ToTensor(),
])


def largest_object_category(annotations):
    """Reduce one image's annotation list to a single integer class label.

    Returns the ``category_id`` of the annotation with the largest ``area``,
    or ``0`` when the image has no annotations.

    NOTE(review): this labelling rule is a placeholder so that the loaders
    yield integer labels usable with ``CrossEntropyLoss``; replace it with the
    rule the experiment actually needs. It assumes 0 does not collide with a
    real category id - TODO confirm.
    """
    if not annotations:
        return 0
    return max(annotations, key=lambda ann: ann['area'])['category_id']


coco_train = CocoDetection(root=train_data_dir,
                           annFile='/scratch/lt2316-h18-resources/coco/annotations/instances_train2017.json',
                           transform=vit_image_transform,
                           target_transform=largest_object_category)
coco_val = CocoDetection(root=val_data_dir,
                         annFile='/scratch/lt2316-h18-resources/coco/annotations/instances_val2017.json',
                         transform=vit_image_transform,
                         target_transform=largest_object_category)


In [None]:
# Batched iterators over the two CocoDetection splits; only training is shuffled.
loader_batch_size = 32
train_loader = torch.utils.data.DataLoader(
    dataset=coco_train,
    batch_size=loader_batch_size,
    shuffle=True,
)
val_loader = torch.utils.data.DataLoader(
    dataset=coco_val,
    batch_size=loader_batch_size,
    shuffle=False,
)


# Training

In [None]:
# Define the loss function and the optimizer.
# The model is moved to the selected device first so that the model and the
# batches fed to it live on the same device.
vit.to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(vit.parameters(), lr=0.001)

# Model training
num_epochs = 10

for epoch in range(num_epochs):
    vit.train()
    for images, labels in train_loader:
        # Inputs and targets must be on the same device as the model.
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = vit(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    # Model evaluation (the model variable is ``vit``; ``vit_model`` was undefined)
    vit.eval()
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = vit(images)