In [1]:
!pip install PyDrive



In [3]:
import os
import torch
import torch.utils.data
import torchvision
from PIL import Image
from pycocotools.coco import COCO
import numpy as np
import cv2
import time

In [4]:
class myOwnDataset(torch.utils.data.Dataset):
    def __init__(self, root, annotation, transforms=None):
        self.root = root
        self.transforms = transforms
        self.coco = COCO(annotation)
        self.ids = list(sorted(self.coco.imgs.keys()))
        print(self.ids)

    def __getitem__(self, index):
        # Own coco file
        coco = self.coco
        # Image ID
        img_id = self.ids[index]
        print("img_id" , img_id)
        # List: get annotation id from coco
        ann_ids = coco.getAnnIds(imgIds=img_id)
        # Dictionary: target coco_annotation file for an image
        coco_annotation = coco.loadAnns(ann_ids)
        # path for input image
        path = coco.loadImgs(img_id)[0]['file_name']
        #path = coco.loadImgs(img_id)[0]['/content/sample_data/my_data']
        # open the input image
        img = Image.open(os.path.join(self.root, path))
        print(path)
        # number of objects in the image
        num_objs = len(coco_annotation)

        # Bounding boxes for objects
        # In coco format, bbox = [xmin, ymin, width, height]
        # In pytorch, the input should be [xmin, ymin, xmax, ymax]
        boxes = []
        for i in range(num_objs):
            xmin = coco_annotation[i]['bbox'][0]
            ymin = coco_annotation[i]['bbox'][1]
            xmax = xmin + coco_annotation[i]['bbox'][2]
            ymax = ymin + coco_annotation[i]['bbox'][3]
            boxes.append([xmin, ymin, xmax, ymax])
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        # Labels (In my case, I only one class: target class or background)
        labels = torch.ones((num_objs,), dtype=torch.int64)
        # Tensorise img_id
        img_id = torch.tensor([img_id])
        print("img_id=" , img_id)
        # Size of bbox (Rectangular)
        areas = []
        for i in range(num_objs):
            areas.append(coco_annotation[i]['area'])
        areas = torch.as_tensor(areas, dtype=torch.float32)
        # Iscrowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        # Annotation is in dictionary format
        my_annotation = {}
        my_annotation["boxes"] = boxes
        my_annotation["labels"] = labels
        my_annotation["image_id"] = img_id
        my_annotation["area"] = areas
        my_annotation["iscrowd"] = iscrowd

        if self.transforms is not None:
            img = self.transforms(img)
            #img, my_annotation = self.transforms(img, my_annotation)

        return img, my_annotation 

    def __len__(self):
        return len(self.ids)

In [5]:
# In my case, just added ToTensor
def get_transform():
    custom_transforms = []
    custom_transforms.append(torchvision.transforms.ToTensor())
    return torchvision.transforms.Compose(custom_transforms)


In [6]:
%%shell

# Download TorchVision repo to use some files from
# references/detection
git clone https://github.com/pytorch/vision.git
cd vision
git checkout v0.3.0

cp references/detection/utils.py ../
cp references/detection/transforms.py ../
cp references/detection/coco_eval.py ../
cp references/detection/engine.py ../
cp references/detection/coco_utils.py ../

fatal: destination path 'vision' already exists and is not an empty directory.
HEAD is now at be37608 version check against PyTorch's CUDA version




In [7]:
# path to your own data and coco file
train_data_dir = '/content/sample_data/my_data'
train_coco = '/content/sample_data/json_data/instances_default.json'


In [8]:
with open(train_coco) as json_file:
    lines = json_file.readlines()
for line in lines:
    print(line)

{"licenses": [{"name": "", "id": 0, "url": ""}], "info": {"contributor": "", "date_created": "", "description": "", "url": "", "version": "", "year": ""}, "categories": [{"id": 1, "name": "Facemask", "supercategory": ""}, {"id": 2, "name": "Face", "supercategory": ""}], "images": [{"id": 1, "width": 1024, "height": 600, "file_name": "pics_9_93.jpg", "license": 0, "flickr_url": "", "coco_url": "", "date_captured": 0}, {"id": 2, "width": 1024, "height": 600, "file_name": "pics_9_91.jpg", "license": 0, "flickr_url": "", "coco_url": "", "date_captured": 0}, {"id": 3, "width": 1024, "height": 600, "file_name": "pics_9_85.jpg", "license": 0, "flickr_url": "", "coco_url": "", "date_captured": 0}, {"id": 4, "width": 1024, "height": 600, "file_name": "pics_9_84.jpg", "license": 0, "flickr_url": "", "coco_url": "", "date_captured": 0}, {"id": 5, "width": 1024, "height": 600, "file_name": "pics_9_8.jpg", "license": 0, "flickr_url": "", "coco_url": "", "date_captured": 0}, {"id": 6, "width": 1024,

In [9]:
import torchvision.datasets as dset
my_dataset = dset.CocoDetection(root = train_data_dir,
                                annFile = train_coco,
                                transforms=get_transform())

loading annotations into memory...
Done (t=0.01s)
creating index...
index created!


In [35]:
# collate_fn needs for batch
def collate_fn(batch):
    return tuple(zip(*batch))

# Batch size
train_batch_size = 1
print('Number of samples: ', len(my_dataset))
my_dataset.root
my_dataset.coco.cats

Number of samples:  325


{1: {'id': 1, 'name': 'Facemask', 'supercategory': ''},
 2: {'id': 2, 'name': 'Face', 'supercategory': ''}}

In [38]:
#img, target-coco_train[0]
#print (img.size)
#(640, 480)

In [10]:
# create own Dataset
my_dataset = myOwnDataset(root=train_data_dir,
                          annotation=train_coco,
                          transforms=get_transform()
                          )

# collate_fn needs for batch
def collate_fn(batch):
    return tuple(zip(*batch))

# Batch size
train_batch_size = 1
print('Number of samples: ', len(my_dataset))
my_dataset.root
my_dataset.coco.cats



loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205,

{1: {'id': 1, 'name': 'Facemask', 'supercategory': ''},
 2: {'id': 2, 'name': 'Face', 'supercategory': ''}}

In [11]:
my_dataset[0]

img_id 1
pics_9_93.jpg
img_id= tensor([1])


(tensor([[[0.3608, 0.4314, 0.4157,  ..., 0.4941, 0.5020, 0.5216],
          [0.3843, 0.3922, 0.3490,  ..., 0.4941, 0.5059, 0.5216],
          [0.3882, 0.3686, 0.4314,  ..., 0.4941, 0.5059, 0.5216],
          ...,
          [0.4157, 0.4157, 0.4157,  ..., 0.5294, 0.5451, 0.5529],
          [0.4157, 0.4157, 0.4157,  ..., 0.5294, 0.5451, 0.5529],
          [0.4157, 0.4157, 0.4157,  ..., 0.5294, 0.5451, 0.5529]],
 
         [[0.3647, 0.4353, 0.4196,  ..., 0.4784, 0.4863, 0.5059],
          [0.3882, 0.3961, 0.3529,  ..., 0.4784, 0.4902, 0.5059],
          [0.3922, 0.3725, 0.4353,  ..., 0.4784, 0.4902, 0.5059],
          ...,
          [0.4157, 0.4157, 0.4157,  ..., 0.5176, 0.5333, 0.5412],
          [0.4157, 0.4157, 0.4157,  ..., 0.5176, 0.5333, 0.5412],
          [0.4157, 0.4157, 0.4157,  ..., 0.5176, 0.5333, 0.5412]],
 
         [[0.3451, 0.4157, 0.4000,  ..., 0.4353, 0.4431, 0.4627],
          [0.3686, 0.3765, 0.3333,  ..., 0.4353, 0.4471, 0.4627],
          [0.3725, 0.3529, 0.4157,  ...,

In [12]:
# split the dataset in train and test set - using 80% for training, 20% for validation
indices = torch.randperm(len(my_dataset)).tolist()
train_dataset = torch.utils.data.Subset(my_dataset, indices[:-50])
valid_dataset = torch.utils.data.Subset(my_dataset, indices[-50:])

In [13]:
# # define training and validation data loaders
data_loader_train = torch.utils.data.DataLoader(train_dataset,
                                          batch_size=train_batch_size,
                                          shuffle=True,
                                          num_workers=4,
                                          collate_fn=collate_fn)

In [14]:
# own DataLoader
data_loader_val = torch.utils.data.DataLoader(valid_dataset,
                                          batch_size=train_batch_size,
                                          shuffle=True,
                                          num_workers=4,
                                          collate_fn=collate_fn)

In [15]:
len(data_loader_val)

50

In [16]:
len(data_loader_train)

275

In [17]:
# select device (whether GPU or CPU)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [18]:
# select device (whether GPU or CPU)
#device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# DataLoader is iterable over Dataset
for imgs, annotations in data_loader_train:
    imgs, annotations = next(iter(data_loader_train))
    imgs = list(img.to(device) for img in imgs)
    annotations = [{k: v.to(device) for k, v in t.items()} for t in annotations]
    print(annotations)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
images_3_245.jpg
img_id 10
img_id 35
img_id= tensor([100])
pics_9_600.jpg
img_id= tensor([277])
pics_9_529.jpg
img_id= tensor([10])
img_id= tensor([35])
img_id 180
pics_1_161.jpg
img_id= tensor([180])
img_id 248
images_5_22.jpg
img_id 310
images_1_157.jpg
img_id= tensor([310])
img_id= tensor([248])
[{'boxes': tensor([[643.8000, 346.6000, 702.9000, 422.9000]], device='cuda:0'), 'labels': tensor([1], device='cuda:0'), 'image_id': tensor([277], device='cuda:0'), 'area': tensor([4509.3301], device='cuda:0'), 'iscrowd': tensor([0], device='cuda:0')}]
img_id 158
pics_2_123.jpg
img_id= tensor([158])
img_id 220
output_1_153.jpg
img_id 125
img_id= tensor([220])
img_id 123
pics_2_311.jpg
pics_2_314.jpg
img_id 139
img_id= tensor([125])
img_id= tensor([123])
pics_2_250.jpg
img_id= tensor([139])
img_id 151
pics_2_155.jpg
img_id 205
img_id= tensor([151])
pics_1_109.jpg
img_id= tensor([205])
[{'boxes': tensor([[378.3600, 201.0900, 405.6

In [19]:
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# select device (whether GPU or CPU)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
torch.cuda.empty_cache()

def get_model_instance_segmentation(num_classes):
    # load an instance segmentation model pre-trained pre-trained on COCO
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False)
    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    return model
    

# 2 classes; Only target class or background
num_classes = 2
#num_classes = 7
num_epochs = 10
model = get_model_instance_segmentation(num_classes)

# move model to the right device
model.to(device)

Downloading: "https://download.pytorch.org/models/resnet50-19c8e357.pth" to /root/.cache/torch/hub/checkpoints/resnet50-19c8e357.pth


HBox(children=(FloatProgress(value=0.0, max=102502400.0), HTML(value='')))




FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256)
          (relu): ReLU(inplace=True)
          (downsample)

In [20]:
# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
                            momentum=0.9, weight_decay=0.0005)

# and a learning rate scheduler which decreases the learning rate by
# 10x every 3 epochs
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                               step_size=3,
                                               gamma=0.1)

In [21]:
# let's train it for 10 epochs
from engine import train_one_epoch, evaluate
import utils
import transforms as T

num_epochs = 10

for epoch in range(num_epochs):
    # train for one epoch, printing every 10 iterations
    train_one_epoch(model, optimizer, data_loader_train, device, epoch, print_freq=10)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the test dataset
    evaluate(model, data_loader_test, device=device)

img_id 149
img_id 90
pics_2_171.jpg
img_id 130
img_id 57
img_id= tensor([149])
pics_2_270.jpg
pics_7_152.jpg
img_id= tensor([57])
pics_5_227.jpg
img_id= tensor([90])
img_id= tensor([130])
img_id 82
img_id 68
pics_5_234.jpg
img_id 296
img_id 238
img_id= tensor([82])
pics_5_374.jpg
images_5_272.jpg
images_1_34.jpg
img_id= tensor([238])
img_id= tensor([68])
img_id= tensor([296])
img_id 283
images_3_18.jpg
img_id= tensor([283])


	nonzero()
Consider using one of the following signatures instead:
	nonzero(*, bool as_tuple) (Triggered internally at  /pytorch/torch/csrc/utils/python_arg_parser.cpp:766.)
  keep = keep.nonzero().squeeze(1)


img_id 164
pics_2_103.jpg
img_id= tensor([164])
Epoch: [0]  [  0/275]  eta: 0:09:30  lr: 0.000023  loss: 1.3683 (1.3683)  loss_classifier: 0.6542 (0.6542)  loss_box_reg: 0.0000 (0.0000)  loss_objectness: 0.6923 (0.6923)  loss_rpn_box_reg: 0.0217 (0.0217)  time: 2.0752  data: 0.2980  max mem: 1404
img_id 150
pics_2_166.jpg
img_id= tensor([150])
img_id 45
pics_9_491.jpg
img_id= tensor([45])


ValueError: ignored

In [11]:
# parameters
params = [p for p in model.parameters() if p.requires_grad]
#optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
optimizer = torch.optim.SGD(params, lr=0.005,
                            momentum=0.9, weight_decay=0.0005)

# and a learning rate scheduler which decreases the learning rate by
# 10x every 3 epochs
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                               step_size=3,
                                               gamma=0.1)

len_dataloader_train = len(data_loader_train)

itr = 1
total_train_loss = []
        
for epoch in range(num_epochs):
    start_time = time.time()
    train_loss = []
   # model.train()
   # train for one epoch, printing every 10 iterations
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
    # update the learning rate
    lr_scheduler.step()
    #for imgs, annotations, image_ids in data_loader_train:
      #  i += 1
        imgs = list(img.to(device) for img in imgs)
        annotations = [{k: v.to(device) for k, v in t.items()} for t in annotations]
        #Forward propagation
        loss_dict = model(imgs, annotations)
        losses = sum(loss for loss in loss_dict.values())
        #Reseting Gradients
        optimizer.zero_grad()
        #backward propagation
        losses.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1)
        optimizer.step()
        
        itr += 1

        print(f'Iteration: {i}/{len_dataloader_train}, Loss: {losses}')
        epoch_train_loss = np.mean(train_loss)
        total_train_loss.append(epoch_train_loss)
        print(f'Epoch train loss is {epoch_train_loss:.4f}')
        time_elapsed = time.time() - start_time
        print("Time elapsed: ",time_elapsed)
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': epoch_train_loss
            }, "checkpoint.pth")

IndentationError: ignored

In [12]:
# parameters
params = [p for p in model.parameters() if p.requires_grad]
#optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
optimizer = torch.optim.SGD(params, lr=0.005,
                            momentum=0.9, weight_decay=0.0005)

# and a learning rate scheduler which decreases the learning rate by
# 10x every 3 epochs
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                               step_size=3,
                                               gamma=0.1)

len_dataloader_train = len(data_loader_train)

itr = 1
total_train_loss = []
        
for epoch in range(num_epochs):
    start_time = time.time()
    train_loss = []
   # model.train()
   # train for one epoch, printing every 10 iterations
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
    # update the learning rate
    lr_scheduler.step()
    
        
        

NameError: ignored

In [None]:
itr = 1
total_valid_loss = []
        
for epoch in range(num_epochs):
    start_time = time.time()
    valid_loss = []
   # model.train()
   
    for imgs, annotations in data_loader_valid:
      #  i += 1
        imgs = list(img.to(device) for img in imgs)
        annotations = [{k: v.to(device) for k, v in t.items()} for t in annotations]
        #Forward propagation
        loss_dict = model(imgs, annotations)
        losses = sum(loss for loss in loss_dict.values())
        #Reseting Gradients
        optimizer.zero_grad()
        #backward propagation
        losses.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1)
        optimizer.step()
        
        itr += 1

        print(f'Iteration: {i}/{len_dataloader_valid}, Loss: {losses}')
        epoch_valid_loss = np.mean(valid_loss)
        total_valid_loss.append(epoch_valid_loss)
        print(f'Epoch train loss is {epoch_valid_loss:.4f}')
        time_elapsed = time.time() - start_time
        print("Time elapsed: ",time_elapsed)
        