## Data Format :
train 

 |--images

 |--labelTxt


During training, the model expects both the input tensors, as well as a targets (list of dictionary), containing:

- boxes (FloatTensor[N, 4]): the ground-truth boxes in [x1, y1, x2, y2] format, with 0 <= x1 < x2 <= W and 0 <= y1 < y2 <= H.
     
     **I think here [x1, y1, x2, y2] is [x_min, y_min, x_max, y_max]**

- labels (Int64Tensor[N]): the class label for each ground-truth box


#STEP1 : Read images as in tensor form

In [26]:
import torch
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
from PIL import Image
from xml.dom.minidom import parse
%matplotlib inline

In [55]:
class MarkDataset(torch.utils.data.Dataset):
    def __init__(self, root, transforms=None):
        self.root = root
        self.transforms = transforms
        # load all image files, sorting them to ensure that they are aligned
        self.imgs = list(sorted(os.listdir(os.path.join(root, "images"))))
        self.bbox_xml = list(sorted(os.listdir(os.path.join(root, "Annotations_new"))))
 
    def __getitem__(self, idx):
        # load images and bbox
        img_path = os.path.join(self.root, "images", self.imgs[idx])
        bbox_xml_path = os.path.join(self.root, "Annotations_new", self.bbox_xml[idx])
        img = Image.open(img_path).convert("RGB")        
        
        # Read file, VOC format dataset label is xml format file
        dom = parse(bbox_xml_path)
        # Get Document Element Object
        data = dom.documentElement
        # Get objects
        objects = data.getElementsByTagName('object')        
        # get bounding box coordinates
        boxes = []
        labels = []
        label_codes = {'plane': 15, 'baseball-diamond' :1, 'bridge':2, 'ground-track-field':3, 'small-vehicle':4, 'large-vehicle':5, 'ship':6, 'tennis-court':7,
                      'basketball-court':8, 'storage-tank':9,  'soccer-ball-field':10, 'roundabout':11, 'harbor':12, 'swimming-pool':13, 'helicopter':14}
        for object_ in objects:
            # Get the contents of the label
            name = object_.getElementsByTagName('name')[0].childNodes[0].nodeValue  # Is label, mark_type_1 or mark_type_2
            labels.append(label_codes[name] ) # Background label is 0, mark_type_1 and mark_type_2 labels are 1 and 2, respectively
            
            bndbox = object_.getElementsByTagName('bndbox')[0]
            xmin = np.float(bndbox.getElementsByTagName('x1')[0].childNodes[0].nodeValue)
            ymin = np.float(bndbox.getElementsByTagName('y1')[0].childNodes[0].nodeValue)
            xmax = np.float(bndbox.getElementsByTagName('x2')[0].childNodes[0].nodeValue)
            ymax = np.float(bndbox.getElementsByTagName('y2')[0].childNodes[0].nodeValue)
            boxes.append([xmin, ymin, xmax, ymax])     
 
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        
        labels = torch.as_tensor(labels, dtype=torch.int64)        
 
        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        iscrowd = torch.zeros((len(objects),), dtype=torch.int64)
 
        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        # Since you are training a target detection network, there is no target [masks] = masks in the tutorial
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd
 
        #if self.transforms is not None:
            # Note that target (including bbox) is also transformed\enhanced here, which is different from transforms from torchvision import
            # Https://github.com/pytorch/vision/tree/master/references/detectionOfTransforms.pyThere are examples of target transformations when RandomHorizontalFlip
            #img, target = self.transforms(img, target)
 
        return img, target
 
    def __len__(self):
        return len(self.imgs)

In [71]:
root = r'/content/drive/MyDrive/Dotadatasetsample/train/'

In [72]:
num_classes = 16

In [99]:
dataset = MarkDataset(root)

In [100]:
len(dataset)

13

In [101]:
dataset[0]

(<PIL.Image.Image image mode=RGB size=693x890 at 0x7F7D095478D0>,
 {'area': tensor([  1015.,    864.,    992.,   1254.,   1056.,   1224.,   1225.,   1296.,
             812., 153652.]),
  'boxes': tensor([[410., 722., 445., 751.],
          [434., 850., 466., 877.],
          [284., 557., 315., 589.],
          [387., 445., 420., 483.],
          [234., 402., 266., 435.],
          [433., 343., 467., 379.],
          [516., 286., 551., 321.],
          [498., 261., 534., 297.],
          [447., 222., 475., 251.],
          [126., 227., 485., 655.]]),
  'image_id': tensor([0]),
  'iscrowd': tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
  'labels': tensor([ 4,  4,  4,  4,  4,  4,  4,  4,  4, 11])})

In [102]:
dataset_test = MarkDataset(root )

In [103]:
data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=2, shuffle=True, # num_workers=4,
    collate_fn=utils.collate_fn)

In [104]:
data_loader_test = torch.utils.data.DataLoader(
    dataset_test, batch_size=2, shuffle=False, # num_workers=4,
    collate_fn=utils.collate_fn)

In [107]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [105]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
 
      
def get_object_detection_model(num_classes):
    # load an object detection model pre-trained on COCO
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    
    # replace the classifier with a new one, that has num_classes which is user-defined
    num_classes = 16  # 16 class (plane ,......,helicopter) + background
 
    # get the number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
 
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
 
    return model

In [108]:
# get the model using our helper function
model = get_object_detection_model(num_classes = 16)

In [None]:
# move model to the right device
model.to(device)

In [110]:
# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]

# SGD
optimizer = torch.optim.SGD(params, lr=0.0003,
                            momentum=0.9, weight_decay=0.0005)


In [111]:
# and a learning rate scheduler
# cos learning rate
lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=1, T_mult=2)

# let's train it for   epochs
num_epochs = 2

In [112]:
cd '/content/drive/MyDrive/fasterRCNN/'

/content/drive/MyDrive/fasterRCNN


In [113]:
from engine import train_one_epoch, evaluate

In [114]:
for epoch in range(num_epochs):
    # train for one epoch, printing every 10 iterations
    # Engine.pyTrain_ofOne_The epoch function takes both images and targets. to(device)
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=50)

    # update the learning rate
    lr_scheduler.step()

    # evaluate on the test dataset    
    evaluate(model, data_loader_test, device=device)    
    
    print('')
    print('==================================================')
    print('')

AttributeError: ignored

#adversarial attack

In [None]:
!pip install adversarial-robustness-toolbox

In [116]:
from art.attacks.evasion import FastGradientMethod
from art.estimators.object_detection import PyTorchFasterRCNN 
from art.estimators.classification import PyTorchClassifier

In [117]:
detector = PyTorchFasterRCNN(model = model)

In [118]:
attack = FastGradientMethod(estimator=detector, eps=0.2)

In [128]:
x_test_adv = attack.generate()

AttributeError: ignored