https://github.com/miladlink/TinyYoloV2

# Libraries

In [1]:

# import os
from PIL import Image
import numpy as np
import json
import cv2
from tqdm import tqdm
# import skimage.io as io
# import matplotlib.pyplot as plt
from pycocotools.coco import COCO
import torch
import torchvision
from torchvision import transforms
import torchvision.transforms as transforms
from torchvision.datasets.coco import CocoDetection
from torch.utils.data import DataLoader

from utils.YOLOv2 import *
from models.YOLOv3 import load_model
from attacks.FGSM import FGSM
from attacks.PGD import PGD
from attacks.CW import CW
from detect import detect_image
from utils.loss import compute_loss
from utils.utils import load_classes, rescale_boxes, non_max_suppression, print_environment_info
from utils.augmentations import TRANSFORM_TRAIN, TRANSFORM_VAL
from utils.transforms import DEFAULT_TRANSFORMS, Resize, ResizeEval

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

# Model

In [3]:
modelv = 3
img_size=416

if modelv == 2:
    model = load_model_v2(weights = './weights/yolov2-tiny-voc.weights').to(device)
    class_names = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'TVmonitor'] 
    root_train = "./data/VOC2007/JPEGImages"
    annFile_train = "./data/VOC2007/annotations/train.json"
    root_val = "./data/VOC2007/JPEGImages"
    annFile_val = "./data/VOC2007/annotations/val.json"
    
elif modelv == 3:
    model = load_model("./config/yolov3.cfg", "./weights/yolov3.weights")
    class_names = ['person', 'bicycle', 'car', 'motorbike', 'aeroplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'sofa', 'pottedplant', 'bed', 'diningtable', 'toilet', 'tvmonitor', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
    id_list = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90])
    root_train = "./data/COCO2017/train2017"
    annFile_train = "./data/COCO2017/annotations/instances_train2017_modified.json"
    root_val = "./data/COCO2017/val2017"
    annFile_val = "./data/COCO2017/annotations/instances_val2017_modified.json"
    
else:
    print("invalid model number!")

# COCO loader

create dataloader (make different train and val later)

In [4]:
# coco_dataset_train = CocoDetection(root=root_train, annFile=annFile_train, transform=TRANSFORM_TRAIN_IMG, target_transform=TRANSFORM_TRAIN_TARGET)
coco_dataset_val = CocoDetection(root=root_val, annFile=annFile_val, transforms=TRANSFORM_VAL)
coco_dataset_eval = CocoDetection(root=root_val, annFile=annFile_val, transform=transforms.Compose([transforms.ToTensor(),]))

def collate_fn(batch):
    return tuple(zip(*batch))

# Create a DataLoader for your COCO dataset
train_loader = DataLoader(coco_dataset_val, batch_size=4, shuffle=True, collate_fn=collate_fn) # multiple images per batch
val_loader = DataLoader(coco_dataset_val, batch_size=1, shuffle=True, collate_fn=collate_fn) # one per batch
cocoeval_loader = DataLoader(coco_dataset_eval, batch_size=1, shuffle=True, collate_fn=collate_fn) # original images without transformatios
# getOneIter(val_loader)

loading annotations into memory...
Done (t=0.26s)
creating index...
index created!
loading annotations into memory...
Done (t=0.26s)
creating index...
index created!


# helper functions


In [8]:
def xyxy2xywh(x):
    # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[..., 0] = (x[..., 0] + x[..., 2]) / 2  # x center
    y[..., 1] = (x[..., 1] + x[..., 3]) / 2  # y center
    y[..., 2] = x[..., 2] - x[..., 0]  # width
    y[..., 3] = x[..., 3] - x[..., 1]  # height
    return y

def xywh2xyxy(x):
    # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[..., 0] = x[..., 0] - x[..., 2] / 2  # top left x
    y[..., 1] = x[..., 1] - x[..., 3] / 2  # top left y
    y[..., 2] = x[..., 0] + x[..., 2] / 2  # bottom right x
    y[..., 3] = x[..., 1] + x[..., 3] / 2  # bottom right y
    return y

def yolo2json(boxes, img_copy, image_id):
    # * put into coco format of x_min,y_min, width, height, bbox_conf, cls
    # yolo format is x_center, y_center, w, h, bbox_conf, cls_conf, cls
    predictions = []
    for box in boxes:
        x_center, y_center, w, h, conf, cls = box
        x_min = max(0, (x_center - w / 2) * img_copy.shape[3])
        y_min = max(0, (y_center - h / 2) * img_copy.shape[2])
        width = min(img_copy.shape[3], w * img_copy.shape[3])
        height = min(img_copy.shape[2], h * img_copy.shape[2])
        # print(x_min,y_min, width, height, bbox_conf, cls)
        predictions.append({
            'image_id': image_id,
            'category_id': int(id_list[int(cls)]) if modelv == 3 else int(cls),
            'bbox': [int(x_min), int(y_min), int(width), int(height)],
            'score': round(float(conf),2)
        })
    return predictions

def nms2yolo(boxes, img_copy):
    boxes = xyxy2xywh(boxes) # convert from coco to yolo: nms returns nx6 (x1, y1, x2, y2, conf, cls), change to center
    boxes[:,0] = boxes[:,0]/img_copy.shape[3]
    boxes[:,1] = boxes[:,1]/img_copy.shape[2]
    boxes[:,2] = boxes[:,2]/img_copy.shape[3]
    boxes[:,3] = boxes[:,3]/img_copy.shape[2]
    return boxes

def saveImageWithBoxes(images, boxes, class_names, fileName):  
    to_pil = transforms.ToPILImage() 
    pil_image = to_pil(images.squeeze())
    pred_img = plot_boxes(pil_image, boxes, None, class_names)
    pred_img.save(fileName)
    
def saveImage(img):
    # * just for sanity check, output image. put the dim 3 at the back
    imageN = img.clone().detach()
    imageN = imageN.cpu().squeeze().permute(1, 2, 0).numpy() 
    imageN = cv2.cvtColor(imageN, cv2.COLOR_RGB2BGR)
    # print(imageN.shape)
    cv2.imwrite("output/mygraph.jpg", imageN*255) 
    
def getOneIter(dataloader):
    images, annotations = next(iter(dataloader))
    np.set_printoptions(linewidth=500)
    np.set_printoptions(suppress=True)
    print("dataloader out")
    print(annotations[0].numpy())

# Attack

In [7]:
# attacker = FGSM(model=model, epsilon=0.1)
attacker = PGD(model=model, epsilon=0.1)
# attacker = CW(model=model, epsilon=0.1, lr=0.02, epoch=5, target=52) # 52 is banana

for i, (images, targets) in enumerate(tqdm(val_loader)):
    #* modify inputs to be in proper shape
    images = torch.stack(images) # images.shape is [n, 3, 416, 416] (even if n=1)
    images = images.to(device)
    for i, boxes in enumerate(targets): # targets is nx6, (image,class,x,y,w,h)
        if boxes.ndim == 2: boxes[:, 0] = i # change out image_id to id in batch to conform to compute_loss. this is normally done in ListDataset -> collate_fn
    targets = torch.cat(targets, 0).to(device) # from tuples to one tensor

    #* loss
    model.train()
    
    outputs = model(images)
    loss, loss_components = compute_loss(outputs, targets, model)
    print("before: ", loss)
    images_adv = attacker.forward(images, targets) # targets are what to avoid
    outputs = model(images_adv)
    loss, loss_components = compute_loss(outputs, targets, model)
    print("after: ", loss)
    
    #* plot
    model.eval()
    
    outputs = model(images[0].unsqueeze(0))
    boxes = non_max_suppression(outputs, conf_thres=0.3, iou_thres=0.5)[0].numpy()
    boxes = nms2yolo(boxes, images)
    saveImageWithBoxes(images[0], boxes, class_names, "./output/attack_before.jpg")
    
    outputs = model(images_adv[0].unsqueeze(0))
    boxes = non_max_suppression(outputs, conf_thres=0.3, iou_thres=0.5)[0].numpy()
    boxes = nms2yolo(boxes, images_adv)
    saveImageWithBoxes(images_adv[0], boxes, class_names, "./output/attack_after.jpg")
    
    break

  0%|          | 0/5000 [00:00<?, ?it/s]

torch.Size([3, 416, 416])
before:  tensor([0.0175], device='cuda:0', grad_fn=<AddBackward0>)


  0%|          | 0/5000 [00:00<?, ?it/s]

after:  tensor([0.2566], device='cuda:0', grad_fn=<AddBackward0>)





# Eval for loss

inference and output json file containing prediction

In [7]:
model.train()
for i, (images, targets) in enumerate(tqdm(train_loader)):
    images = torch.stack(images) # images.shape is [8, 3, 416, 416]
    images = images.to(device)
    for i, boxes in enumerate(targets): boxes[:, 0] = i # change out image_id to id in batch to conform to compute_loss. this is normally done in ListDataset -> collate_fn
    targets = torch.cat(targets, 0).to(device)
    
    outputs = model(images) # outputs in xywh yolo format
    print(outputs[0].shape) # [8, 3, 13, 13, 85] -> (batch_size x num_of_anchor_boxes x grid_size x grid_size x (5 + 80 classes))
    loss, loss_components = compute_loss(outputs, targets, model)
    print(loss)
    # loss.backward()
    break

  0%|          | 0/1250 [00:00<?, ?it/s]

torch.Size([4, 3, 13, 13, 85])
tensor([0.0394], device='cuda:0', grad_fn=<AddBackward0>)





# Eval for COCOeval

In [14]:
predictions = []
img_size=416

for i, data in enumerate(tqdm(cocoeval_loader)): # * ASSUME ONE IMAGE PER BATCH FOR NOW
    with torch.no_grad():
        images, targets = data
        # print(targets[0])
        if len(targets[0]) > 0:
            model.eval()
            
            # * getting necessary values
            # for target in targets[0]: print(f"id {target['image_id']}, class {target['category_id']}, bbox {target['bbox']}")
            image_id = targets[0][0]["image_id"]
            category_id = targets[0][0]["category_id"]
            img = images[0].unsqueeze(0).to(device)
            img_copy = img.clone().detach()
            # print(img.shape) # torch.Size([1, 3, 375, 500])
            
            # * turn target into 2d numpy matrix for loss calc
            # targetsMatrix = np.zeros((len(targets[0]), 6))
            # for i, d in enumerate(targets[0]):
            #     targetsMatrix[i, 0] = d['image_id']
            #     targetsMatrix[i, 1] = d['category_id']
            #     targetsMatrix[i, 2:6] = d['bbox']
            # np.set_printoptions(suppress=True)
            # print(targetsMatrix)

            saveImage(img) # sanity check
            
            # * put images into model and get bboxes
            if modelv == 2:
                resized_tensor_image = torch.nn.functional.interpolate(img, size=(416, 416), mode='bilinear', align_corners=False)
                boxes = filtered_boxes(model, resized_tensor_image, conf_thresh=0.5, nms_thresh=0.9, device=device) # * default 0.66, 0.55, higher conf -> more strict, higher nms -> more iou needed -> more strict
                boxes = np.delete(boxes, 5, axis=1) # x_center, y_center, w, h, bbox_conf, cls_conf, cls -> delete cls_conf
            elif modelv == 3:
                img = img.cpu().squeeze().permute(1, 2, 0).numpy()
                img = transforms.Compose([DEFAULT_TRANSFORMS, ResizeEval(img_size)])((img, np.zeros((1, 5))))[0].unsqueeze(0).to(device)
                boxes = model(img)
                # print(boxes.shape, img_size)
                boxes = non_max_suppression(boxes, conf_thres=0.3, iou_thres=0.5)[0].numpy()
                # print(boxes)
                boxes = rescale_boxes(boxes, img_size, img_copy.shape[2:]) # rescale back to original proportions?
                boxes = nms2yolo(boxes, img_copy)
            else: 
                print("invalid model num!")
                
            saveImageWithBoxes(img_copy, boxes, class_names, "./output/path_to_save_image.jpg") # for sanity check
            predictions += yolo2json(boxes, img_copy, image_id)
            
        else:  continue # pics without labels
    break

with open(f'./data/results/v{modelv}predictions.json', 'w') as f:
    json.dump(predictions, f)

  0%|          | 0/5000 [00:00<?, ?it/s]


# Compare gt and prediction


In [None]:
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
        
coco_gld = COCO(annFile_val) # coco
if modelv == 2:
    coco_rst = coco_gld.loadRes('./data/results/v2predictions.json')
elif modelv == 3:
    coco_rst = coco_gld.loadRes('./data/results/v3predictions.json')
cocoEval = COCOeval(coco_gld, coco_rst, iouType='bbox')
cocoEval.evaluate()
cocoEval.accumulate()
cocoEval.summarize()

loading annotations into memory...
Done (t=0.16s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.28s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=10.97s).
Accumulating evaluation results...
DONE (t=1.66s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.328
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.560
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.346
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.141
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.360
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.508
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.266
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.372
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDet