In [10]:
%load_ext autoreload
%autoreload 2
import torch
import zipfile
import torchvision
import torchvision.models as models
import torch.nn as nn
import torch.optim as optim
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
from PIL import Image, ImageDraw, ImageFont
from ModelStructures import select_model
from math import floor, ceil

np.random.seed(1)
torch.manual_seed(1)
random.seed(1)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [11]:
# Read in Data
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
image_scenes = pd.read_csv('final_box_labels_grozi.csv')
image_scenes['image_id'] = pd.factorize(image_scenes['image_path'])[0] # Encode file to integer

# Create data sets
scene_ids = image_scenes['image_id'].unique()
scenes_copy = image_scenes.copy() # For filtering
random.Random(1).shuffle(scene_ids)
train_ids = scene_ids[:len(scene_ids)-80]
train_scenes = scenes_copy[scenes_copy['image_id'].isin(train_ids)]
val_scenes = scenes_copy[~scenes_copy['image_id'].isin(train_ids)]
print(len(train_scenes.index), len(val_scenes.index))

2363 305


In [12]:
# Custom image dataset
class DetectionImageDataset():
    def __init__(self, image_frame, transforms):
        self.image_frame = image_frame
        self.transforms = transforms        
        self.images = self.image_frame['image_path'].unique()
        
    def __getitem__(self, idx):
        img_path = self.images[idx]
        img = Image.open(img_path) # Open the image
        width, height = img.size # Get size of image
        
        image_labels = self.image_frame[self.image_frame['image_path'] == \
                                       img_path]
        
        # Get all the boxes and labels
        boxes = []
        labels = []
        for idx, row in image_labels.iterrows():
            x_min = row['bbox_x'] * width
            x_max = row['bbox_w'] * width
            y_min = row['bbox_y'] * height
            y_max = row['bbox_h'] * height
            boxes.append([x_min, y_min, x_max, y_max])
            labels.append(row['label'])
        
        image_id = image_labels['image_id'].unique()
        
        # Convert everything to tensors
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        iscrowd = torch.zeros((len(boxes),), dtype=torch.int64) # Assume all instances are not crowd
        image_id = torch.as_tensor(image_id)
        areas = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        
        # Generate targets dictionary
        target = {}
        target['boxes'] = boxes
        target['labels'] = labels
        target['image_id'] = image_id
        target['area'] = areas
        target['iscrowd'] = iscrowd
        
        if self.transforms is not None:
            img, target = self.transforms(img, target)
        
        return img, target
        
    def __len__(self):
        return len(self.images)

In [13]:
# Import transforms helper
import transforms as T
import utils

def get_transform(train):
    transforms = []
    transforms.append(T.ToTensor())
    if train:
        transforms.append(T.RandomHorizontalFlip(0.5))
    
    return T.Compose(transforms)

# Datasets
train_set = DetectionImageDataset(train_scenes, transforms=get_transform(True))
val_set = DetectionImageDataset(val_scenes, transforms=get_transform(False))

# DataLoaders
train_loader = torch.utils.data.DataLoader(train_set, batch_size=2, shuffle=True,
                                          pin_memory=True, num_workers=0,
                                          collate_fn=utils.collate_fn)
val_loader = torch.utils.data.DataLoader(val_set, batch_size=1, shuffle=False,
                                          pin_memory=True, num_workers=0,
                                        collate_fn=utils.collate_fn)

In [14]:
# Unzip images
if not os.path.exists('./Testing'):
    test_zipped = zipfile.ZipFile('./Testing.zip', 'r')
    test_zipped.extractall()
    test_zipped.close()

In [15]:
# Load Model
n_param_g = 80
f_path = './Detection/Archive/ourweights=True_reqgrad=True_pretrain=True/plateau/'
m_dict = torch.load(f_path + 'AMATH563_Grozi_Detection_Best.pth', map_location=device)
loaded_model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False,
                                                                    progress=False,
                                                                    pretrained_backbone=False)
in_features = loaded_model.roi_heads.box_predictor.cls_score.in_features
loaded_model.roi_heads.box_predictor = models.detection.faster_rcnn.FastRCNNPredictor(in_features, n_param_g + 1)
loaded_model.transform.max_size = 3264 # Update max size
loaded_model.to(device)
loaded_model.load_state_dict(m_dict)

# Evaluation Metrics
from engine import evaluate
evaluate(loaded_model, val_loader, device=device)

creating index...
index created!
Test:  [ 0/80]  eta: 0:00:57  loss: 1.4123 (1.4123)  model_time: 0.1058 (0.1058)  evaluator_time: 0.0072 (0.0072)  time: 0.7238  data: 0.4628  max mem: 770
Test:  [79/80]  eta: 0:00:00  loss: 0.3858 (0.5282)  model_time: 0.0971 (0.0970)  evaluator_time: 0.0036 (0.0041)  time: 0.5462  data: 0.3338  max mem: 770
Test: Total time: 0:00:44 (0.5501 s / it)
Averaged stats: loss: 0.3858 (0.5282)  model_time: 0.0971 (0.0970)  evaluator_time: 0.0036 (0.0041)
Accumulating evaluation results...
DONE (t=0.15s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.41248
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.64462
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.43571
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.00000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.00000
 Average Precision  (AP) @[ IoU=0

(<coco_eval.CocoEvaluator at 0x7fe097f962e0>,
 <utils.MetricLogger at 0x7fe0c0027d00>)

In [16]:
# Generate Test Scenarios!
if not os.path.exists('./TestingOutputs'):
    os.mkdir('./TestingOutputs')
    
import time

# Load the Grozi Mappings
grozi_mappings = pd.read_csv('./our_encoded_grozi_mappings.csv')[['label', 'label_encoded']]
grozi_mappings.drop_duplicates(inplace=True)
grozi_dict = dict(zip(grozi_mappings['label_encoded'].astype(str), grozi_mappings['label']))

font = ImageFont.truetype('AdobeVFPrototype.ttf', 36)

# device = 'cpu'
# loaded_model.to(device)
loaded_model.eval()
start = time.time()
with torch.no_grad():
    for i, item in enumerate(val_set):
        img, targets = val_set[i]
        img = img.to(device)
        prediction = loaded_model([img]) 
    end = time.time()
    mean_inf_time = round((end - start) / len(val_set), 4)
    print(mean_inf_time)
#         label_boxes = targets['boxes']
#         img = img.cpu()
#         image = Image.fromarray(img.mul(255).permute(1, 2,0).byte().numpy())
#         draw = ImageDraw.Draw(image)

#         for elem in range(len(label_boxes)):
#             draw.rectangle([(label_boxes[elem][0], label_boxes[elem][1]),
#                             (label_boxes[elem][2], label_boxes[elem][3])], 
#             outline ="green", width =3)

#         for element in range(len(prediction[0]["boxes"])):
#             boxes = prediction[0]["boxes"][element].cpu().numpy()
#             score = np.round(prediction[0]["scores"][element].cpu().numpy(),
#                                     decimals= 4)
#             label = prediction[0]['labels'][element].cpu().numpy()
#             lab_print = grozi_dict[str(label)]

#             if score > 0.5:
#                 draw.rectangle([(boxes[0], boxes[1]), (boxes[2], boxes[3])], 
#                                 outline ="red", width =3)
#                 label_text = str(score) + ', ' + lab_print
#                 draw.text((boxes[0], boxes[1]), text = label_text, font=font)   
                
#         image.save('./TestingOutputs/test_instance_' + str(i) + '.png')

0.5115
