## Helpers that make it much easier to run inference with the trained model

In [None]:
import time, os, copy, argparse, collections, sys, numpy as np

import torch, torch.nn as nn, torch.optim as optim, torchvision
from torch.optim import lr_scheduler
from torch.autograd import Variable
from torchvision import datasets, models, transforms

import model
from anchors import Anchors
from datagen import CSVDataset, collater, Resizer, AspectRatioBasedSampler, Augmenter, UnNormalizer, Normalizer
from torch.utils.data import Dataset, DataLoader

import csv_eval
import cv2
import matplotlib.pyplot as plt

# Version guard: require the second component of the torch version to be '4'.
# NOTE(review): only that one component is compared, so 1.4 or 2.4 would pass
# as well as 0.4 -- confirm which releases are actually meant to be accepted.
assert torch.__version__.split('.')[1] == '4'
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
# NOTE: a bare expression that is not the last statement of a cell is not
# echoed by the notebook, so this line has no visible effect.
torch.__version__
# Device handle; the loaded model is moved onto it with `.to(device)`.
device = torch.device("cuda")

In [None]:
# Load a checkpoint that stores the full model object (as in the drive link
# given in the repo) rather than only a state_dict.
# NOTE(review): torch.load unpickles the file, and unpickling can execute
# arbitrary code -- only load checkpoints that come from a trusted source.
retinanet = torch.load("new_ckpts/50_retinanet_7.pt")
# Move the model onto `device`.
retinanet.to(device)

In [None]:
# To load a state_dict saved by the training code in new_train.py instead, use:

# checkpoint = torch.load("models/50_retinanet_2.pt")
# retinanet = model.resnet50(num_classes=dataset_test.num_classes(), pretrained=True)
# retinanet.load_state_dict(checkpoint['model_state_dict'])
# retinanet.to(device)

In [None]:
# Call the model's eval() before the inference cells below are run.
retinanet.eval()

# Inference on Test set Images

In [None]:
# Inference on Test set Images

# Shared UnNormalizer instance (imported from datagen); it is applied to image
# tensors before they are scaled by 255 and converted to arrays for drawing.
unnormalize = UnNormalizer()

def draw_caption(image, box, caption):
    """Write ``caption`` onto ``image`` slightly above the corner of ``box``.

    The text is drawn in place with ``cv2.putText``; nothing is returned.

    Args:
        image: Image array handed to ``cv2.putText``.
        box: Sequence of coordinates; it is cast to integers and its first
            two entries are used as the x and y of the text anchor.
        caption: Text to draw.
    """
    corner = np.array(box).astype(int)
    # Lift the text 10 px above the box corner so it does not sit on the box.
    anchor = (corner[0], corner[1] - 10)
    cv2.putText(
        image,
        caption,
        anchor,
        cv2.FONT_HERSHEY_SIMPLEX,
        fontScale=0.5,
        color=(0, 0, 255),
        thickness=2,
    )


def draw_from_images(dataloader_test, max_images=21, score_threshold=0.5):
    """Run the detector on test batches and plot the detections.

    For every batch the module-level ``retinanet`` model is run on the GPU,
    each detection scoring above ``score_threshold`` is drawn (box plus class
    name) on the first image of the batch, and the result is shown with
    matplotlib. Class names and colours come from the module-level
    ``dataset_test``; ``unnormalize`` is applied to the image before drawing.

    Args:
        dataloader_test: Iterable of batches; each batch is indexed with
            ``'img'`` to obtain the image tensor.
        max_images: Number of batches to process before stopping. The default
            of 21 covers batch indices 0..20.
        score_threshold: Detections must score strictly above this value to
            be drawn.
    """
    colors = np.random.uniform(0, 255, size=(dataset_test.num_classes(), 3))
    # Inverse of dataset_test.labels: class name -> class index.
    name_to_index = {name: index for index, name in dataset_test.labels.items()}

    for idx, data in enumerate(dataloader_test):
        if idx >= max_images:
            break

        print(data['img'].shape)
        with torch.no_grad():
            start = time.time()
            scores, classification, transformed_anchors = retinanet(data['img'].cuda().float())
            print('Elapsed time: {}'.format(time.time() - start))

        # Bring the scores to host memory first: NumPy cannot read a CUDA
        # tensor directly.
        keep = np.where(scores.cpu() > score_threshold)[0]

        img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()
        img[img < 0] = 0
        img[img > 255] = 255
        # Channels-first -> channels-last for OpenCV / matplotlib.
        img = np.transpose(img, (1, 2, 0))
        # Swap the channel order for the OpenCV drawing calls (this also
        # yields a contiguous uint8 array); it is swapped back before plotting.
        img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)

        for det in keep:
            bbox = transformed_anchors[det, :]
            x1, y1, x2, y2 = (int(bbox[k]) for k in range(4))
            label_name = dataset_test.labels[int(classification[det])]
            color = colors[name_to_index[label_name]]

            draw_caption(img, (x1, y1, x2, y2), label_name)
            cv2.rectangle(img, (x1, y1), (x2, y2), color, thickness=2)

        # figsize is in inches (width, height); a height of 768 would request
        # an absurdly tall figure, so use 13.66 x 7.68.
        plt.figure(figsize=(13.66, 7.68))
        plt.imshow(img[:, :, ::-1])
        plt.show()


In [None]:
# Test set built from the annotation CSV and the class list; the transform
# pipeline is Normalizer followed by Resizer.
dataset_test = CSVDataset(
    train_file="data/test/test_annot.csv",
    class_list="data/class_ids.txt",
    transform=transforms.Compose([Normalizer(), Resizer()]),
)

# No batch size is passed, so the DataLoader default applies; `collater`
# assembles the batches.
dataloader_test = DataLoader(
    dataset_test,
    num_workers=2,
    collate_fn=collater,
)

# Plot the detections for the test images.
draw_from_images(dataloader_test)

In [None]:
def resizer(image, min_side=608, max_side=1024, stride=32):
    """Rescale an image and zero-pad it so both sides are multiples of ``stride``.

    The image is scaled so that its smaller side equals ``min_side``; if that
    would make the larger side exceed ``max_side``, the scale is reduced so
    the larger side equals ``max_side`` instead. The scaled image is then
    placed in the top-left corner of a zero-filled ``uint8`` array whose
    height and width are rounded up to the next multiple of ``stride``.

    Args:
        image: Array of shape ``(rows, cols, channels)``.
        min_side: Target length of the smaller side.
        max_side: Upper bound for the larger side after scaling.
        stride: Both output sides are padded up to a multiple of this value.

    Returns:
        The padded ``uint8`` image.
    """
    rows, cols, channels = image.shape

    # Rescale so the smallest side is min_side ...
    scale = min_side / min(rows, cols)

    # ... unless the largest side would then exceed max_side, which can
    # happen for images with a large aspect ratio.
    largest_side = max(rows, cols)
    if largest_side * scale > max_side:
        scale = max_side / largest_side

    # cv2.resize takes the target size as (width, height).
    image = cv2.resize(image, (int(round(cols * scale)), int(round(rows * scale))))
    rows, cols, channels = image.shape

    # `-x % stride` is 0 when x is already aligned, so an aligned side gets no
    # padding instead of a whole extra stride of zeros.
    pad_rows = -rows % stride
    pad_cols = -cols % stride

    padded = np.zeros((rows + pad_rows, cols + pad_cols, channels), dtype=np.uint8)
    padded[:rows, :cols, :] = image.astype(np.uint8)
    return padded

# Per-channel mean / std used to normalise image tensors.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Array -> PIL image -> tensor -> normalised tensor.
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
    normalize,
])
def camera(file, SaveVideo=1, score_threshold=0.5):
    """Run the detector over a video file and optionally save the annotated video.

    Every frame is resized to the resolution that ``resizer`` yields for the
    first frame, passed through the module-level ``transform`` and
    ``retinanet`` (on the GPU), and annotated with the box and class name of
    each detection scoring above ``score_threshold``. Class names and colours
    come from the module-level ``dataset_test``. A running frames-per-second
    figure is written to stdout every five frames.

    Args:
        file: Path of the video to read.
        SaveVideo: If truthy, write the annotated frames to
            ``converted_<name>.mp4`` in the current directory, where
            ``<name>`` is the input file name up to its first dot.
        score_threshold: Detections must score strictly above this value to
            be drawn.

    Raises:
        AssertionError: If ``file`` does not exist, the source cannot be
            opened, or no frame can be read from it.
    """
    colors = np.random.uniform(150, 255, size=(dataset_test.num_classes(), 3))
    # Inverse of dataset_test.labels: class name -> class index.
    name_to_index = {name: index for index, name in dataset_test.labels.items()}

    assert os.path.isfile(file), \
        'file {} does not exist'.format(file)

    capture = cv2.VideoCapture(file)
    writer = None
    try:
        assert capture.isOpened(), \
            'Cannot capture source'

        _, frame = capture.read()
        assert frame is not None, \
            'Cannot read a frame from source'

        # The first frame only fixes the working resolution here; it is still
        # processed by the loop below instead of being thrown away.
        height, width, _ = resizer(frame).shape
        print(width, height)

        if SaveVideo:
            # 'mp4v' is a codec tag that matches the .mp4 container used for
            # the output file.
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            fps = round(capture.get(cv2.CAP_PROP_FPS))
            writer = cv2.VideoWriter(
                'converted_{}.mp4'.format(str(file.split("/")[-1].split(".")[0])),
                fourcc, fps, (width, height))

        processed = 0
        start = time.time()

        with torch.no_grad():
            while frame is not None:
                processed += 1

                # NOTE: frames are stretched to (width, height), not padded.
                canvas = cv2.resize(frame, (width, height))

                # OpenCV decodes frames as BGR, while the mean/std in
                # `normalize` are listed in RGB order, so convert before
                # normalising.
                # NOTE(review): assumes the model was trained on RGB input --
                # confirm against the dataset loader.
                rgb = cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB)
                img_tensor = transform(rgb).unsqueeze_(0)

                scores, classification, transformed_anchors = retinanet(img_tensor.cuda().float())

                # Bring the scores to host memory first: NumPy cannot read a
                # CUDA tensor directly.
                keep = np.where(scores.cpu() > score_threshold)[0]

                # Draw on the BGR frame itself so the written video keeps the
                # channel order VideoWriter expects.
                for det in keep:
                    bbox = transformed_anchors[det, :]
                    x1, y1, x2, y2 = (int(bbox[k]) for k in range(4))
                    label_name = dataset_test.labels[int(classification[det])]
                    color = colors[name_to_index[label_name]]
                    draw_caption(canvas, (x1, y1, x2, y2), label_name)
                    cv2.rectangle(canvas, (x1, y1), (x2, y2), color, thickness=2)

                if writer is not None:
                    writer.write(canvas)

                if processed % 5 == 0:
                    sys.stdout.write('\r')
                    sys.stdout.write('{0:3.3f} FPS'.format(
                        processed / (time.time() - start)))
                    sys.stdout.flush()

                _, frame = capture.read()

        print('\nEnd of Video')
        sys.stdout.write('\n')
    finally:
        # Release the writer and the capture even if a frame fails midway.
        if writer is not None:
            writer.release()
        capture.release()


In [None]:
# Annotate the sample video; with the default SaveVideo=1 the result is also
# written to a 'converted_<name>.mp4' file.
camera("test_ads/abcd.mp4")

# Code Testing Space

In [None]:
# torch.save(retinanet.state_dict(), "state_dict/retinanet_state_dict.pth")
normalize = transforms.Normalize(
   mean=[0.485, 0.456, 0.406],
   std=[0.229, 0.224, 0.225]
)
transform=transforms.Compose([transforms.ToPILImage(), transforms.ToTensor(), normalize])

# Import the submodule explicitly: a bare `import skimage` does not guarantee
# that `skimage.io` has been loaded.
import skimage.io
# Load the same file with scikit-image and with OpenCV.
img = skimage.io.imread("test_im/birthday.jpg")
print(img.shape)
img1 = cv2.imread("test_im/birthday.jpg")
# Last expression of the cell, so the notebook echoes the array.
img1
# img1 = resizer(img1)
# fr = transform(img1)
# print(fr.shape)

In [None]:
# print(a)
# NOTE(review): `a` is not defined in any visible cell -- it presumably comes
# from an earlier scratch run; confirm before relying on this cell.
print(a.shape)
# squeeze_ works in place, so `a` itself is modified by this line. The two
# transposes then move the last dimension to the front.
b = a.squeeze_(0).transpose(1, 2).transpose(0, 1)
print(b.shape)