In [1]:
import pandas as pd
import numpy as np
import cv2
import os
import re

from PIL import Image
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

import torch
import torchvision

from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

from torch.utils.data import DataLoader, Dataset
from torch.utils.data.sampler import SequentialSampler
from torchvision.transforms import functional as F

from matplotlib import pyplot as plt
import scipy.io as sio

In [2]:
import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'

In [3]:
DIR_INPUT = '/Users/susanthdasari/OneDrive - Georgia State University/Research/Face Detection/Data'
DIR_TRAIN_IMG = f'{DIR_INPUT}/WIDER_train/images'
DIR_TRAIN_LABELS = f'{DIR_INPUT}/wider_face_split'

In [4]:
wider_raw = sio.loadmat(f'{DIR_TRAIN_LABELS}/wider_face_train.mat')

In [5]:
def imshow(img, bboxes=None):
    fig, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(img, aspect='equal')
    
    if bboxes is not None:
        for bbox in bboxes:
            ax.add_patch(
                plt.Rectangle((bbox[0], bbox[1]),
                              bbox[2] - bbox[0],
                              bbox[3] - bbox[1], fill=False,
                              edgecolor='red', linewidth=3.5)
                )

    plt.axis('off')
    plt.tight_layout()
    plt.draw()
    plt.show()

In [6]:
# wider_img = []
# wider_target = []
# event_list = wider_raw.get('event_list')
# file_list = wider_raw.get('file_list')
# face_bbx_list = wider_raw.get('face_bbx_list')
# img_count=0
# for event_idx, event in enumerate(event_list):
#     directory = event[0][0]
#     for im_idx, im in enumerate(file_list[event_idx][0]):
#         im_name = im[0][0]
#         face_bbx = face_bbx_list[event_idx][0][im_idx][0]
#         #  print face_bbx.shape
        
#         bboxes = []

#         for i in range(face_bbx.shape[0]):
#             xmin = int(face_bbx[i][0])
#             ymin = int(face_bbx[i][1])
#             xmax = int(face_bbx[i][2]) + xmin
#             ymax = int(face_bbx[i][3]) + ymin
#             bboxes.append((xmin, ymin, xmax, ymax))

#         image_name =os.path.join(DIR_TRAIN_IMG, directory,
#                    im_name + '.jpg')
# #         print(im_name)
#         image = cv2.imread(image_name, cv2.IMREAD_COLOR)
#         image = image[:, :, (2, 1, 0)]

#         image = image.astype(np.float32)

#         image /= 255.0

#         wider_img.append(image)
#         wider_target.append(bboxes)
        
# #         imshow(im, bboxes)
        
#         img_count += 1
#         if img_count >= 10: break
#     if img_count >= 10: break

In [7]:
wider_img_list = []
wider_bboxes = []
event_list = wider_raw.get('event_list')
file_list = wider_raw.get('file_list')
face_bbx_list = wider_raw.get('face_bbx_list')
img_count=0
for event_idx, event in enumerate(event_list):
    directory = event[0][0]
    for im_idx, im in enumerate(file_list[event_idx][0]):
        im_name = im[0][0]
        face_bbx = face_bbx_list[event_idx][0][im_idx][0]
        #  print face_bbx.shape
        
        bboxes = []

        for i in range(face_bbx.shape[0]):
            xmin = int(face_bbx[i][0])
            ymin = int(face_bbx[i][1])
            xmax = int(face_bbx[i][2]) + xmin
            ymax = int(face_bbx[i][3]) + ymin
            bboxes.append((xmin, ymin, xmax, ymax))

        image_name =os.path.join(DIR_TRAIN_IMG, directory,
                   im_name + '.jpg')
#         print(im_name)
        wider_img_list.append(image_name)
        wider_bboxes.append(bboxes)
        
#         imshow(im, bboxes)
        
        if img_count >= 10: break
        img_count += 1
    if img_count >= 10: break

In [8]:
class WiderDataset(object):
    """
    Build a wider parser
    Parameters
    ----------
    image_list : path of the label file
    bboxes_list : path of the image files
    transforms : Any pytorch transforms needs to be performed
    Returns
    -------
    a wider parser
    """
    def __init__(self, image_list, bboxes_list, transforms=None):

        self.transforms = transforms
        self.image_list = image_list
        self.bboxes_list = bboxes_list


    def __getitem__(self, idx):

        image_name = self.image_list[idx]
        boxes = self.bboxes_list[idx]

        im = cv2.imread(image_name, cv2.IMREAD_COLOR)
        im = im[:, :, (2, 1, 0)]
        im = im.astype(np.float32)
        im /= 255.0
        
#         im = F.to_tensor(im)

        num_objs = len(boxes)

#         imshow(im, boxes)

    
        # convert everything into a torch.Tensor
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        area = torch.as_tensor(area, dtype=torch.float32)
        
        # there is only one class
        labels = torch.ones((num_objs,), dtype=torch.int64)
        image_id = torch.tensor([idx])
        
        # suppose all instances are not crowd
        iscrowd = torch.zeros((len(boxes),), dtype=torch.int64)
#         imshow(im, boxes)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["image_id"] = image_id
        target['area'] = area
        target['iscrowd'] = iscrowd

        if self.transforms is not None:
            im, target = self.transforms(im, target)

        return im, target

    def __len__(self):
        return len(self.image_list)


In [9]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [10]:
def collate_fn(batch):
    return tuple(zip(*batch))

dataset = WiderDataset(wider_img_list, wider_bboxes)
data_loader = DataLoader(dataset,
    batch_size=2,
    shuffle=True,
#     num_workers=4,
    collate_fn=collate_fn
)

In [11]:
images, targets = next(iter(data_loader))
images = list(F.to_tensor(image) for image in images)
targets = [{k: v for k, v in t.items()} for t in targets]

In [12]:
images[0].shape

torch.Size([3, 819, 1024])

In [15]:
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

In [16]:
num_classes = 2  # 1 class (wheat) + background

# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features

# replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

In [17]:
model(images, targets)

	nonzero()
Consider using one of the following signatures instead:
	nonzero(*, bool as_tuple) (Triggered internally at  ../torch/csrc/utils/python_arg_parser.cpp:766.)
  keep = keep.nonzero().squeeze(1)


{'loss_classifier': tensor(0.6667, grad_fn=<NllLossBackward>),
 'loss_box_reg': tensor(0.0850, grad_fn=<DivBackward0>),
 'loss_objectness': tensor(1.6552, grad_fn=<BinaryCrossEntropyWithLogitsBackward>),
 'loss_rpn_box_reg': tensor(0.4489, grad_fn=<DivBackward0>)}

In [18]:
class Averager:
    def __init__(self):
        self.current_total = 0.0
        self.iterations = 0.0

    def send(self, value):
        self.current_total += value
        self.iterations += 1

    @property
    def value(self):
        if self.iterations == 0:
            return 0
        else:
            return 1.0 * self.current_total / self.iterations

    def reset(self):
        self.current_total = 0.0
        self.iterations = 0.0

## Train

In [21]:
# model.to(device)
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
# lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)
lr_scheduler = None

num_epochs = 2

In [None]:
loss_hist = Averager()
itr = 1

for epoch in range(num_epochs):
    loss_hist.reset()
    
    for images, targets in data_loader:
        
        images = list(np.swapaxes(image,0,2) for image in images)
        targets = [{k: v for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)
        
        losses = sum(loss for loss in loss_dict.values())
        print(losses)
        loss_value = losses.item()

        loss_hist.send(loss_value)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        if itr % 3 == 0:
            print(f"Iteration #{itr} loss: {loss_value}")

        itr += 1
    
    # update the learning rate
    if lr_scheduler is not None:
        lr_scheduler.step()

    print(f"Epoch #{epoch} loss: {loss_hist.value}")   

	nonzero()
Consider using one of the following signatures instead:
	nonzero(*, bool as_tuple) (Triggered internally at  ../torch/csrc/utils/python_arg_parser.cpp:766.)
  keep = keep.nonzero().squeeze(1)


## Train 2

In [28]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# load a model pre-trained pre-trained on COCO
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# replace the classifier with a new one, that has
# num_classes which is user-defined
num_classes = 2  # 1 class (person) + background
# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features
# replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

In [136]:
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
dataset = WiderDataset(wider_img_list, wider_bboxes)
data_loader = torch.utils.data.DataLoader(
 dataset, batch_size=2, shuffle=True, num_workers=4,
 collate_fn=collate_fn)

In [137]:
images,targets = next(iter(data_loader))
images = list(image for image in images)
targets = [{k: v for k, v in t.items()} for t in targets]

In [122]:
output = model(images,targets)   # Returns losses and detections

AttributeError: 'numpy.ndarray' object has no attribute 'dim'

In [121]:
# For Training
images,targets = next(iter(data_loader))
images = list(image for image in images)
targets = [{k: v for k, v in t.items()} for t in targets]
output = model(images,targets)   # Returns losses and detections
# For inference
model.eval()
x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
predictions = model(x)           # Returns predictions

AttributeError: 'numpy.ndarray' object has no attribute 'dim'