In [1]:
%matplotlib inline

In [2]:
from torch.utils.data import Dataset
from os.path import join
from os import listdir 
from PIL import Image
import numpy as np 
import torch        
import numpy as np 
import torch 
from sumnist import SuMNIST


In [3]:
import matplotlib.pyplot as plt 
from utils.visualize import draw_boxes
  

In [4]:
from torchvision.transforms.functional import to_pil_image

# Training split loaded without passing any transforms; take its first sample.
data = SuMNIST(root="sumnist", train=True)
img, target = data[0]

# Detach the annotations and move them to the CPU once, then reuse them below.
gt_boxes = target["boxes"].detach().cpu()
gt_labels = target["labels"].detach().cpu()

# Ground truth carries no confidence, so every box is drawn with a score of 1.
draw_boxes(
    np.array(to_pil_image(img)),
    scores=torch.ones(target["labels"].shape),
    boxes=gt_boxes,
    labels=gt_labels,
    captions=[str(gt_label.item()) for gt_label in gt_labels],
)

<Axes: >

In [5]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

def my_forward(self, x: torch.Tensor) -> torch.Tensor:
    """Feed ``x`` through the stem and the four ``layerN`` stages of ``self``.

    ``self`` must expose the callables ``conv1``, ``bn1``, ``relu``,
    ``maxpool`` and ``layer1`` .. ``layer4``. They are applied in exactly that
    order and the output of ``layer4`` is returned unchanged.
    """
    stage_names = (
        "conv1", "bn1", "relu", "maxpool",
        "layer1", "layer2", "layer3", "layer4",
    )
    for stage_name in stage_names:
        # Look each stage up right before calling it, in application order.
        x = getattr(self, stage_name)(x)
    return x
    
def get_model_object_detection(num_classes):
    """Assemble a Faster R-CNN detector on top of a ResNet-18 feature extractor.

    Args:
        num_classes: forwarded unchanged to ``FasterRCNN(num_classes=...)``.

    Returns:
        The constructed ``FasterRCNN`` model.
    """
    # Classification network requested with pretrained weights. Its forward is
    # rebound to ``my_forward`` so that calling it yields the ``layer4``
    # feature map instead of going through the remaining layers.
    feature_net = torchvision.models.resnet18(pretrained=True)
    feature_net.forward = my_forward.__get__(feature_net, type(feature_net))

    # Number of channels in the feature map handed to the detector
    # (set to 512 here; the MobileNetV2 feature extractor would need 1280).
    feature_net.out_channels = 512

    # 5 sizes x 3 aspect ratios = 15 anchors per spatial location. The nested
    # tuples hold one entry per feature map; this backbone yields a single map.
    rpn_anchors = AnchorGenerator(
        sizes=((32, 64, 128, 256, 512),),
        aspect_ratios=((0.5, 1.0, 2.0),),
    )

    # Region-of-interest cropping: which feature map to crop from and the crop
    # size after rescaling. A backbone that returns a plain Tensor is addressed
    # as feature map '0'; one returning an OrderedDict could pick other keys.
    box_pooler = torchvision.ops.MultiScaleRoIAlign(
        featmap_names=['0'],
        output_size=7,
        sampling_ratio=2,
    )

    # Put the pieces together inside a FasterRCNN model.
    return FasterRCNN(
        feature_net,
        num_classes=num_classes,
        rpn_anchor_generator=rpn_anchors,
        box_roi_pool=box_pooler,
    )

In [8]:
import utils.transforms as T

def get_transform(train):
    """Build the transform pipeline applied to every dataset sample.

    Args:
        train: when truthy, a ``ScaleJitter`` augmentation with a 56x56 target
            size is appended after the dtype conversion.

    Returns:
        A ``T.Compose`` wrapping the selected transforms.
    """
    # Images are always converted to float first.
    pipeline = [T.ConvertImageDtype(torch.float)]

    if train:
        pipeline.append(T.ScaleJitter(target_size=(56, 56)))

    return T.Compose(pipeline)

In [9]:
from utils.engine import train_one_epoch, evaluate
from utils.utils import collate_fn
import traceback

# Module-level datasets read by main(): the training split uses the pipeline
# from get_transform(train=True), the test split the one from
# get_transform(train=False).
dataset = SuMNIST(root="sumnist", train=True, transforms=get_transform(train=True))
dataset_test = SuMNIST(root="sumnist", train=False, transforms=get_transform(train=False))
    
# File that main() hands to torch.save when it writes the model.
model_path = "weights/model.pt"

def main():
    """Train the detector for ``num_epochs`` epochs and return it.

    Builds the data loaders from the module-level ``dataset`` and
    ``dataset_test``, trains with SGD under a step learning-rate schedule,
    evaluates on the test split after every epoch and writes the whole model
    object to ``model_path``.

    Evaluation and checkpointing are both best-effort and independent of each
    other: a failure in either one is reported with its full traceback and the
    run continues, so a broken evaluation no longer prevents the checkpoint
    from being written and the trained model is still returned.

    Returns:
        The trained model, left on the device that was used for training.
    """
    # Local import: the notebook's import cell only pulls names out of ``os``.
    import os

    # train on the GPU or on the CPU, if a GPU is not available
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # Number of classes given to the detection head.
    # NOTE(review): torchvision detection models reserve class 0 for the
    # background - confirm that 10 matches the label range SuMNIST produces.
    num_classes = 10

    # define training and validation data loaders
    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=16, shuffle=True, num_workers=4,
        collate_fn=collate_fn)

    data_loader_test = torch.utils.data.DataLoader(
        dataset_test, batch_size=8, shuffle=False, num_workers=4,
        collate_fn=collate_fn)

    # get the model using our helper function and move it to the right device
    model = get_model_object_detection(num_classes)
    model.to(device)

    # construct an optimizer over the trainable parameters only
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
    # and a learning rate scheduler (x0.1 every 3 epochs)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

    # let's train it for some epochs
    num_epochs = 1

    for epoch in range(num_epochs):
        # train for one epoch, printing every 10 iterations
        train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
        # update the learning rate
        lr_scheduler.step()

        # evaluate on the test dataset; a failure here must not abort the run
        try:
            evaluate(model, data_loader_test, device=device)
        except Exception:
            traceback.print_exc()

        # checkpoint regardless of the evaluation outcome, creating the target
        # directory on first use; still best-effort so the model is returned
        try:
            checkpoint_dir = os.path.dirname(model_path)
            if checkpoint_dir:
                os.makedirs(checkpoint_dir, exist_ok=True)
            torch.save(model, model_path)
        except Exception:
            traceback.print_exc()

    print("That's it!")
    return model 


In [10]:
# Run the training loop defined in main() and keep the returned detector in
# the kernel as `model`.
model = main()



Epoch: [0]  [   0/3750]  eta: 1:49:34  lr: 0.000010  loss: 3.6909 (3.6909)  loss_classifier: 2.4086 (2.4086)  loss_box_reg: 0.3646 (0.3646)  loss_objectness: 0.6649 (0.6649)  loss_rpn_box_reg: 0.2529 (0.2529)  time: 1.7533  data: 0.3279  max mem: 6236
Epoch: [0]  [  10/3750]  eta: 0:34:38  lr: 0.000060  loss: 3.6030 (3.5482)  loss_classifier: 2.3428 (2.2950)  loss_box_reg: 0.3619 (0.3631)  loss_objectness: 0.6649 (0.6638)  loss_rpn_box_reg: 0.2358 (0.2263)  time: 0.5556  data: 0.0460  max mem: 6539
Epoch: [0]  [  20/3750]  eta: 0:30:56  lr: 0.000110  loss: 3.1673 (3.1091)  loss_classifier: 1.9504 (1.8944)  loss_box_reg: 0.3788 (0.3929)  loss_objectness: 0.6551 (0.6514)  loss_rpn_box_reg: 0.1584 (0.1703)  time: 0.4350  data: 0.0165  max mem: 6540
Epoch: [0]  [  30/3750]  eta: 0:29:38  lr: 0.000160  loss: 1.9937 (2.7046)  loss_classifier: 0.8481 (1.5099)  loss_box_reg: 0.4404 (0.4150)  loss_objectness: 0.6108 (0.6309)  loss_rpn_box_reg: 0.1031 (0.1488)  time: 0.4354  data: 0.0161  max me

In [None]:
# Use the GPU when one is available and fall back to the CPU otherwise (the
# same rule main() applies), so this cell also runs on CPU-only machines.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

# Qualitative check on the first test sample only.
images, targets = dataset_test[0]
print(images.shape)

with torch.no_grad():
    # The detector expects a batch, hence the added leading dimension.
    predictions = model(images.unsqueeze(0).to(device))

# Keep the sample as a one-element list of image tensors for the plotting cell.
images = list(images.unsqueeze(0))

In [None]:
from torchvision.transforms.functional import to_pil_image

# Detach the first image's detections and bring them to the CPU once.
first_detection = {
    field: predictions[0][field].detach().cpu()
    for field in ("scores", "boxes", "labels")
}

print(first_detection["scores"])

# The dictionary keys match draw_boxes' keyword arguments one-to-one.
draw_boxes(np.array(to_pil_image(images[0])), **first_detection)

# Evaluate Model 

In [None]:
# Switch the detector to evaluation mode before running it over the test set.
model.eval()

In [None]:
dataset_test = SuMNIST(root="sumnist", train=False, transforms=get_transform(train=False))
data_loader = torch.utils.data.DataLoader(dataset_test, batch_size=2, shuffle=False, num_workers=4, collate_fn=collate_fn)

# Run on whatever device the model currently lives on instead of assuming CUDA,
# and make sure it is in evaluation mode so it returns detections.
device = next(model.parameters()).device
model.eval()

# One entry per test image, in dataset order.
gts = []     # ground-truth label tensors
labels = []  # predicted label tensors
scores = []  # predicted confidence tensors

with torch.no_grad():
    for images, targets in data_loader:
        images = torch.stack(images)
        predictions = model(images.to(device))

        for p, t in zip(predictions, targets):
            # Store results on the CPU so the accelerator does not have to hold
            # the detections of the entire test set.
            labels.append(p["labels"].cpu())
            scores.append(p["scores"].cpu())
            gts.append(t["labels"])

In [None]:
# Sanity check: stacking succeeds only if every image has the same number of
# ground-truth labels; the first dimension is the number of collected images.
torch.stack(gts).shape

In [None]:
# Ground-truth label per image: 0 when its annotated labels sum to 20, else -1.
# NOTE(review): presumably -1 is the outlier marker OODMetrics expects - confirm.
y = torch.where(torch.stack(gts).sum(dim=1) == 20, 0, -1)

# Hybrid Sum 

In [None]:
# Per image: add up the predicted labels whose detection score exceeds 0.8.
sums = [
    pred_labels[pred_scores > 0.8].sum()
    for pred_scores, pred_labels in zip(scores, labels)
]

In [None]:
# Outlier score per image: 0 when the summed confident predictions equal 20,
# otherwise 1.
y_hat = torch.where(torch.stack(sums).cpu() == 20, 0, 1)

In [None]:
# Display the per-image outlier scores.
y_hat

In [None]:
from pytorch_ood.utils import OODMetrics

# Feed the outlier scores and the ground-truth labels to OODMetrics; the result
# of compute() is the mapping whose items are formatted in the next cell.
m = OODMetrics().update(y_hat.float(), y).compute()


In [None]:
# Show every metric scaled by 100 and formatted with two decimals.
{k: f"{v * 100:.2f}" for k, v in m.items()}

# Hybrid Memory 

In [None]:
from detectors import HybridMemory

# Training split with the evaluation-time transforms; only the labels are used.
dataset = SuMNIST(root="sumnist", train=True, transforms=get_transform(train=False))

# NOTE(review): `HybridMemory` is imported in this cell, but its API is not
# visible here. The explicit implementation below is the one this cell used to
# carry as commented-out code; it is restored because the metrics cell that
# follows needs `outlier_scores`.

# Memorise every label combination (order-independent) seen in training.
combinations = set()
for sample_idx in range(len(dataset)):
    _, train_target = dataset[sample_idx]
    combinations.add(tuple(sorted(v.item() for v in train_target["labels"])))

# An image scores 1 (outlier) when the labels of its confident detections
# (score > 0.8) form a combination that never occurred in training, else 0.
outlier_scores = []
for s, l in zip(scores, labels):
    detected = tuple(sorted(v.item() for v in l[s > 0.8]))
    outlier_scores.append(0 if detected in combinations else 1)

# Compact summary instead of dumping the whole set.
len(combinations)

In [None]:
from pytorch_ood.utils import OODMetrics
# Ground-truth label per image: 0 when its annotated labels sum to 20, else -1.
y = torch.where(torch.stack(gts).sum(dim=1) == 20, 0, -1)
# Requires `outlier_scores` (one numeric entry per evaluated image) to be
# present in the kernel; it is converted to a float tensor before scoring.
m = OODMetrics().update(torch.tensor(outlier_scores).float(), y).compute()


In [None]:
# Show every metric scaled by 100 and formatted with two decimals.
{k: f"{v * 100:.2f}" for k, v in m.items()}

In [None]:
import matplotlib.pyplot as plt 
from PIL import Image 

# Open the five files output/00000.jpg .. output/00004.jpg as PIL images.
imgs = [Image.open(f"output/0000{frame_idx}.jpg") for frame_idx in range(5)]




In [None]:
dataset_test = SuMNIST(root="sumnist", train=False, transforms=get_transform(train=False))
data_loader = torch.utils.data.DataLoader(dataset_test, batch_size=5, shuffle=False, num_workers=4, collate_fn=collate_fn)

# Run on whatever device the model currently lives on instead of assuming CUDA,
# and make sure it is in evaluation mode so it returns detections.
device = next(model.parameters()).device
model.eval()

with torch.no_grad():
    # A single batch of five test images is all the figure needs.
    images, targets = next(iter(data_loader))
    images = torch.stack(images)
    predictions = model(images.to(device))

fig, axs = plt.subplots(1, 5, figsize=(10, 25))

for image, p, ax in zip(images, predictions, axs.flatten()):
    draw_boxes(
        # move dim 0 last for plotting (presumably channels-first -> channels-last)
        image.permute(1, 2, 0),
        scores=p["scores"].detach().cpu(),
        boxes=p["boxes"].detach().cpu(),
        labels=p["labels"].detach().cpu(),
        captions=[str(label.item()) for label in p["labels"]],
        ax=ax,
    )
    ax.axis('off')

plt.tight_layout()
plt.savefig("img/predictions.png", bbox_inches="tight", dpi=300, transparent=True)
