In [None]:
%pylab inline
%load_ext autoreload
%autoreload 2
from fastai.vision import *
from pathlib import Path
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

In [None]:
# Source/target page folders of the aligned corpus (folder "31"):
# `path_img` feeds ImageImageList.from_folder, `target_img` is where
# get_y_fn looks up the matching target image.
path_img = Path("..") / "data" / "aligned_rus" / "shokugeki_no_soma" / "31"
target_img = Path("..") / "data" / "aligned_eng" / "shokugeki_no_soma" / "31"

In [None]:
# NOTE(review): `mnist` is not referenced by any other cell visible in this
# notebook, yet this line calls untar_data for the unrelated MNIST_TINY sample
# -- looks like a tutorial leftover; confirm and remove.
mnist = untar_data(URLs.MNIST_TINY)
# NOTE(review): `tfms` (transforms built with do_flip=False) is never passed to
# `.transform(...)` in the data-block cell, which calls `get_transforms()` again
# with its defaults -- decide which of the two is intended.
tfms = get_transforms(do_flip=False)

In [None]:
def get_y_fn(x):
    """Map an input image path to the path of its target image.

    The target is looked up in `target_img` under the same file stem as `x`,
    always with a `.jpg` extension.

    Defined with `def` instead of binding a lambda to a name (PEP 8), so the
    function carries a real name in tracebacks and has room for this docstring.

    Args:
        x: path-like object exposing `.stem` (e.g. a `pathlib.Path`).

    Returns:
        `target_img / f"{x.stem}.jpg"`.
    """
    return target_img/f'{x.stem}.jpg'

In [None]:
data = (ImageImageList.from_folder(path_img)
        #Where to find the data? -> in path_img and its subfolders
        .split_by_rand_pct(valid_pct=0.2, seed=42)
        #How to split in train/valid? -> randomly, 20% in valid; seeded so the split is identical on every run
        .label_from_func(get_y_fn)
        #How to label? -> the target image path returned by get_y_fn for each input file
        .transform(get_transforms(), tfm_y=True, size=1024)
        #Data augmentation? -> default get_transforms() at size 1024, applied to the target images too (tfm_y=True)
        .databunch(bs=8))

In [None]:
# Visual sanity check: display one row of samples from the databunch.
data.show_batch(rows=1, figsize=(10,10))

In [None]:
# Inspect a single training target (index 1); as the last expression of the
# cell it is rendered by the notebook.
data.train_ds.y[1]

In [None]:
# Compare one training input with its target after per-channel standardisation
# and highlight the pixels where the two still disagree strongly.
idx = 5
diff_threshold = 2.2  # minimum |x - y| (in standardised units) kept in the difference map
eps = 1e-8            # floor for divisors so a constant channel cannot produce inf/NaN

# Clone so the tensors held by the dataset are never modified.
x = data.train_ds.x[idx].data.clone()
y = data.train_ds.y[idx].data.clone()

# Statistics are reduced over dims 1 and 2 and kept broadcastable (keepdim=True).
# assumes the image tensors are laid out (channels, height, width) -- TODO confirm
x_mean = x.mean(dim=(1,2), keepdim=True)
y_mean = y.mean(dim=(1,2), keepdim=True)
x_std = x.std(dim=(1,2), keepdim=True)
y_std = y.std(dim=(1,2), keepdim=True)

# Standardise both images; the clamp guards against a zero standard deviation.
x = (x - x_mean) / x_std.clamp(min=eps)
y = (y - y_mean) / y_std.clamp(min=eps)

_,axs = plt.subplots(1,3, figsize=(20,20))

# Left: the input re-expressed with the target's mean/std, clamped to [0, 1].
Image((x*y_std + y_mean).clamp(0, 1)).show(ax=axs[0])
axs[0].set_title("input (rescaled to target statistics)")
# Middle: the untouched target.
data.train_ds.y[idx].show(ax=axs[1])
axs[1].set_title("target")

# Right: absolute difference, zeroed at or below the threshold, rescaled to [0, 1].
diff = (x - y).abs()
# Cast the mask to diff's dtype explicitly instead of relying on in-place
# float-by-mask multiplication, which depends on the PyTorch version.
diff = diff * (diff > diff_threshold).type_as(diff)
peak = diff.max()
if peak > 0:  # nothing above the threshold -> keep the all-zero map instead of 0/0 = NaN
    diff = diff / peak
Image(diff).show(ax=axs[2])
axs[2].set_title("thresholded |input - target|")
x.shape

In [None]:
# Variant 1: torchvision Faster R-CNN (ResNet-50 FPN) with its box-predictor
# head replaced by a fresh one sized for `num_classes`.
# NOTE(review): `model` is reassigned by later cells in this notebook, so this
# instance is not the one that ends up being trained.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
num_classes = 2

# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features
# replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)


In [None]:
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

# Variant 2: Faster R-CNN assembled by hand around a MobileNetV2 feature extractor.
# NOTE(review): `model` is reassigned again by the training cell further down,
# so this instance is not the one that ends up being trained.

# load a pre-trained model for classification and return
# only the features
backbone = torchvision.models.mobilenet_v2(pretrained=True).features
# FasterRCNN needs to know the number of
# output channels in a backbone. For mobilenet_v2, it's 1280
# so we need to add it here
backbone.out_channels = 1280

# let's make the RPN generate 5 x 3 anchors per spatial
# location, with 5 different sizes and 3 different aspect
# ratios. We have a Tuple[Tuple[int]] because each feature
# map could potentially have different sizes and
# aspect ratios
anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                   aspect_ratios=((0.5, 1.0, 2.0),))

# let's define what are the feature maps that we will
# use to perform the region of interest cropping, as well as
# the size of the crop after rescaling.
# if your backbone returns a Tensor, featmap_names is expected to
# be [0]. More generally, the backbone should return an
# OrderedDict[Tensor], and in featmap_names you can choose which
# feature maps to use.
# NOTE(review): the key type is version-sensitive -- newer torchvision releases
# appear to name the single feature map with the string '0' rather than the
# int 0; confirm against the installed torchvision before relying on this.
roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                output_size=7,
                                                sampling_ratio=2)

# put the pieces together inside a FasterRCNN model
model = FasterRCNN(backbone,
                   num_classes=2,
                   rpn_anchor_generator=anchor_generator,
                   box_roi_pool=roi_pooler)

In [None]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor


def get_model_instance_segmentation(num_classes, hidden_layer=256, pretrained=True):
    """Build a Mask R-CNN (ResNet-50 FPN) whose heads predict `num_classes` classes.

    The torchvision model is created first, then both its box predictor and its
    mask predictor are replaced by freshly initialised heads sized for
    `num_classes`.

    Args:
        num_classes: number of output classes for both replacement heads.
        hidden_layer: second argument handed to `MaskRCNNPredictor`
            (defaults to 256, the value that used to be hard-coded).
        pretrained: forwarded to `maskrcnn_resnet50_fpn`; the default `True`
            keeps the previous behaviour of starting from pre-trained weights.

    Returns:
        The modified torchvision Mask R-CNN model.
    """
    # load an instance segmentation model, by default pre-trained on COCO
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=pretrained)

    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    # now get the number of input features for the mask classifier
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    # and replace the mask predictor with a new one
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
                                                       hidden_layer,
                                                       num_classes)

    return model


In [None]:
import training.transforms as T

def get_transform(train):
    """Compose the preprocessing pipeline applied to each dataset sample.

    Every pipeline starts with `T.ToTensor()`; when `train` is truthy a
    `T.RandomHorizontalFlip(0.5)` step is appended after it.

    Returns:
        A `T.Compose` wrapping the selected steps, in order.
    """
    steps = [T.ToTensor()]
    if train:
        steps += [T.RandomHorizontalFlip(0.5)]
    return T.Compose(steps)


In [None]:
from dataset.dataset import ImageTextDataset


In [None]:
from training.engine import train_one_epoch, evaluate
import utils


# train on the GPU or on the CPU, if a GPU is not available
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# two classes: background plus a single foreground class
# (presumably the text regions of ImageTextDataset -- TODO confirm)
num_classes = 2
# two views of the same folder: training transforms vs. evaluation transforms
dataset = ImageTextDataset('../data/dataset', get_transform(train=True))
dataset_test = ImageTextDataset('../data/dataset', get_transform(train=False))

# number of shuffled samples held out for evaluation
num_test = 50
# fail early with a clear message instead of building an empty training subset
assert len(dataset) > num_test, (
    f"need more than {num_test} samples to leave a non-empty training set, "
    f"got {len(dataset)}")

# split the dataset in train and test set; the seed makes the split
# identical on every run so results are comparable
torch.manual_seed(1)
indices = torch.randperm(len(dataset)).tolist()
dataset = torch.utils.data.Subset(dataset, indices[:-num_test])
dataset_test = torch.utils.data.Subset(dataset_test, indices[-num_test:])

# define training and validation data loaders
data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=2, shuffle=True, num_workers=4,
    collate_fn=utils.collate_fn)

data_loader_test = torch.utils.data.DataLoader(
    dataset_test, batch_size=1, shuffle=False, num_workers=4,
    collate_fn=utils.collate_fn)



In [None]:
# Build the Mask R-CNN through the helper and place it on the chosen device.
model = get_model_instance_segmentation(num_classes)
model.to(device)

# Only parameters that still require gradients are handed to the optimizer.
params = [param for param in model.parameters() if param.requires_grad]
optimizer = torch.optim.SGD(
    params,
    lr=0.005,
    momentum=0.9,
    weight_decay=0.0005,
)

# StepLR with step_size=3 and gamma=0.1; it is stepped once per epoch below.
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

# Total number of passes over the training loader.
num_epochs = 10

for epoch in range(num_epochs):
    # one training pass, logging every 10 iterations
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
    # advance the learning-rate schedule after the pass
    lr_scheduler.step()
    # score the current weights on the held-out loader
    evaluate(model, data_loader_test, device=device)

print("That's it!")

In [None]:
from vis import draw_annotation, show_image
# Pull one sample from the training subset and look at its first mask and its boxes.
idx = 0
image, targets = dataset[idx]
# (disabled) earlier attempt at moving the image to the GPU
#image = list(img.cuda() for img in image)
# `[None, :, :]` prepends a leading axis to the first mask before it is displayed
show_image(targets['masks'][0][None, :, :], figsize=(10,10))
# (disabled) alternative view of the sample via draw_annotation
#draw_annotation(image, targets, figsize=(10,10))
print(targets['boxes'])


In [None]:
# Run inference on the single sample loaded above.
# torchvision detection models take a *list* of image tensors and, in eval
# mode, return one prediction dict per image; the image also has to live on
# the same device as the model.
# assumes `image` is a single 3-D image tensor -- TODO confirm against ImageTextDataset
model.eval()
with torch.no_grad():  # no gradients needed for inference
    outputs = model([image.to(device)])

In [None]:
# List the fields available in the first entry of `outputs`.
print(outputs[0].keys())

In [None]:
# Show the first predicted mask for the image that was passed to the model.
# Only one dataset sample is fed to the model, and the cell above already reads
# entry 0 of `outputs`, so index 0 is used here as well (index 1 would be out
# of range for a single-image call).
masks = outputs[0]['masks']
if len(masks) > 0:
    Image(masks[0].detach().cpu()).show(figsize=(10,10))
else:
    # an empty prediction would otherwise surface as an opaque IndexError
    print("The model returned no masks for this image.")

In [None]:
# NOTE(review): interactive documentation lookup for `Image` -- looks like a
# development leftover; consider removing it from the finished notebook.
doc(Image)