In [1]:
import os
import numpy as np
import torch
from PIL import Image

## 1. Make Dataset loader

In [2]:
# Define a torch Dataset for the Penn-Fudan images and their instance masks
class PennFudanDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing each file in ``PNGImages`` with one in ``PedMasks``.

    Both folders are read from ``root`` and their listings are sorted, so the
    i-th image and the i-th mask are treated as one sample.
    """

    # A map-style dataset needs three methods: __init__, __len__, __getitem__.
    def __init__(self, root, transforms):
        """Record the dataset location and index its image / mask files.

        Args:
            root: Directory containing the ``PNGImages`` and ``PedMasks`` folders.
            transforms: Callable invoked as ``transforms(image, target)`` that
                returns the transformed ``(image, target)`` pair, or ``None``
                to return samples untransformed.
        """
        self.root = root
        self.transforms = transforms

        # Sort both listings so index i refers to the same sample in each folder.
        self.images = sorted(os.listdir(os.path.join(root, 'PNGImages')))
        self.masks = sorted(os.listdir(os.path.join(root, 'PedMasks')))

    def __len__(self):
        """Return the number of image files found under ``PNGImages``."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load sample ``idx`` and build its detection / segmentation target.

        Args:
            idx: Position of the sample in the sorted file listings.

        Returns:
            ``(image, target)``. ``target`` is a dict with keys ``boxes``
            (float32, N x 4 as ``[xmin, ymin, xmax, ymax]``), ``labels``
            (int64 ones), ``masks`` (uint8, one binary mask per instance),
            ``image_id``, ``area`` and ``iscrowd`` (int64 zeros). When
            ``transforms`` is set, the pair it returns is what is returned.
        """
        image_path = os.path.join(self.root, 'PNGImages', self.images[idx])
        image = Image.open(image_path).convert('RGB')

        mask_path = os.path.join(self.root, 'PedMasks', self.masks[idx])
        # Do not convert the mask to RGB: each distinct value encodes one instance.
        mask = np.array(Image.open(mask_path))

        # np.unique returns sorted values; the first one is taken to be the
        # background and dropped (assumes every mask contains background pixels).
        obj_ids = np.unique(mask)[1:]
        num_objs = len(obj_ids)

        # Split the value-coded mask into one boolean mask per instance.
        masks = mask == obj_ids[:, None, None]

        # Bounding box of each instance; x runs along dim 1, y along dim 0.
        boxes = []
        for instance_mask in masks:
            rows, cols = np.where(instance_mask)
            boxes.append([int(cols.min()), int(rows.min()),
                          int(cols.max()), int(rows.max())])

        # reshape keeps the tensor N x 4 even when the image has no instances.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)

        # There is only one foreground class.
        labels = torch.ones((num_objs,), dtype=torch.int64)
        masks = torch.as_tensor(masks, dtype=torch.uint8)

        image_id = torch.as_tensor([idx])

        # Box area = (ymax - ymin) * (xmax - xmin).
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        # No instance is flagged as a crowd.
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {
            'boxes': boxes,
            'labels': labels,
            'masks': masks,
            'image_id': image_id,
            'area': area,
            'iscrowd': iscrowd,
        }

        if self.transforms is not None:
            # The transforms return the new pair; it must be captured to take effect.
            image, target = self.transforms(image, target)

        return image, target
        

### There are two methods of training, in addition to training a new model
1. Fine-tune an already trained model (i.e., only adjust the parameters of the last layers)
2. Change the backbone of an existing model

#### Starting with fine-tuning an already trained model

In [7]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

In [17]:
import transforms as T
def get_transform(train):
    """Compose the image/target transforms for a dataset split.

    Tensor conversion is always applied; when ``train`` is truthy a random
    horizontal flip with probability 0.5 is appended after it.
    """
    steps = [T.ToTensor()]
    if train:
        steps += [T.RandomHorizontalFlip(0.5)]
    return T.Compose(steps)

ModuleNotFoundError: No module named 'sgmllib'

In [8]:
def get_model_instance_segmentation(num_classes):
    """Build a pre-trained Mask R-CNN whose box and mask heads predict ``num_classes``.

    Args:
        num_classes: Number of output classes given to both replacement heads.

    Returns:
        The model with ``roi_heads.box_predictor`` and
        ``roi_heads.mask_predictor`` swapped for freshly constructed predictors.
    """
    # Imported here because the file-level imports only bring in FastRCNNPredictor.
    from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

    # Load an instance segmentation model pre-trained on COCO. This must be
    # Mask R-CNN: Faster R-CNN has no mask head to replace below.
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)

    # Number of input features of the existing box classifier.
    in_features = model.roi_heads.box_predictor.cls_score.in_features

    # Replace the pre-trained box head with a new one.
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    # Number of input channels of the existing mask classifier.
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    # Replace the mask predictor with a new one.
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
                                                       hidden_layer,
                                                       num_classes)

    return model

In [None]:
# Use built-in engines and utils for easy evaluation
from engine import train_one_epoch, evaluate
import utils

def main():
    device = torch.device('cpu');
    
    num_classes = 2; #only 2 classes
