In [1]:
%%shell
wget -q https://www.cis.upenn.edu/~jshi/ped_html/PennFudanPed.zip .
unzip -q PennFudanPed.zip



In [2]:
import os
import numpy as np
import torch
import torch.utils.data
from PIL import Image


class Dataset(torch.utils.data.Dataset):
    def __init__(self, root, transforms=None):
        self.root = root
        self.transforms = transforms
        self.imgs = list(sorted(os.listdir(os.path.join(root, "PNGImages"))))
        self.masks = list(sorted(os.listdir(os.path.join(root, "PedMasks"))))

    def __getitem__(self, idx):
        img_path = os.path.join(self.root, "PNGImages", self.imgs[idx])
        mask_path = os.path.join(self.root, "PedMasks", self.masks[idx])
        img = Image.open(img_path).convert("RGB")
        mask = Image.open(mask_path)

        mask = np.array(mask)
        obj_ids = np.unique(mask)
        obj_ids = obj_ids[1:]


        masks = mask == obj_ids[:, None, None]
        num_objs = len(obj_ids)
        boxes = []
        for i in range(num_objs):
            pos = np.where(masks[i])
            xmin = np.min(pos[1])
            xmax = np.max(pos[1])
            ymin = np.min(pos[0])
            ymax = np.max(pos[0])
            boxes.append([xmin, ymin, xmax, ymax])

        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.ones((num_objs,), dtype=torch.int64)
        masks = torch.as_tensor(masks, dtype=torch.uint8)

        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["masks"] = masks
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        return len(self.imgs)

In [3]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

      
def get_instance_segmentation_model(num_classes):
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)

    in_features = model.roi_heads.box_predictor.cls_score.in_features

    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
                                                       hidden_layer,
                                                       num_classes)

    return model

In [4]:
%%shell
git clone https://github.com/pytorch/vision.git
cd vision
git checkout v0.3.0

cp references/detection/utils.py ../
cp references/detection/transforms.py ../
cp references/detection/coco_eval.py ../
cp references/detection/engine.py ../
cp references/detection/coco_utils.py ../

Cloning into 'vision'...
remote: Enumerating objects: 129440, done.[K
remote: Counting objects: 100% (538/538), done.[K
remote: Compressing objects: 100% (510/510), done.[K
remote: Total 129440 (delta 495), reused 48 (delta 26), pack-reused 128902[K
Receiving objects: 100% (129440/129440), 252.15 MiB | 11.99 MiB/s, done.
Resolving deltas: 100% (113402/113402), done.
Note: checking out 'v0.3.0'.

You are in 'detached HEAD' state. You can look around, make experimental
changes and commit them, and you can discard any commits you make in this
state without impacting any branches by performing another checkout.

If you want to create a new branch to retain commits you create, you may
do so (now or later) by using -b with the checkout command again. Example:

  git checkout -b <new-branch-name>

HEAD is now at be376084d version check against PyTorch's CUDA version




In [32]:
from engine import train_one_epoch, evaluate
import utils
import transforms as T


def get_transform(train):
    transforms = []
    transforms.append(T.ToTensor())
    if train:
        transforms.append(T.RandomHorizontalFlip(0.5))

    return T.Compose(transforms)

In [33]:
dataset = Dataset('PennFudanPed', get_transform(train=True))
dataset_test = Dataset('PennFudanPed', get_transform(train=False))

torch.manual_seed(42)
indices = torch.randperm(len(dataset)).tolist()
dataset = torch.utils.data.Subset(dataset, indices[:-50])
dataset_test = torch.utils.data.Subset(dataset_test, indices[-50:])

data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=2, shuffle=True, num_workers=4,
    collate_fn=utils.collate_fn)

data_loader_test = torch.utils.data.DataLoader(
    dataset_test, batch_size=1, shuffle=False, num_workers=4,
    collate_fn=utils.collate_fn)

  cpuset_checked))


In [4]:
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

num_classes = 2

model = get_instance_segmentation_model(num_classes)
model.to(DEVICE)

params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
                            momentum=0.9, weight_decay=0.0005)

lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                               step_size=3,
                                               gamma=0.1)

In [None]:
num_epochs = 10

for epoch in range(num_epochs):
    train_one_epoch(model, optimizer, data_loader, DEVICE, epoch, print_freq=10)
    lr_scheduler.step()
    evaluate(model, data_loader, device=DEVICE)
    evaluate(model, data_loader_test, device=DEVICE)

In [9]:
import matplotlib.pyplot as plt
import torchvision.transforms as T
transform = T.ToPILImage()

def predict_and_save(img):
  model.eval()

  with torch.no_grad():
    prediction = model([img.to(DEVICE)])

  masks = np.array(prediction[0]['masks'].cpu())

  boxes = []

  for i in range(len(masks)):
    pos = np.where(masks[i][0])
    xmin = np.min(pos[1])
    xmax = np.max(pos[1])
    ymin = np.min(pos[0])
    ymax = np.max(pos[0])
    boxes.append([xmin, ymin, xmax, ymax])

  c = (img.shape[1]/2, img.shape[2]/2)
  idx, boxes = min(enumerate(boxes), 
            key=lambda x: ((x[1][2]+x[1][0])/2-c[0])**2 + 
                                ((x[1][3]+x[1][1])/2-c[1])**2)
          
  
  #r = prediction[0]['masks'][idx, 0].repeat(3,1,1).permute(1,2,0).cpu()*torch.permute(img, (1, 2, 0))
  r = torch.permute(img, (1, 2, 0))
  r = transform(img)
  mask = transform(prediction[0]['masks'][idx, 0].cpu())
  r.putalpha(mask)

  r.save('res.png')


img = torch.tensor(np.array(Image.open("st.jpeg")))/255.

predict_and_save(img.permute(2,0,1))

In [11]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [12]:
torch.save(model.state_dict(), '/content/drive/MyDrive/model.pt')

In [6]:
model.load_state_dict(torch.load('../model.pt',map_location=torch.device('cpu')))

<All keys matched successfully>