In [1]:
!pip install pytorch-lightning

Collecting pytorch-lightning
[?25l  Downloading https://files.pythonhosted.org/packages/c4/99/68da5c6ca999de560036d98c492e507d17996f5eeb7e76ba64acd4bbb142/pytorch_lightning-1.2.8-py3-none-any.whl (841kB)
[K     |████████████████████████████████| 849kB 19.3MB/s 
Collecting future>=0.17.1
[?25l  Downloading https://files.pythonhosted.org/packages/45/0b/38b06fd9b92dc2b68d58b75f900e97884c45bedd2ff83203d933cf5851c9/future-0.18.2.tar.gz (829kB)
[K     |████████████████████████████████| 829kB 53.2MB/s 
Collecting fsspec[http]>=0.8.1
[?25l  Downloading https://files.pythonhosted.org/packages/e9/91/2ef649137816850fa4f4c97c6f2eabb1a79bf0aa2c8ed198e387e373455e/fsspec-2021.4.0-py3-none-any.whl (108kB)
[K     |████████████████████████████████| 112kB 54.0MB/s 
[?25hCollecting PyYAML!=5.4.*,>=5.1
[?25l  Downloading https://files.pythonhosted.org/packages/64/c2/b80047c7ac2478f9501676c988a5411ed5572f35d1beff9cae07d321512c/PyYAML-5.3.1.tar.gz (269kB)
[K     |████████████████████████████████| 27

In [3]:
from google.colab import drive
drive.mount('/content/gdrive')  #mounting
%cd '/content/gdrive/MyDrive/DEEPLOBE/segmentation'


Mounted at /content/gdrive
/content/gdrive/MyDrive/DEEPLOBE/segmentation


In [4]:
cd 'maskrcnn_pistol_data'

/content/gdrive/My Drive/DEEPLOBE/segmentation/maskrcnn_pistol_data


In [64]:
#importing dependent libraries
import os
import torch
import torch.utils.data
import torchvision
import numpy as np
from PIL import Image
from pycocotools.coco import COCO
from torchvision import transforms as T

import matplotlib.pyplot as plt
import cv2
import random
from pathlib import Path

from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

import pytorch_lightning as pl  
from pytorch_lightning.callbacks import ModelCheckpoint


import torch.nn as nn
from torch.utils.data import DataLoader

class MyOwnDataset(torch.utils.data.Dataset):
    """
    Custom class inheriting from Pytorch's Dataset utility class
    that allows applying custom transformations on user-datasets
    
    It returns transformed images and masks in an iterator object 
    that can be indexed according to the batch sizes
    in the data loading phase for passing to model
    """
    def __init__(self, root, annotation, transforms=None):
        self.root = root
        self.transforms = transforms
        self.coco = COCO(annotation)
        self.ids = list(sorted(self.coco.imgs.keys()))

    def __getitem__(self, index):
        # Own coco file
        coco = self.coco
        # Image ID
        img_id = self.ids[index]
        # List: get annotation id from coco
        ann_ids = coco.getAnnIds(imgIds=img_id)
        # Dictionary: target coco_annotation file for an image
        coco_annotation = coco.loadAnns(ann_ids)
        # filename for input image
        path = coco.loadImgs(img_id)[0]['file_name']
        # filename of input mask corresponding to the above image file
        mask_file_path = os.path.splitext(path)[0] + '.png'
        # open the input image
        img = Image.open(os.path.join(self.root, 'images',path)).convert('RGB')
        ## mask ##
        mask = Image.open(os.path.join(self.root,'masks',mask_file_path))
        
        # number of objects in the image
        num_objs = len(coco_annotation)

        # Bounding boxes for objects
        # In coco format, bbox = [xmin, ymin, width, height]
        # In pytorch, the input should be [xmin, ymin, xmax, ymax]
        boxes = []
        for i in range(num_objs):
            xmin = coco_annotation[i]['bbox'][0]
            ymin = coco_annotation[i]['bbox'][1]
            xmax = xmin + coco_annotation[i]['bbox'][2]
            ymax = ymin + coco_annotation[i]['bbox'][3]
            boxes.append([xmin, ymin, xmax, ymax])
            
        boxes = torch.as_tensor(boxes, dtype=torch.float32)

        # Tensorise img_id
        img_id = torch.tensor([img_id])
        # Size of bbox (Rectangular)
        areas = []
        iscrowd = []
        labels = []

        for i in range(num_objs):
            areas.append(coco_annotation[i]['area'])
            iscrowd.append(coco_annotation[i]['iscrowd'])
            labels.append(coco_annotation[i]['category_id'])

        areas,iscrowd,labels = map(torch.tensor, [areas,iscrowd,labels])
        
        if self.transforms is not None:
            img = self.transforms(img)
            mask = self.transforms(mask)
        mask = mask.numpy().reshape(mask.shape,order='F')
        mask = torch.as_tensor(mask, dtype=torch.uint8)

        # Annotation is in dictionary format
        my_annotation = {}
        my_annotation["boxes"] = boxes
        my_annotation["labels"] = labels
        my_annotation["image_id"] = img_id
        my_annotation["area"] = areas
        my_annotation["iscrowd"] = iscrowd
        my_annotation["masks"] = mask
        
        return img, my_annotation

    def __len__(self):
        return len(self.ids)



def get_transform(train):
    custom_transforms = []
    custom_transforms.append(T.ToTensor())
    return T.Compose(custom_transforms)


def process_data(loc):
  """
   For loading imagefiles, preprocessing like train-test split based on a specified 
    validation set size.
    
    Passing data directory and transforms for creating a dataset iterator
    
    Creates a dataset instance from the UserDataset class by passing data directory path.
    The data directory should have the following structure:

                    data_dir
             ------Image
             ---------Image1
             ---------Image2
             ---------
             ---------
                   
             ---------ImageN
             ------Mask
             ---------Mask1
             ---------Mask2
             ---------
             ---------
             ---------MaskN
  """
  path = Path(loc)
  data_dir = path
  coco_instances = path/'coco_instances.json'
  # create own Dataset
  my_dataset = MyOwnDataset(root=data_dir,
                            annotation=coco_instances,
                            transforms=get_transform(train=True)) 
  #Category dictinoray for output tagging
  instances = COCO(coco_instances)
  categories = instances.loadCats(instances.getCatIds())
  class_dict = {d['id']:d['name'] for d in categories}  
  return my_dataset, class_dict, instances

def get_class(loc):
  path = Path(loc)
  data_dir = path
  coco_instances = path/'coco_instances.json'
  instances = COCO(coco_instances)
  return instances


class DataModuleInstance(pl.LightningDataModule):
  """
  This class also calls the CustomDataset class above for creating 
  a dataset iterator and creates dataloaders using torch.utils.Dataloaders class
  """
  def __init__(self,my_dataset):
    super(DataModuleInstance,self).__init__()
    self.bs=10
    self.my_dataset=my_dataset
    samples = len(my_dataset)
    test_counts = int(samples*.2)
    train_counts = samples-test_counts
    self.train_set, self.test_set = torch.utils.data.random_split(self.my_dataset, [train_counts, test_counts])
    # loading data using Dataloader: Train, val
    def collate_fn(batch):
      return tuple(zip(*batch))
    self.collate_fn=collate_fn  
  def train_dataloader(self):
    return DataLoader(self.train_set,batch_size=self.bs,shuffle=False,collate_fn=self.collate_fn)
  def test_dataloader(self):
    return DataLoader(self.test_set,batch_size=self.bs,shuffle=False,collate_fn=self.collate_fn)



class InstanceSegment(pl.LightningModule):
  """
   
    calls a device object based on GPU availability
    with a downloaded pretrained model from torchvision library
    changes the final layer based on the target class count
    trains the model
   
    
    
  """
  def __init__(self,path):
    super(InstanceSegment,self).__init__()
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # add model
    instance=get_class(path)
    num_classes = 1 + len(instance.getCatIds())
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,hidden_layer,num_classes)

    self.model=model 
    self.model.to(device)
    self.params = [p for p in self.model.parameters() if p.requires_grad]

  def forward(self,x):
    output = self.model(x)
    return output


  def configure_optimizers(self): # specifying optimizer and learning rate
    return torch.optim.SGD(self.params, lr=0.005, momentum=0.08)


    # Training , validating phase with lightening method
  def training_step(self, train_batch,batch_idx):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    imgs, annotations  = train_batch
    imgs = list(img.to(device) for img in imgs)
    annotations = [{key: value.to(device) for key, value in annotation.items()} for annotation in annotations]
    loss_dict = self.model(imgs, annotations)
    losses = sum(loss for loss in loss_dict.values())
    self.log('Training Loss', losses, on_step=True, on_epoch=True, sync_dist=True) 
    return losses 
  def validation_step(self, valid_batch, batch_idx):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    imgs, annotations  = valid_batch
    imgs = list(img.to(device) for img in imgs)
    annotations = [{key: value.to(device) for key, value in annotation.items()} for annotation in annotations]
    loss_dict = self.model(imgs, annotations)
    losses = sum(loss for loss in loss_dict.values())
    self.log('Validation Loss', losses, on_step=True, on_epoch=True, sync_dist=True) 
    return losses 


def train_model(epochs, my_dataset, path):
     """
     creating the model
     """
     max_epoc= 10 if epochs is None else epochs
     data_module = DataModuleInstance(my_dataset) 
     model_module = InstanceSegment(path) # change
     gpu=1 if torch.cuda.is_available() else 0 # setting gpu based on availability
     checkpoint_callback = ModelCheckpoint(monitor ='Training Loss',dirpath = path)
     trainer = pl.Trainer(max_epochs=max_epoc,gpus=gpu,default_root_dir = path,callbacks = [checkpoint_callback])
     trainer.fit(model_module, data_module) # fit model


def random_colour_masks(image):
    colours = [[0, 255, 0],[0, 0, 255],[255, 0, 0],[0, 255, 255],[255, 255, 0],[255, 0, 255],[80, 70, 180],[250, 80, 190],[245, 145, 50],[70, 150, 250],[50, 190, 190]]
    r = np.zeros_like(image).astype(np.uint8)
    g = np.zeros_like(image).astype(np.uint8)
    b = np.zeros_like(image).astype(np.uint8)
    r[image == 1], g[image == 1], b[image == 1] = colours[random.randrange(0,10)]
    coloured_mask = np.stack([r, g, b], axis=2)
    return coloured_mask

def get_video_prediction(img_file, model_weight,annotation_file_path,threshold):
    model=InstanceSegment(annotation_file_path)
    model=model.load_from_checkpoint(model_weight,path=annotation_file_path)
    instances=get_class(annotation_file_path)
    categories = instances.loadCats(instances.getCatIds())
    class_dict = {d['id']:d['name'] for d in categories} 
    img = Image.open(img_file)
    transform = T.Compose([T.ToTensor()])
    img = transform(img)
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    img = img.to(device) #cuda()
    model=model.to(device)
    model=model.eval()
    pred = model([img])
    pred_score = list(pred[0]['scores'].detach().to('cpu').numpy())
    pred_t = [pred_score.index(x) for x in pred_score if x>threshold][-1]    
    masks = (pred[0]['masks']>0.5).squeeze().detach().to('cpu').numpy()
    pred_class = [class_dict[i] for i in list(pred[0]['labels'].detach().to('cpu').numpy())]
    pred_boxes = [[(i[0], i[1]), (i[2], i[3])] for i in list(pred[0]['boxes'].detach().to('cpu').numpy())]
    masks = masks[:pred_t+1]
    boxes = pred_boxes[:pred_t+1]
    pred_cls = pred_class[:pred_t+1]
    img1 = cv2.imread(img_file)
    img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2RGB)
    for i in range(len(masks)):
        rgb_mask = random_colour_masks(masks[i])
        img1 = cv2.addWeighted(img1, 1, rgb_mask, 0.5, 0)
        cv2.rectangle(img1, boxes[i][0], boxes[i][1],color=(0, 255, 0), thickness=3)
        cv2.putText(img1,pred_cls[i], boxes[i][0], cv2.FONT_HERSHEY_SIMPLEX, 3, (0,255,0),thickness=3)
    return img1
    
def video_prediction(input_video,output_video,model_weight,annotation_file_path, threshold_value):
  video_capture = cv2.VideoCapture(input_video)
  fourcc = cv2.VideoWriter_fourcc(*"MJPG")
  out = cv2.VideoWriter(output_video,fourcc, 30, (1280,720),True)
  a=0
  while True:
      cap,frame = video_capture.read()
      if cap == False:
        break
      a+=1
      print('frame count: ',a)
      rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
      newframe = get_video_prediction(rgb_frame, model_weight,annotation_file_path,threshold)
      out.write(newframe)
  out.release()
  video_capture.release()

def image_prediction(img_file, model_weight,annotation_file_path,threshold,output_file_name):
    model=InstanceSegment(annotation_file_path)
    model=model.load_from_checkpoint(model_weight,path=annotation_file_path)
    instances=get_class(annotation_file_path)
    categories = instances.loadCats(instances.getCatIds())
    class_dict = {d['id']:d['name'] for d in categories} 
    img = Image.open(img_file)
    transform = T.Compose([T.ToTensor()])
    img = transform(img)
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    img = img.to(device) #cuda()
    model=model.to(device)
    model=model.eval()
    pred = model([img])
    pred_score = list(pred[0]['scores'].detach().to('cpu').numpy())
    pred_t = [pred_score.index(x) for x in pred_score if x>threshold][-1]    
    masks = (pred[0]['masks']>0.5).squeeze().detach().to('cpu').numpy()
    pred_class = [class_dict[i] for i in list(pred[0]['labels'].detach().to('cpu').numpy())]
    pred_boxes = [[(i[0], i[1]), (i[2], i[3])] for i in list(pred[0]['boxes'].detach().to('cpu').numpy())]
    masks = masks[:pred_t+1]
    boxes = pred_boxes[:pred_t+1]
    pred_cls = pred_class[:pred_t+1]
    img1 = cv2.imread(img_file)
    img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2RGB)
    for i in range(len(masks)):
        rgb_mask = random_colour_masks(masks[i])
        img1 = cv2.addWeighted(img1, 1, rgb_mask, 0.5, 0)
        cv2.rectangle(img1, boxes[i][0], boxes[i][1],color=(0, 255, 0), thickness=3)
        cv2.putText(img1,pred_cls[i], boxes[i][0], cv2.FONT_HERSHEY_SIMPLEX, 3, (0,255,0),thickness=3)
    plt.figure(figsize=(20,30))
    plt.imshow(img1)
    plt.xticks([])
    plt.yticks([])
    plt.show()
    filename = output_file_name + '.jpg'
    cv2.imwrite(filename, img1)


class Instance():
    def load_data(self,path):
      self.path=path
      self.my_dataset, self.class_dict, self.instances = process_data(self.path) # process data get the files needed
    def train(self,epochs):
      self.epochs=epochs
      self.model=train_model(self.epochs,self.my_dataset, self.path )
    

In [33]:
ls

[0m[01;34mdeeplobe-ai-master[0m/  deeplobe-ai-master.zip  [01;34mInstance[0m/


In [40]:
instanceseg_model = Instance()
instanceseg_model.load_data('/content/gdrive/MyDrive/Deeplobe GIT/Instance')
# instanceseg_model.train(epochs = 1) #default epochs = 10 if not mentioned
# instanceseg_model.predict("sampleimage",threshold = 0.1) #threshold ranges from 0 to 1, default = 0.1 if not mentioned
# instanceseg_model.predict_on_video("sample_video.mp4","output_video.avi",threshold = 0.7)

loading annotations into memory...
Done (t=0.05s)
creating index...
index created!
loading annotations into memory...
Done (t=0.05s)
creating index...
index created!


In [41]:
instanceseg_model.train(epochs = 1) #default epochs = 10 if not mentioned

loading annotations into memory...
Done (t=0.06s)
creating index...
index created!


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type     | Params
-----------------------------------
0 | model | MaskRCNN | 44.4 M
-----------------------------------
44.2 M    Trainable params
222 K     Non-trainable params
44.4 M    Total params
177.515   Total estimated model params size (MB)


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…



HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…




In [42]:
  model=InstanceSegment('/content/gdrive/MyDrive/Deeplobe GIT/Instance')
  model=model.load_from_checkpoint('/content/gdrive/MyDrive/Deeplobe GIT/Instance/epoch=0-step=79.ckpt',path='/content/gdrive/MyDrive/Deeplobe GIT/Instance')

loading annotations into memory...
Done (t=0.06s)
creating index...
index created!
loading annotations into memory...
Done (t=0.05s)
creating index...
index created!


In [65]:
image_prediction('/content/gdrive/MyDrive/Deeplobe GIT/Instance/images/00000000.png', '/content/gdrive/MyDrive/Deeplobe GIT/Instance/epoch=0-step=79.ckpt',
               '/content/gdrive/MyDrive/Deeplobe GIT/Instance',0.5,'test5')

Output hidden; open in https://colab.research.google.com to view.