In [1]:
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [2]:
!pip install pytorch-lightning
!pip install segmentation-models-pytorch

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pytorch-lightning
  Downloading pytorch_lightning-1.7.2-py3-none-any.whl (705 kB)
[K     |████████████████████████████████| 705 kB 8.0 MB/s 
Collecting pyDeprecate>=0.3.1
  Downloading pyDeprecate-0.3.2-py3-none-any.whl (10 kB)
Collecting torchmetrics>=0.7.0
  Downloading torchmetrics-0.9.3-py3-none-any.whl (419 kB)
[K     |████████████████████████████████| 419 kB 62.6 MB/s 
[?25hCollecting tensorboard>=2.9.1
  Downloading tensorboard-2.10.0-py3-none-any.whl (5.9 MB)
[K     |████████████████████████████████| 5.9 MB 61.6 MB/s 
Collecting fsspec[http]!=2021.06.0,>=2021.05.0
  Downloading fsspec-2022.7.1-py3-none-any.whl (141 kB)
[K     |████████████████████████████████| 141 kB 70.2 MB/s 
[?25hCollecting PyYAML>=5.4
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |█████████████████████

In [3]:
import pytorch_lightning as pl
import pandas as pd
import cv2
import os 
import torchvision
from torch import nn
from torch.utils.data import Dataset ,DataLoader, random_split
import numpy as np
import torch
from sklearn.model_selection import train_test_split 
from torchvision import transforms, datasets, models
import matplotlib.pyplot as plt
import torchmetrics
from torchmetrics.functional import accuracy
from pytorch_lightning.callbacks import LearningRateMonitor
from pytorch_lightning.callbacks.progress import TQDMProgressBar
from pytorch_lightning.loggers import CSVLogger
from torchvision.utils import make_grid
import math
import torch.nn.functional as F
from torchvision.ops import box_convert
from PIL import Image
from pycocotools.coco import COCO
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

In [4]:
cfg = {
  "datasets": "/content/drive/MyDrive/Datasets/berat_kurar",

  # base path of the dataset
  "dataset_path": "/content/drive/MyDrive/Datasets/mask-image",
  "image_path": "/content/drive/MyDrive/Datasets/mask-image/original",
  "mask_path": "/content/drive/MyDrive/Datasets/mask-image/labels",

  # define the path of the images and masks dataset that will be used in training
  "train_image_dataset": "/content/drive/MyDrive/Datasets/berat_kurar/ptrain0",
  "train_mask_dataset": "/content/drive/MyDrive/Datasets/berat_kurar/pltrain0",
  "val_image_dataset": "/content/drive/MyDrive/Datasets/berat_kurar/pvalidation0",
  "val_mask_dataset":  "/content/drive/MyDrive/Datasets/berat_kurar/plvalidation0",
  "test_image_dataset": "/content/drive/MyDrive/Datasets/berat_kurar/ptest0",
  "test_mask_dataset": "/content/drive/MyDrive/Datasets/berat_kurar/pltest0",

  # determine the device to be used for training and evaluation
  "device": "cuda" if torch.cuda.is_available() else "cpu",

  # determine if we'll be pinning memory during data loading
  "pin_memory": True if torch.cuda.is_available() else False,

  # define size of the image
  "input_image_height": 224,
	"input_image_width": 224,

  # define the number of classes, input channels
  "num_class": 1,
  "num_channel": 3,

  # define the backbone
  "backbone": "resnet34",

  # define the encoder weights
  "encoder_weight": "imagenet",

  # Initialize the learning rate, number of epochs to train for and the batch size
  "lr": 0.001,
  "num_epochs": 40,
  "batch_size": 64,

  # define threshold to filter weak predictions
  "threshold": 0.5,

  # define the path that the model checkpoints, prediction will be saved
  "base_output": "/content/drive/MyDrive/output",
  "device": "cuda" if torch.cuda.is_available() else "cpu",
}

In [5]:
class SegmentationDataset(Dataset):
  def __init__(self, img_paths, mask_paths, transforms):
    self.img_paths = img_paths
    self.mask_paths = mask_paths
    self.transforms = transforms

  def __len__(self):
    return len(self.img_paths)

  def __getitem__(self, idx):
    # load the image from disk, swap its channels from BGR to RGB,
    # and read the associated mask from disk in grayscale mode
    image = cv2.imread(self.img_paths[idx])
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    mask = cv2.imread(self.mask_paths[idx], 0)

    # check to see if we are applying any transformations
    if self.transforms is not None:
      # apply the transformations to both image and its mask
      image = self.transforms(image)
      mask = self.transforms(mask)

    return image, mask

In [6]:
from imutils import paths

train_image_set = sorted(list(paths.list_images(cfg["train_image_dataset"])))
train_mask_set = sorted(list(paths.list_images(cfg["train_mask_dataset"])))
val_image_set = sorted(list(paths.list_images(cfg["val_image_dataset"])))
val_mask_set = sorted(list(paths.list_images(cfg["val_mask_dataset"])))
test_image_set = sorted(list(paths.list_images(cfg["test_image_dataset"])))
test_mask_set = sorted(list(paths.list_images(cfg["test_mask_dataset"])))

In [7]:
transform = transforms.Compose([
  transforms.ToPILImage(),
 	transforms.Resize((cfg["input_image_height"],cfg["input_image_width"])),
	transforms.ToTensor()])

In [8]:
class LitDataModule(pl.LightningDataModule):
    def __init__(self, transforms=None):
      super().__init__()
      self.save_hyperparameters()
      self.transform = transforms

    def setup(self, stage=None):
      self.train_dataset = SegmentationDataset(img_paths=train_image_set, mask_paths=train_mask_set, transforms=transform)
      self.val_dataset = SegmentationDataset(img_paths=val_image_set, mask_paths=val_mask_set, transforms=transform)
      self.test_dataset = SegmentationDataset(img_paths=test_image_set, mask_paths=test_mask_set, transforms=transform)

    def train_dataloader(self):
      return DataLoader(self.train_dataset,
                      batch_size=cfg["batch_size"],
                      shuffle=True)

    def val_dataloader(self):
      return DataLoader(self.val_dataset,
                        batch_size=cfg["batch_size"],
                        shuffle=False)

    def test_dataloader(self):
      return DataLoader(self.test_dataset,
                        batch_size=cfg["batch_size"],
                        shuffle=False)

In [9]:
import segmentation_models_pytorch as smp

class LitModel(pl.LightningModule):
  def __init__(self):
    super().__init__()
    self.model = smp.Unet(
        encoder_name = cfg["backbone"],
        encoder_weights = cfg["encoder_weight"],
        in_channels = cfg["num_channel"],
        classes = cfg["num_class"],
        activation = None
      )
    
  def forward(self, x):
    self.model.eval()
    x = self.model(x)
    return x

  def training_step(self, batch, batch_idx):
    x, y = batch
    logits = self.forward(x)
    loss = F.binary_cross_entropy_with_logits(logits, y)
    self.log("train_loss", loss)
    return loss

  def validation_step(self, batch, batch_idx):
    x, y = batch
    logits = self.forward(x)
    loss = F.binary_cross_entropy_with_logits(logits, y)
    self.log("val_loss", loss)
    return loss

  def test_step(self, batch, batch_idx):
    x, y = batch
    logits = self.forward(x)
    loss = F.binary_cross_entropy_with_logits(logits, y)
    self.log("test_loss", loss)
    return loss
     
  def configure_optimizers(self):
    return torch.optim.Adam(self.parameters(), lr=cfg["lr"])

In [10]:
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

dm = LitDataModule()
dm.setup()
model = LitModel()
trainer = pl.Trainer(max_epochs=cfg["num_epochs"],
                  accelerator="auto",
                  devices=1 if torch.cuda.is_available() else None,
                  callbacks=[LearningRateMonitor(logging_interval="step"),
                                TQDMProgressBar(refresh_rate=10),
                             EarlyStopping(monitor="val_loss", mode="min")],
)

Downloading: "https://download.pytorch.org/models/resnet34-333f7ec4.pth" to /root/.cache/torch/hub/checkpoints/resnet34-333f7ec4.pth


  0%|          | 0.00/83.3M [00:00<?, ?B/s]

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


In [11]:
model = torch.load(cfg["base_output"] + "/pyimagesearch_example_lightning_testLoss0_25.pth").to(cfg["device"])

In [12]:
 test_imgs = os.listdir("/content/drive/MyDrive/Datasets/mask-image/original")

In [15]:
pages = []
for page in test_imgs[25:]:
  page = os.path.join("/content/drive/MyDrive/Datasets/mask-image/original/", page)
  pages.append(page)

pages

['/content/drive/MyDrive/Datasets/mask-image/original/25.png',
 '/content/drive/MyDrive/Datasets/mask-image/original/27.png',
 '/content/drive/MyDrive/Datasets/mask-image/original/28.png',
 '/content/drive/MyDrive/Datasets/mask-image/original/29.png',
 '/content/drive/MyDrive/Datasets/mask-image/original/17.png']

In [16]:
outersize=320
trimsize=110
innersize=outersize-2*trimsize



In [17]:
def getImageArr( img , width , height):
  img = cv2.resize(img, ( width , height ))
  img = img.astype(np.float32)
  img = img/255.0

  img = np.rollaxis(img, 2, 0)
  return img

In [None]:
def make_predictions(img):
  # set the model evaluation mode
  model.eval()

  # turn off gradient tracking
  with torch.no_grad():
    # load the image from disk, swap its color channels, cast it to float data
    # type and scale its pixel values
    image = img
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = image.astype("float32") / 255.0

    # resize the image and make a copy of it for visualization
    image = cv2.resize(image, (224, 224))
    orig = image.copy()

    # find the filename and generate the path to ground truth mask
    filename = image_path.split(os.path.sep)[-1]
    #ground_truth_path = os.path.join(cfg["test_mask_dataset"], filename)
    ground_truth_path = os.path.join("/content/drive/MyDrive/Datasets/mask-image/labels", filename)
    # load the ground truth segmentation mask in grayscale mode and resize it
    gt_mask = cv2.imread(ground_truth_path, 0)
    gt_mask = cv2.resize(gt_mask, (cfg["input_image_height"], cfg["input_image_width"]))

    # make the channel axis to be the leading one, add a batch dimension, create a pytorch
    # tensor, and flash it to the current device
    image = np.transpose(image, (2, 0, 1))
    image = np.expand_dims(image, 0)
    image = torch.from_numpy(image).to(cfg['device'])

    # make the prediction, pass the results through the sigmoid function
    # and convert the result to a numpy array
    pred_mask = model(image).squeeze()
    pred_mask = torch.sigmoid(pred_mask)
    pred_mask = pred_mask.cpu().numpy()

    # filter out the weak predictions and convert them to integers
    pred_mask = (pred_mask > cfg['threshold'])*255
    pred_mask = pred_mask.astype(np.uint8)

In [None]:
def predict(img):
    X = getImageArr(img ,   , args.input_height  )
    pr = m.predict( np.array([X]) )[0]
    pr = pr.reshape(( output_height ,  output_width , n_classes ) ).argmax( axis=2 )
    seg_img = np.zeros( ( output_height , output_width , 3  ) )
    for c in range(n_classes):
        seg_img[:,:,0] += ( (pr[:,: ] == c )*( colors[c][0] )).astype('uint8')
        seg_img[:,:,1] += ((pr[:,: ] == c )*( colors[c][1] )).astype('uint8')
        seg_img[:,:,2] += ((pr[:,: ] == c )*( colors[c][2] )).astype('uint8')
    seg_img = cv2.resize(seg_img  , (input_width , input_height ))
    return seg_img

In [None]:
for path in pages:
    page=cv2.imread(path,1)
    rows,cols,ch=page.shape
    x=rows//innersize
    y=cols//innersize
    
    prows=(x+1)*innersize+2*trimsize
    pcols=(y+1)*innersize+2*trimsize
    ppage=np.zeros([prows,pcols,3])
    ppage[trimsize:rows+trimsize,trimsize:cols+trimsize,:]=page[:,:,:]
    pred=np.zeros([rows,cols,3])
    for i in range(0,prows-outersize,innersize):
        for j in range(0,pcols-outersize,innersize):
            patch=ppage[i:i+outersize,j:j+outersize,:]
            ppatch=predict(patch)
            pred[i:i+innersize,j:j+innersize,:]=ppatch[trimsize:trimsize+innersize,trimsize:trimsize+innersize,:]
    cv2.imwrite('predicts/'+path.split('/')[2],pred)