In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

from glob import glob
from datasets import Dataset, load_dataset, load_metric, DatasetDict
import pandas as pd
import cv2
import albumentations as A
from albumentations.pytorch import ToTensorV2
import matplotlib.pyplot as plt
from torchvision.utils import draw_segmentation_masks
from torchvision.transforms.functional import to_pil_image
from torchvision.transforms.v2 import ToTensor
import numpy as np
import os
from torchvision.transforms import v2 as v2
from transformers import SegformerConfig, SegformerImageProcessor, SegformerForSemanticSegmentation
from PIL import Image




#### BREAK POINT ####

In [329]:
# model loading and preprocessing
image_processor = SegformerImageProcessor.from_pretrained("nvidia/mit-b0")
image_processor.do_reduce_labels = True

id2label = {0 : "Background", 1 : "Plaque"}
label2id = {"Background" : 0, "Plaque" : 1}

model = SegformerForSemanticSegmentation.from_pretrained("nvidia/mit-b0",
                                                         num_labels=2,
                                                         id2label=id2label,
                                                         label2id=label2id)


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b0 and are newly initialized: ['decode_head.classifier.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.weight', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.batch_norm.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.running_mean', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [350]:
# img = Image.open("../data/processed/train/images/1.png")
# msk = Image.open("../data/processed/train/masks/1.png")

# img2 = Image.open("../data/processed/train/images/2.png")
# msk2 = Image.open("../data/processed/train/masks/2.png")

img = cv2.imread("../data/processed/train/images/1.png")
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
msk = cv2.imread("../data/processed/train/masks/1.png")
msk = cv2.cvtColor(msk, cv2.COLOR_BGR2GRAY)

img2 = cv2.imread("../data/processed/train/images/2.png")
img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2RGB)
msk2 = cv2.imread("../data/processed/train/masks/2.png")
msk2 = cv2.cvtColor(msk2, cv2.COLOR_BGR2GRAY)

msk = msk // 255
msk2 = msk2 // 255

img_com = [img, img2]
msk_com = [msk, msk2]

# img_com = image_processor(img_com, msk_com, return_tensors = "pt")

img_com = np.array(img_com)
img_com = img_com.transpose(0, 3, 1, 2)
img_com = torch.tensor(img_com, dtype = torch.float32)
msk_com = torch.tensor(np.array(msk_com), dtype = torch.long)

len(img_com)

2

In [352]:
out = model(pixel_values = img_com, labels = msk_com)
out.loss

tensor(0.6960, grad_fn=<NllLoss2DBackward0>)

tensor(0.6960, grad_fn=<NllLoss2DBackward0>)

## Dataset Object Points

In [5]:
from torch.utils.data import Dataset, DataLoader
from transformers import TrainingArguments
from transformers import get_cosine_schedule_with_warmup

In [314]:
# model loading and preprocessing
image_processor = SegformerImageProcessor.from_pretrained("nvidia/mit-b0")
image_processor.do_reduce_labels = True

id2label = {0 : "Background", 1 : "Plaque"}
label2id = {"Background" : 0, "Plaque" : 1}

model = SegformerForSemanticSegmentation.from_pretrained("nvidia/mit-b0",
                                                         num_labels=2,
                                                         id2label=id2label,
                                                         label2id=label2id)
model.config.semantic_loss_ignore_index = 255

Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b0 and are newly initialized: ['decode_head.classifier.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.weight', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.batch_norm.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.running_mean', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [315]:
class SegmentationDataset(Dataset):
    def __init__(self, root_path, file_names, dataset_type):
        self.root_path = root_path
        self.file_names = file_names
        self.dataset_type = dataset_type
        self.image_processor = SegformerImageProcessor.from_pretrained("nvidia/mit-b0")
         
    def __len__(self):
        return len(self.file_names)
    
    def __getitem__(self, index):
        
        img_index = self.file_names[index]
        
        img_path = os.path.join(self.root_path, self.dataset_type, "images", img_index)
        msk_path = os.path.join(self.root_path, self.dataset_type, "masks",  img_index)
        
        
        # read the images
#         img = Image.open(img_path)
#         msk = Image.open(msk_path)
        
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        msk = cv2.imread(msk_path)
        msk = cv2.cvtColor(msk, cv2.COLOR_BGR2GRAY)

        # normalize the mask to have category of 0 and 1
        msk = msk // 255
        
        model_inputs = self.image_processor(images = img,
                                            segmentation_maps=msk,
                                            return_tensors = "pt")
        
        return model_inputs

In [316]:
from glob import glob

# Train dataset
train_img_files = glob("../data/processed/train/images/*")
train_msk_files = glob("../data/processed/train/masks/*")

# Test dataset
test_img_files = glob("../data/processed/test/images/*")
test_msk_files = glob("../data/processed/test/masks/*")

# Validation dataset
val_img_files = glob("../data/processed/val/images/*")
val_msk_files = glob("../data/processed/val/masks/*")

In [317]:
train_files = []
test_files = []
validation_files = []

for file in train_img_files:
    train_files.append(file.split(sep = "\\")[1])

for file in test_img_files:
    test_files.append(file.split(sep = "\\")[1])

for file in val_img_files:
    validation_files.append(file.split(sep = "\\")[1])

In [42]:
# train_files = [f"{ind}.png" for ind in range(1, 998)]
# test_files = [f"{ind}.png" for ind in range(1, 301)]
# validation_files = [f"{ind}.png" for ind in range(1, 201)]

In [318]:
def collate_fn(batch):
    """
    Collate function to process a batch of images.
    """
    pixel_values = [torch.squeeze(example['pixel_values'], dim = 0) 
                    for example in batch]
    labels = [torch.squeeze(example['labels'], dim = 0) for example in batch]
    
    # Stack images into a single tensor
    pixel_values = torch.stack(pixel_values, dim = 0)
    # Convert labels to tensor
    labels = torch.stack(labels, dim = 0)
    
    
    return {'pixel_values': pixel_values, 'labels': labels}

In [319]:
# Dataset Object
train_dataset = SegmentationDataset(root_path = "../data/processed/",
                                    file_names = train_files,
                                    dataset_type = "train")

test_dataset = SegmentationDataset(root_path = "../data/processed/",
                                   file_names = test_files,
                                   dataset_type = "test")

validation_dataset = SegmentationDataset(root_path = "../data/processed/",
                                         file_names = validation_files,
                                         dataset_type = "val")

# Dataloader object
train_dataloader = DataLoader(dataset = train_dataset,
                              batch_size = 8,
                              shuffle = True,
                              collate_fn = collate_fn)
test_dataloader = DataLoader(dataset = test_dataset,
                              batch_size = 8,
                              shuffle = True,
                              collate_fn = collate_fn)
val_dataloader = DataLoader(dataset = validation_dataset,
                            batch_size = 8,
                            shuffle = True,
                            collate_fn = collate_fn)

In [320]:
# Testing the dataloader object
batch = next(iter(val_dataloader))

print(batch["pixel_values"].shape, batch["labels"].shape)

torch.Size([8, 3, 512, 512]) torch.Size([8, 512, 512])


In [321]:
NUM_EPOCHS = 10
BATCH_SIZE = 4
TOTAL_STEPS = len(train_dataloader) * NUM_EPOCHS
LEARNING_RATE = 0.001
GRADIENT_ACCUMULATION = 2


training_arguments = TrainingArguments(
    output_dir = "../experiments/experiment1/",
    overwrite_output_dir = True,
    do_train = True,
    do_eval = True,
    do_predict = True,
    evaluation_strategy = "steps",
    save_total_limit = 2,
    prediction_loss_only = False,
    per_device_train_batch_size = BATCH_SIZE,
    per_device_eval_batch_size = BATCH_SIZE,
    gradient_accumulation_steps = GRADIENT_ACCUMULATION,
    eval_accumulation_steps = GRADIENT_ACCUMULATION,
    save_strategy = "steps", 
    eval_steps = 10,
    num_train_epochs = NUM_EPOCHS,
    lr_scheduler_type = "cosine",
    logging_strategy = "steps",
    logging_steps = 10,
    load_best_model_at_end = True,
    learning_rate = LEARNING_RATE, fp16 = True
)

In [322]:
import torch
from torch import nn
import evaluate

############################# THIS CODE IS TAKEN FROM THE HUGGINGFACE TRANSFORMERS #######################################
metric = evaluate.load("mean_iou")

def compute_metrics(eval_pred):
      with torch.no_grad():
        logits, labels = eval_pred
        logits_tensor = torch.from_numpy(logits)
        # scale the logits to the size of the label
        logits_tensor = nn.functional.interpolate(
            logits_tensor,
            size=labels.shape[-2:],
            mode="bilinear",
            align_corners=False,
        ).argmax(dim=1)

        pred_labels = logits_tensor.detach().cpu().numpy()
        # currently using _compute instead of compute
        # see this issue for more info: https://github.com/huggingface/evaluate/pull/328#issuecomment-1286866576
        metrics = metric._compute(
                predictions=pred_labels,
                references=labels,
                num_labels=len(id2label),
                ignore_index=0,
                reduce_labels=image_processor.do_reduce_labels,
            )

        # add per category metrics as individual key-value pairs
        per_category_accuracy = metrics.pop("per_category_accuracy").tolist()
        per_category_iou = metrics.pop("per_category_iou").tolist()

        metrics.update({f"accuracy_{id2label[i]}": v for i, v in enumerate(per_category_accuracy)})
        metrics.update({f"iou_{id2label[i]}": v for i, v in enumerate(per_category_iou)})

        return metrics

##############################################################################################################################

In [323]:
from transformers import Trainer

model = model.to("cuda")
trainer = Trainer(model = model,
                  args = training_arguments,
                  data_collator = collate_fn, 
                  train_dataset = train_dataset,
                  eval_dataset= validation_dataset,
                  compute_metrics = compute_metrics)

In [324]:
trainer.train()

Step,Training Loss,Validation Loss,Mean Iou,Mean Accuracy,Overall Accuracy,Accuracy Background,Accuracy Plaque,Iou Background,Iou Plaque
10,0.4351,0.183646,0.0,,,,,0.0,


  all_acc = total_area_intersect.sum() / total_area_label.sum()
  iou = total_area_intersect / total_area_union
  metrics["mean_accuracy"] = np.nanmean(acc)


KeyboardInterrupt: 

In [221]:
trainer.evaluate(eval_dataset = validation_dataset)

{'eval_loss': 0.017016639932990074,
 'eval_mean_iou': 1.0,
 'eval_mean_accuracy': 1.0,
 'eval_overall_accuracy': 1.0,
 'eval_accuracy_Background': nan,
 'eval_accuracy_Plaque': 1.0,
 'eval_iou_Background': nan,
 'eval_iou_Plaque': 1.0}

In [268]:
model = model.to("cpu")
out = model(**img_com)

In [283]:
msk = torch.tensor(msk, dtype = torch.long)
msk

tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]])

In [291]:
new_loss

tensor(0.7141)