In [None]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms as T
import torchvision
import torch.nn.functional as F
from torch.autograd import Variable

from PIL import Image
import cv2
import albumentations as A

import time
import os
from tqdm.notebook import tqdm

!pip install -q segmentation-models-pytorch
!pip install -q torchsummary

from torchsummary import summary
import segmentation_models_pytorch as smp

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
IMAGE_PATH = '/kaggle/input/the-hyper-kvasir-dataset/segmented-images/images/'
MASK_PATH = '/kaggle/input/the-hyper-kvasir-dataset/segmented-images/masks/'

In [None]:
n_classes = 2

def create_df(image_path=None):
    """Build a DataFrame listing the id (file stem) of every image on disk.

    Args:
        image_path: Directory to scan recursively. When omitted, the
            notebook-level ``IMAGE_PATH`` is used.

    Returns:
        pandas.DataFrame with a single ``'id'`` column and a 0..n-1 integer
        index, sorted by id.
    """
    root = IMAGE_PATH if image_path is None else image_path
    # splitext strips only the final extension, so a stem that itself contains
    # dots ("case.01.jpg" -> "case.01") still matches the file on disk once
    # '.jpg' is appended again; split('.')[0] would truncate it to "case".
    # Sorting removes the dependence on os.walk's filesystem-defined order, so
    # a seeded split of these ids is identical on every machine.
    names = sorted(
        os.path.splitext(filename)[0]
        for _, _, filenames in os.walk(root)
        for filename in filenames
    )
    return pd.DataFrame({'id': names}, index=np.arange(0, len(names)))

df = create_df()
print('Total Images: ', len(df))

In [None]:
# Split the ids: hold out 10% for testing, then take 15% of the remainder
# for validation. The fixed seed keeps both cuts repeatable.
X_trainval, X_test = train_test_split(
    df['id'].values,
    test_size=0.1,
    random_state=19,
)
X_train, X_val = train_test_split(
    X_trainval,
    test_size=0.15,
    random_state=19,
)

for label, subset in (
    ('Train Size   : ', X_train),
    ('Val Size     : ', X_val),
    ('Test Size    : ', X_test),
):
    print(label, len(subset))

In [None]:
# Load one sample image and its mask for a quick visual sanity check.
sample_id = df['id'][100]
img = Image.open(IMAGE_PATH + sample_id + '.jpg')
mask = Image.open(MASK_PATH + sample_id + '.jpg')
import cv2

# Read the mask file again through OpenCV (three channels, BGR order).
rgb_image = cv2.imread(MASK_PATH + sample_id + '.jpg')

# cv2.imread returns channels in BGR order, so the matching conversion code is
# COLOR_BGR2GRAY; COLOR_RGB2GRAY would apply the red and blue luminance
# weights to the wrong channels.
gray_image = cv2.cvtColor(rgb_image, cv2.COLOR_BGR2GRAY)

img=np.asarray(img)
print('Image Size', np.asarray(img).shape)
print('Mask Size', np.asarray(mask).shape)

print('Mask Size', np.asarray(gray_image).shape)


In [None]:
# Draw the sample image, then the grayscale mask on top at 60% opacity.
fig, ax = plt.subplots()
ax.imshow(img)
ax.imshow(gray_image, alpha=0.6)
ax.set_title('Picture with Mask Appplied')
plt.show()

In [None]:
import json
main_path = "/kaggle/input/the-hyper-kvasir-dataset/segmented-images/"


def get_bbox(json_file, ext=".jpg", nl=256, nc=256):
    """Load the first bounding box of every image, rescaled to a fixed grid.

    Args:
        json_file: Path to a JSON file mapping an image id to a record with
            ``"width"``, ``"height"`` and a ``"bbox"`` list whose items carry
            ``"xmin"``, ``"ymin"``, ``"xmax"`` and ``"ymax"``.
        ext: Suffix appended to each id to form the key of the result.
        nl: Target extent for x coordinates (x is scaled by ``nl / width``).
        nc: Target extent for y coordinates (y is scaled by ``nc / height``).

    Returns:
        dict mapping ``id + ext`` to a float32 array
        ``[xmin, ymin, xmax, ymax]`` in the rescaled coordinates. Only the
        first entry of each ``"bbox"`` list is used.

    Raises:
        IndexError: if a record's ``"bbox"`` list is empty.
    """
    with open(json_file, "r") as fp:
        data = json.load(fp)

    bbox = {}
    for key, value in data.items():
        first = value["bbox"][0]
        width, height = value["width"], value["height"]
        # float32 matches the default dtype of a PyTorch regression head, so
        # the boxes can be compared to its output without a Double/Float
        # dtype mismatch in the loss.
        bbox[key + ext] = np.array(
            [
                first["xmin"] * nl / width,
                first["ymin"] * nc / height,
                first["xmax"] * nl / width,
                first["ymax"] * nc / height,
            ],
            dtype=np.float32,
        )
    return bbox

bbox_file=os.path.join(main_path, "bounding-boxes.json")

In [None]:
#final one which also works for patches
class Dataset_create(Dataset):
    """Segmentation dataset yielding ``(image, mask, bbox)`` for each sample id.

    Images and masks are read from ``img_path + id + '.jpg'`` and
    ``mask_path + id + '.jpg'``. Bounding boxes come from the notebook-level
    ``get_bbox(bbox_file)`` lookup.

    Args:
        img_path: Directory prefix (with trailing separator) of the images.
        mask_path: Directory prefix (with trailing separator) of the masks.
        X: Indexable collection of sample ids (file stems, as strings).
        mean: Per-channel mean passed to ``T.Normalize``.
        std: Per-channel standard deviation passed to ``T.Normalize``.
        transform: Optional callable invoked as ``transform(image=, mask=)``
            that returns a mapping with ``'image'`` and ``'mask'`` entries.
        patch: When true, image and mask are cut into square tiles.
        patch_size: Edge length of the tiles used when ``patch`` is true.
    """

    def __init__(self, img_path, mask_path, X, mean, std, transform=None, patch=False,
                 patch_size=64):
        self.img_path = img_path
        self.mask_path = mask_path
        self.X = X
        self.transform = transform
        self.patches = patch
        self.patch_size = patch_size
        self.mean = mean
        self.std = std
        self.bboxes = get_bbox(bbox_file)
        # Built once here rather than on every __getitem__ call.
        self.img_to_tensor = T.Compose([T.ToTensor(), T.Normalize(self.mean, self.std)])

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        sample_id = self.X[idx]
        img_file = self.img_path + sample_id + '.jpg'
        mask_file = self.mask_path + sample_id + '.jpg'

        # cv2.imread returns None instead of raising when a file cannot be
        # read; fail here with the offending path instead of inside cvtColor.
        img = cv2.imread(img_file)
        if img is None:
            raise FileNotFoundError(f'Could not read image: {img_file}')
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        mask = cv2.imread(mask_file, cv2.IMREAD_GRAYSCALE)
        if mask is None:
            raise FileNotFoundError(f'Could not read mask: {mask_file}')

        # NOTE(review): the bbox is taken straight from the annotation lookup
        # and is not passed through self.transform, so geometric augmentation
        # applied to img/mask (flips, distortion) is not reflected in it.
        bbox = self.bboxes[sample_id + '.jpg']

        if self.transform is not None:
            aug = self.transform(image=img, mask=mask)
            img = aug['image']
            mask = aug['mask']

        img = self.img_to_tensor(Image.fromarray(img))

        # The masks are stored as JPEG, whose compression leaves intermediate
        # gray levels around region borders. Threshold so the targets are
        # exactly 0/1 for the BCE loss and the Dice/IoU overlap sums.
        # Assumes foreground pixels are the bright ones -- TODO confirm.
        mask = np.ascontiguousarray((mask > 127).astype(np.float32))
        # (H, W) -> (1, H, W) so the mask has an explicit channel axis.
        mask = torch.from_numpy(mask).unsqueeze(0)

        if self.patches:
            img, mask = self.tiles(img, mask)

        return img, mask, bbox

    def tiles(self, img, mask):
        """Cut a (3, H, W) image and a (1, H, W) mask into square tiles.

        Returns:
            ``(img_patches, mask_patches)`` shaped ``(n, 3, p, p)`` and
            ``(n, 1, p, p)`` with ``p = self.patch_size``; tiles are ordered
            row by row.
        """
        size = self.patch_size

        # unfold over height then width yields (C, n_rows, n_cols, p, p).
        img_patches = img.unfold(1, size, size).unfold(2, size, size)
        img_patches = img_patches.contiguous().view(3, -1, size, size)
        img_patches = img_patches.permute(1, 0, 2, 3)

        mask_patches = mask.unfold(1, size, size).unfold(2, size, size)
        mask_patches = mask_patches.contiguous().view(1, -1, size, size)
        mask_patches = mask_patches.permute(1, 0, 2, 3)

        return img_patches, mask_patches

In [None]:
# Channel statistics used to normalise the images (the standard ImageNet
# mean/std).
mean=[0.485, 0.456, 0.406]
std=[0.229, 0.224, 0.225]

# Training: resize to 256x256, then random flips, grid distortion and
# photometric jitter.
# NOTE(review): Dataset_create does not pass the bbox through these
# transforms, so flipped/distorted training samples keep their un-augmented
# box -- confirm whether that is intended for the bbox regressor.
t_train = A.Compose([A.Resize(256,256, interpolation=cv2.INTER_NEAREST), A.HorizontalFlip(), A.VerticalFlip(), 
                     A.GridDistortion(p=0.2), A.RandomBrightnessContrast((0,0.5),(0,0.5)),
                     A.GaussNoise()])

# Validation: resize only. Random flips/distortion here would make the
# reported metrics vary between runs for identical weights, and would pair
# flipped images with un-flipped bounding boxes.
t_val = A.Compose([A.Resize(256,256, interpolation=cv2.INTER_NEAREST)])

#datasets
train_set = Dataset_create(IMAGE_PATH, MASK_PATH, X_train, mean, std, t_train, patch=False)
val_set = Dataset_create(IMAGE_PATH, MASK_PATH, X_val, mean, std, t_val, patch=False)

#dataloader
batch_size= 16

train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
# Evaluation does not depend on sample order, so keep the batches fixed.
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)

In [None]:
import torch
from torch.utils.data import DataLoader


# Pull a single batch (images, masks, bounding boxes) to confirm the tensor
# shapes before training. next(iter(...)) replaces a loop that always broke
# after its first iteration.
images, masks, bbox = next(iter(train_loader))
print("Image batch shape:", images.shape)
print("Mask batch shape:", masks.shape)
print(bbox)


In [None]:
# Fetch one training sample directly from the dataset (index 6) and print the
# mask and image tensor shapes as a sanity check.
im , mk,bbox = train_set[6]
myfiles=[im,mk]
print(mk.shape)
print(im.shape)

# model + training

In [None]:
import numpy as np
import pandas as pd
import os , glob
import matplotlib.pyplot as plt
from PIL import Image
import cv2
from tqdm import tqdm

import torch
from torch import Tensor
from torch.autograd import Function
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from keras.preprocessing import image
import albumentations as A
from albumentations.pytorch import ToTensorV2

from torch.utils.data import Dataset , DataLoader
from torchvision import transforms , utils , datasets

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
class DoubleConv(nn.Module):
    """Two consecutive (3x3 convolution -> batch norm -> ReLU) stages.

    Args:
        in_channels: Channels of the input feature map.
        out_channels: Channels produced by the second stage.
        mid_channels: Channels between the two stages; falls back to
            ``out_channels`` when not given.
    """

    def __init__(self, in_channels, out_channels, mid_channels=None):
        super().__init__()

        hidden = mid_channels or out_channels

        stages = []
        for c_in, c_out in ((in_channels, hidden), (hidden, out_channels)):
            stages += [
                # padding=1 with a 3x3 kernel keeps the spatial size.
                nn.Conv2d(c_in, c_out, kernel_size=3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(inplace=True),
            ]
        self.double_conv = nn.Sequential(*stages)

    def forward(self, x):
        return self.double_conv(x)
    
    
    
class Down(nn.Module):
    """Encoder stage: 2x2 max-pooling followed by a DoubleConv."""

    def __init__(self, in_channels, out_channels):
        super().__init__()

        pool = nn.MaxPool2d(2)
        conv = DoubleConv(in_channels, out_channels)
        self.maxpool_conv = nn.Sequential(pool, conv)

    def forward(self, x):
        pooled_features = self.maxpool_conv(x)
        return pooled_features
    
    
class Up(nn.Module):
    """Decoder stage: upsample, pad to the skip tensor's size, concat, DoubleConv.

    Args:
        in_channels: Channels entering the DoubleConv (after concatenation).
        out_channels: Channels produced by the stage.
        bilinear: Use bilinear ``nn.Upsample`` when true, otherwise a
            transposed convolution that halves the channel count.
    """

    def __init__(self, in_channels, out_channels, bilinear=True):
        super().__init__()

        half = in_channels // 2
        if not bilinear:
            self.up = nn.ConvTranspose2d(in_channels, half, kernel_size=2, stride=2)
            self.conv = DoubleConv(in_channels, out_channels)
        else:
            # Bilinear upsampling has no weights, so the channel reduction is
            # done by the DoubleConv's middle width instead.
            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
            self.conv = DoubleConv(in_channels, out_channels, half)

    def forward(self, x1, x2):
        upsampled = self.up(x1)

        # Dims 2 and 3 are treated as height and width. Pad the upsampled
        # tensor so it matches the skip tensor x2, putting any odd leftover
        # pixel on the right/bottom.
        pad_h = x2.shape[2] - upsampled.shape[2]
        pad_w = x2.shape[3] - upsampled.shape[3]
        left, top = pad_w // 2, pad_h // 2
        upsampled = F.pad(upsampled, [left, pad_w - left, top, pad_h - top])
        # if you have padding issues, see
        # https://github.com/HaiyongJiang/U-Net-Pytorch-Unstructured-Buggy/commit/0e854509c2cea854e247a9c615f175f76fbb2e3a
        # https://github.com/xiaopeng-liao/Pytorch-UNet/commit/8ebac70e633bac59fc22bb5195e513d5832fb3bd

        # Skip features come first along the channel axis.
        merged = torch.cat([x2, upsampled], dim=1)
        return self.conv(merged)


class OutConv(nn.Module):
    """Final 1x1 convolution mapping features to ``out_channels`` maps."""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)

    def forward(self, x):
        projected = self.conv(x)
        return projected
    
    
    
class UNet(nn.Module):
    """U-Net: four down-sampling stages, four up-sampling stages with skips.

    Args:
        n_channels: Channels of the input image.
        n_classes: Number of output maps produced by the final 1x1 conv.
        bilinear: Forwarded to every ``Up`` stage; when true the deeper
            stages are built with half the channel width.
    """

    def __init__(self, n_channels, n_classes, bilinear=True):
        super().__init__()
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.bilinear = bilinear

        # With bilinear upsampling the bottleneck and decoder widths are halved.
        factor = 2 if bilinear else 1

        # Encoder
        self.inc = DoubleConv(n_channels, 64)
        self.down1 = Down(64, 128)
        self.down2 = Down(128, 256)
        self.down3 = Down(256, 512)
        self.down4 = Down(512, 1024 // factor)

        # Decoder
        self.up1 = Up(1024, 512 // factor, bilinear)
        self.up2 = Up(512, 256 // factor, bilinear)
        self.up3 = Up(256, 128 // factor, bilinear)
        self.up4 = Up(128, 64, bilinear)

        self.outc = OutConv(64, n_classes)

    def forward(self, x):
        # Encoder: keep every intermediate feature map for the skip links.
        skips = [self.inc(x)]
        for down in (self.down1, self.down2, self.down3, self.down4):
            skips.append(down(skips[-1]))

        # Decoder: start from the deepest map and merge skips deepest-first.
        x = skips.pop()
        for up in (self.up1, self.up2, self.up3, self.up4):
            x = up(x, skips.pop())

        return self.outc(x)

def dice_calc(gt,pred) :
    """Dice score between a ground-truth mask and thresholded predictions.

    ``pred`` is passed through a sigmoid and binarised at 0.5 before the
    overlap is computed over all elements at once. This name is defined a
    second time further down the notebook; whichever cell ran last is the
    definition in effect.
    """
    binary = (torch.sigmoid(pred) >= .5).float()
    overlap = (binary * gt).sum()
    # The small constant keeps the division defined when both are empty.
    total = (binary + gt).sum() + 1e-8
    return (2 * overlap) / total

In [None]:
# Segmentation model: U-Net decoder on an EfficientNet-B3 encoder initialised
# from ImageNet weights. One output channel and no activation, so the model
# returns raw logits.
net = smp.Unet(
    'efficientnet-b3',
    encoder_weights='imagenet',
    classes=1,
    activation=None,
    encoder_depth=5,
    decoder_channels=[256, 128, 64, 32, 16],
)

In [None]:
from torchvision      import datasets, models, transforms

# Bounding-box regressor: pretrained ResNet-50 whose classification head is
# replaced by a 4-output linear layer (one output per box coordinate).
net2 = models.resnet50(weights="IMAGENET1K_V2")
fc_in_size = net2.fc.in_features
net2.fc = nn.Linear(in_features=fc_in_size, out_features=4)
   

In [None]:
# Define your model, optimizer, and loss criterion
# Fall back to the CPU when no GPU is present instead of failing on "cuda:0".
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

net.to(device=device)
net2.to(device=device)

# Segmentation: RMSprop on the U-Net with BCE-with-logits (single-channel
# output). The plateau scheduler is in 'max' mode, i.e. it expects
# scheduler.step(score) to be called with a score that should increase.
optimizer = optim.RMSprop(net.parameters(), lr=0.0001, weight_decay=1e-8, momentum=0.9)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', patience=2)
criterion = nn.BCEWithLogitsLoss()

#bbox
criterion2 = nn.SmoothL1Loss()
# The bbox optimiser must own net2's parameters. Built over net.parameters()
# it would never update the regressor and would step the segmentation model a
# second time per batch.
optimizer2 = optim.Adam(net2.parameters(), lr=1e-4)

In [None]:
#this is the final one patch will also work
import torch
import torch.optim as optim
import torch.nn as nn
import torchvision.transforms as transforms
import numpy as np
from tqdm import tqdm

# Define the Dice Score calculation function
def dice_calc(gt, pred):
    """Dice score of sigmoid-thresholded logits ``pred`` against mask ``gt``.

    The score is computed over every element of the tensors at once, so for
    a batch it is a single pooled value rather than a per-sample mean.
    """
    hard_pred = torch.sigmoid(pred).ge(0.5).float()
    overlap = torch.sum(hard_pred * gt)
    # Epsilon keeps the ratio defined when prediction and target are empty.
    denom = torch.sum(hard_pred + gt) + 1e-8
    return 2 * overlap / denom

# Define the IoU calculation function
def iou_calc(gt, pred):
    """Intersection-over-union of sigmoid-thresholded logits against ``gt``.

    Like the Dice helper, the value is pooled over every element of the
    tensors passed in.
    """
    hard_pred = torch.sigmoid(pred).ge(0.5).float()
    overlap = torch.sum(hard_pred * gt)
    # |A or B| = |A| + |B| - |A and B|; epsilon avoids dividing by zero.
    combined = torch.sum(hard_pred + gt) - overlap + 1e-8
    return overlap / combined



# Training loop
def train(epoch, epochs, tloader, patch=False):
    """Run one training epoch for the segmentation net and the bbox regressor.

    Uses the notebook-level ``net``, ``net2``, ``optimizer``, ``optimizer2``,
    ``criterion``, ``criterion2``, ``device``, ``dice_calc`` and ``iou_calc``.

    Args:
        epoch: Index of the current epoch (shown in the progress bar).
        epochs: Total number of epochs (unused; kept for the call signature).
        tloader: tqdm-wrapped loader yielding ``(images, masks, bbox)``.
        patch: When true, the patch axis is folded into the batch axis before
            the forward pass.
    """
    net.train()
    # validation() switches net2 to eval mode; without this call its
    # batch-norm layers would stay frozen from the second epoch onwards.
    net2.train()
    tloader.set_description(f'EPOCH {epoch}')
    epoch_loss = 0
    epoch_bbox_loss = 0
    dice_score = 0
    iou_score = 0

    for images, masks, bbox in tloader:
        images = images.to(device, dtype=torch.float32)
        masks = masks.to(device, dtype=torch.float32)
        # Same dtype as net2's output so the loss sees matching tensors.
        bbox = bbox.to(device, dtype=torch.float32)
        if patch:
            # Perform patch-wise training
            # NOTE(review): images become (batch * patches, C, H, W) here
            # while bbox keeps one row per image, so the bbox step below looks
            # shape-incompatible with patch=True -- confirm before enabling.
            batch_size, num_patches, num_channels, height, width = images.shape
            images = images.view(-1, num_channels, height, width)
            masks = masks.view(-1, 1, height, width)

        #mask segmentation
        optimizer.zero_grad()
        mask_pred = net(images)
        loss = criterion(mask_pred, masks)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

        #bbox
        # Clear the previous batch's gradients first; otherwise they keep
        # accumulating and every step uses the sum over all earlier batches.
        optimizer2.zero_grad()
        bbox_predictions = net2(images)
        loss2 = criterion2(bbox_predictions, bbox)
        loss2.backward()
        optimizer2.step()
        epoch_bbox_loss += loss2.item()

        # Metrics are for reporting only: skip autograd and store plain
        # floats rather than accumulating device tensors.
        with torch.no_grad():
            running_DS = dice_calc(masks, mask_pred).item()
            running_IOU = iou_calc(masks, mask_pred).item()
        dice_score += running_DS
        iou_score += running_IOU
        tloader.set_postfix(loss=loss.item(), dice_score=running_DS, iou_score=running_IOU)

    # Guard against an empty loader so the averages below cannot divide by 0.
    num_batches = max(len(tloader), 1)
    print(epoch_bbox_loss/num_batches)
    print(f'Train Loss Epoch: {epoch_loss / num_batches}')
    print(f'Train Dice Score Epoch: {dice_score / num_batches}')
    print(f'Train IoU Score Epoch: {iou_score / num_batches}')

# Validation loop
def validation(vloader, patch=False):
    """Evaluate the segmentation net and the bbox regressor on one loader.

    Uses the notebook-level ``net``, ``net2``, ``criterion2``, ``device``,
    ``dice_calc`` and ``iou_calc``. Both models are left in eval mode.

    Args:
        vloader: tqdm-wrapped loader yielding ``(images, masks, bbox)``.
        patch: When true, the patch axis is folded into the batch axis before
            the forward pass.

    Returns:
        float: mean Dice score over the batches, so a caller can use it for
        learning-rate scheduling or model selection.
    """
    net.eval()
    net2.eval()
    vloader.set_description('Validation')
    epoch_loss_bbox = 0
    dice_score = 0
    iou_score = 0

    with torch.no_grad():
        for images, masks, bbox in vloader:
            # Same dtypes as in training so losses and metrics are comparable.
            images = images.to(device, dtype=torch.float32)
            masks = masks.to(device, dtype=torch.float32)
            bbox = bbox.to(device, dtype=torch.float32)
            if patch:
                # Perform patch-wise validation
                # NOTE(review): bbox keeps one row per image while images are
                # flattened to patches, so the bbox loss below looks
                # shape-incompatible with patch=True -- confirm.
                batch_size, num_patches, num_channels, height, width = images.shape
                images = images.view(-1, num_channels, height, width)
                masks = masks.view(-1, 1, height, width)

            #mask segmentation
            mask_pred = net(images)

            #bbox
            predictions_bbox = net2(images)
            loss_bbox = criterion2(predictions_bbox, bbox)
            epoch_loss_bbox += loss_bbox.item()

            # Store plain floats rather than accumulating device tensors.
            running_DS = dice_calc(masks, mask_pred).item()
            running_IOU = iou_calc(masks, mask_pred).item()
            dice_score += running_DS
            iou_score += running_IOU

            vloader.set_postfix(dice_score=running_DS, iou_score=running_IOU)

    # Guard against an empty loader so the averages below cannot divide by 0.
    num_batches = max(len(vloader), 1)
    mean_dice = dice_score / num_batches
    print(epoch_loss_bbox / num_batches)
    print(f'Validation Dice Score Epoch: {mean_dice}')
    print(f'Validation IoU Score Epoch: {iou_score / num_batches}')
    return mean_dice


In [None]:
epochs = 10

for epoch in range(epochs) :
    print(epoch+1,'/',epochs)
    with tqdm(train_loader,unit='batch') as tloader : 
        train(epoch,epochs,tloader,patch=False)
    
    with tqdm(val_loader,unit='batch') as vloader:
        val_score = validation(vloader,patch=False)

    # The ReduceLROnPlateau scheduler only acts when step() is called with the
    # monitored score. Feed it the validation score whenever validation()
    # returns one; a None result leaves the learning rate untouched.
    if val_score is not None:
        scheduler.step(val_score)

In [None]:
# epochs = 40

# for epoch in range(epochs) :
#     print(epoch+1,'/',epochs)
#     with tqdm(train_loader,unit='batch') as tloader : 
#         train(epoch,epochs,tloader)
    
#     with tqdm(val_loader,unit='batch') as vloader:
#         validation(vloader)

In [None]:
#this will not work for patches
# Show the first validation sample: input image, ground-truth mask, predicted
# probability map and its 0.5-thresholded version.
net.eval()
with torch.no_grad():
    # The loader yields (images, masks, bbox); the bbox is not needed here.
    images, masks, _ = next(iter(val_loader))
    images = images.to(device)
    print(images.shape)
    masks  = masks.to(device)

    # The model returns raw logits; sigmoid turns them into probabilities.
    mask_pred = torch.sigmoid(net(images))

    # Undo the mean/std normalisation so the RGB image displays correctly.
    img = images[0].cpu().numpy().transpose(1, 2, 0)
    img = np.clip(img * np.array(std) + np.array(mean), 0, 1)
    masks = masks.cpu().numpy()
    pred = mask_pred.cpu().numpy()
    # Same 0.5 cut-off as dice_calc / iou_calc.
    masks_2 = (pred >= 0.5).astype(int)

    fig, axes = plt.subplots(1, 4, figsize=(20, 20))

    axes[0].imshow(img)
    axes[0].set_title('Actual img')

    axes[1].imshow(masks[0][0])
    axes[1].set_title('Ground Truth Mask')

    axes[2].imshow(pred[0][0])
    axes[2].set_title('Probabilistic Mask')

    axes[3].imshow(masks_2[0][0])
    axes[3].set_title('Probabilistic Mask threshold')