In [1]:
# !pip install torchinfo
import os
import pandas as pd
import numpy as np
import cv2
from torchvision.io import read_image
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, random_split, DataLoader, ConcatDataset
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

from torchvision.transforms import ToTensor
from PIL import Image
import os
# from torchsummary import summary
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torchvision 
from torchvision import transforms
from torchinfo import summary
import timm
!pip install segmentation-models-pytorch
import segmentation_models_pytorch as smp




Collecting segmentation-models-pytorch
  Obtaining dependency information for segmentation-models-pytorch from https://files.pythonhosted.org/packages/cb/70/4aac1b240b399b108ce58029ae54bc14497e1bbc275dfab8fd3c84c1e35d/segmentation_models_pytorch-0.3.3-py3-none-any.whl.metadata
  Downloading segmentation_models_pytorch-0.3.3-py3-none-any.whl.metadata (30 kB)
Collecting pretrainedmodels==0.7.4 (from segmentation-models-pytorch)
  Downloading pretrainedmodels-0.7.4.tar.gz (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.8/58.8 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l- \ done
[?25hCollecting efficientnet-pytorch==0.7.1 (from segmentation-models-pytorch)
  Downloading efficientnet_pytorch-0.7.1.tar.gz (21 kB)
  Preparing metadata (setup.py) ... [?25l- done
[?25hCollecting timm==0.9.2 (from segmentation-models-pytorch)
  Obtaining dependency information for timm==0.9.2 from https://files.pytho

In [2]:
model = smp.UnetPlusPlus(
    encoder_name="resnet50",        
    encoder_weights="imagenet",     
    in_channels=3,                  
    classes=3     
)

Downloading: "https://download.pytorch.org/models/resnet50-19c8e357.pth" to /root/.cache/torch/hub/checkpoints/resnet50-19c8e357.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 215MB/s]


In [3]:
class ImageSegmentationDataset(Dataset):
    """Dataset for loading and transforming image segmentation data."""
    def __init__(self, images_folder, masks_folder, target_size=None, augmentations=None):
        self.images_folder = images_folder
        self.masks_folder = masks_folder
        self.target_size = target_size
        self.augmentations = augmentations
        self.file_names = os.listdir(self.images_folder)

    def __len__(self):
        return len(self.file_names)

    def load_segmentation_mask(self, path_to_mask):
        segmentation_image = cv2.imread(path_to_mask)
        segmentation_image = cv2.resize(segmentation_image, self.target_size)
        segmentation_image = cv2.cvtColor(segmentation_image, cv2.COLOR_BGR2HSV)

        red_lower_bound = np.array([0, 100, 20])
        red_upper_bound = np.array([10, 255, 255])
        lower_red_mask = cv2.inRange(segmentation_image, red_lower_bound, red_upper_bound)
        upper_red_mask = cv2.inRange(segmentation_image, np.array([160, 100, 20]), np.array([179, 255, 255]))
        
        combined_red_mask = lower_red_mask + upper_red_mask
        combined_red_mask[combined_red_mask != 0] = 1

        green_mask = cv2.inRange(segmentation_image, (36, 25, 25), (70, 255, 255))
        green_mask[green_mask != 0] = 2

        complete_mask = cv2.bitwise_or(combined_red_mask, green_mask)
        complete_mask = np.expand_dims(complete_mask, axis=-1)
        complete_mask = complete_mask.astype(np.uint8)
        return complete_mask

    def __getitem__(self, index):
        path_to_image = os.path.join(self.images_folder, self.file_names[index])
        path_to_mask = os.path.join(self.masks_folder, self.file_names[index])
        input_image = cv2.imread(path_to_image)
        input_image = cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB)
        segmentation_mask = self.load_segmentation_mask(path_to_mask)
        input_image = cv2.resize(input_image, self.target_size)
        
        if self.augmentations:
            augmented = self.augmentations(image=input_image)
            input_image = augmented['image']
        
        segmentation_mask = torch.tensor(segmentation_mask, dtype=torch.float32)
        segmentation_mask = segmentation_mask.permute(2, 0, 1)

        return input_image, segmentation_mask

In [4]:
image_path = []
TRAIN_DIR = '/kaggle/input/bkai-igh-neopolyp/train/train'
for root, dirs, files in os.walk(TRAIN_DIR):
    for file in files:
        path = os.path.join(root,file)
        image_path.append(path)
len(image_path)

1000

In [5]:
mask_path = []
TRAIN_MASK_DIR = '/kaggle/input/bkai-igh-neopolyp/train_gt/train_gt'
for root, dirs, files in os.walk(TRAIN_MASK_DIR):
    for file in files:
        path = os.path.join(root,file)
        mask_path.append(path)
len(mask_path)

1000

In [6]:
batch_size = 8
aug_transform = A.Compose([
    A.HorizontalFlip(p=0.6),  
    A.VerticalFlip(p=0.4), 
    A.GaussianBlur(blur_limit=3),   
    A.RandomSnow(snow_point_lower=0.2, snow_point_upper=0.3, brightness_coeff=1.2, p=0.05), 
    A.RandomShadow(shadow_roi=(0.1, 0.2, 0.9, 0.9), p=0.15),  
    A.RandomGamma (gamma_limit=(80, 120), p=0.25),  
    A.RGBShift(p=0.25, r_shift_limit=15, g_shift_limit=15, b_shift_limit=15), 
    A.Cutout(p=0.3, max_h_size=25, max_w_size=25, fill_value=0),
    A.RandomCrop(256, 256),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2(),
])



In [7]:
ori_transform = A.Compose([
    A.Normalize(mean=(0.485, 0.456, 0.406),std=(0.229, 0.224, 0.225)),
    ToTensorV2(),
])

In [8]:
color_dict= {0: (0, 0, 0),
             1: (255, 0, 0),
             2: (0, 255, 0)}
def mask_to_rgb(mask, color_dict):
    output = np.zeros((mask.shape[0], mask.shape[1], 3))
    for k in color_dict.keys():
        output[mask==k] = color_dict[k]

    return np.uint8(output)    

In [9]:
train_dataset_not_aug = ImageSegmentationDataset(images_folder= TRAIN_DIR,
                             masks_folder= TRAIN_MASK_DIR,
                             target_size= (256,256),
                             augmentations = ori_transform)

train_dataset_aug = ImageSegmentationDataset(images_folder= TRAIN_DIR,
                             masks_folder= TRAIN_MASK_DIR,
                             target_size= (256,256),
                             augmentations = aug_transform)
train_dataset = ConcatDataset([train_dataset_not_aug, train_dataset_aug])

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
print(len(train_dataset))

2000


In [10]:
torch.cuda.empty_cache()

In [11]:
learning_rate = 0.0001
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [12]:
!pip install wandb
import wandb
!wandb login 'e148ed8e0e7f84df32e1a5cd13e10de504c7be45'

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [13]:
wandb.init(
    project = 'Unet_polyp-Segmentation',
    config = {
        'learning_rate': 0.0001,
        'architecture': 'ResUnet',
        'dataset': 'Polyp',
        'epoch': 50
    }
)

[34m[1mwandb[0m: Currently logged in as: [33mthanhtruongtran23[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: wandb version 0.16.0 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade
[34m[1mwandb[0m: Tracking run with wandb version 0.15.12
[34m[1mwandb[0m: Run data is saved locally in [35m[1m/kaggle/working/wandb/run-20231115_132559-asq63zvn[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.
[34m[1mwandb[0m: Syncing run [33mgenial-deluge-2[0m
[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/thanhtruongtran23/Unet_polyp-Segmentation[0m
[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/thanhtruongtran23/Unet_polyp-Segmentation/runs/asq63zvn[0m


In [14]:
num_epochs = 50
trainsize = 256
device = torch.device('cuda' if torch.cuda.is_available() else "cpu")
model.to(device)
criterion = nn.CrossEntropyLoss()
train_loss_array = []
best_loss = 100
# Training loop
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)
        # Forward pass
        labels = labels.squeeze(dim=1).long()

        outputs = model(images)

        loss = criterion(outputs, labels)
        
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        total_loss += loss.item()  

    epoch_loss = total_loss / len(train_loader)
    train_loss_array.append(epoch_loss)
    wandb.log({'Loss': epoch_loss
              })
    
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.10f}")
    
    if epoch_loss < best_loss:
        best_loss = epoch_loss
        checkpoint = { 
            'epoch': epoch,
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'loss': epoch_loss
        }
        save_path = '/kaggle/working/submission.pth'
        torch.save(checkpoint, save_path)
        print('Save model')
wandb.finish()

Epoch [1/50], Loss: 0.3970630491
Save model
Epoch [2/50], Loss: 0.1621830199
Save model
Epoch [3/50], Loss: 0.1184547714
Save model
Epoch [4/50], Loss: 0.0920849170
Save model
Epoch [5/50], Loss: 0.0817161221
Save model
Epoch [6/50], Loss: 0.0724561898
Save model
Epoch [7/50], Loss: 0.0651862420
Save model
Epoch [8/50], Loss: 0.0597584515
Save model
Epoch [9/50], Loss: 0.0603968321
Epoch [10/50], Loss: 0.0506495759
Save model
Epoch [11/50], Loss: 0.0469875398
Save model
Epoch [12/50], Loss: 0.0457227016
Save model
Epoch [13/50], Loss: 0.0444487441
Save model
Epoch [14/50], Loss: 0.0412673634
Save model
Epoch [15/50], Loss: 0.0416696289
Epoch [16/50], Loss: 0.0405312688
Save model
Epoch [17/50], Loss: 0.0387094115
Save model
Epoch [18/50], Loss: 0.0359651122
Save model
Epoch [19/50], Loss: 0.0393700291
Epoch [20/50], Loss: 0.0350619306
Save model
Epoch [21/50], Loss: 0.0355895909
Epoch [22/50], Loss: 0.0342910846
Save model
Epoch [23/50], Loss: 0.0319241880
Save model
Epoch [24/50], Los

[34m[1mwandb[0m: Waiting for W&B process to finish... [32m(success).[0m


Epoch [50/50], Loss: 0.0204699481


[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m: Loss █▄▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m: Loss 0.02047
[34m[1mwandb[0m: 
[34m[1mwandb[0m: 🚀 View run [33mgenial-deluge-2[0m at: [34m[4mhttps://wandb.ai/thanhtruongtran23/Unet_polyp-Segmentation/runs/asq63zvn[0m
[34m[1mwandb[0m: Synced 6 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)
[34m[1mwandb[0m: Find logs at: [35m[1m./wandb/run-20231115_132559-asq63zvn/logs[0m


In [15]:
checkpoint = torch.load('/kaggle/working/submission.pth')
model.load_state_dict(checkpoint['model'])
device = torch.device('cuda' if torch.cuda.is_available() else "cpu")
model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001) 
optimizer.load_state_dict(checkpoint['optimizer'])

# Sau khi nạp trạng thái, đưa cả model và optimizer lên device
model.to(device)
for state in optimizer.state.values():
    for k, v in state.items():
        if isinstance(v, torch.Tensor):
            state[k] = v.to(device)
loss_value = checkpoint['loss']

print(f"The loss from the checkpoint is: {loss_value:.10f}")

The loss from the checkpoint is: 0.0164980138


In [16]:
!mkdir test_mask
!mkdir test_overlapmask

In [17]:
model.eval()
test_images_directory = "/kaggle/input/bkai-igh-neopolyp/test/test"
output_masks_directory = "test_mask/"
output_overlaps_directory = "test_overlapmask/"

In [18]:
for filename in os.listdir(test_images_directory):
    image_path = os.path.join(test_images_directory, filename)
    original_image = cv2.imread(image_path)
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    original_width = original_image.shape[0]
    original_height = original_image.shape[1]
    
    resized_image = cv2.resize(original_image, (trainsize, trainsize))
    transformed = ori_transform(image=resized_image)
    model_input = transformed["image"]
    model_input = model_input.unsqueeze(0).to(device)
    
    with torch.no_grad():
        predicted_mask = model(model_input).squeeze(0).cpu().numpy().transpose(1, 2, 0)
    
    scaled_mask = cv2.resize(predicted_mask, (original_height, original_width))
    class_mask = np.argmax(scaled_mask, axis=2)
    colorized_mask = np.zeros((*class_mask.shape, 3)).astype(np.uint8)
    
    # Assuming mask_to_rgb is a predefined function that converts class masks to RGB images
    rgb_colored_mask = mask_to_rgb(class_mask, color_dict)
    rgb_colored_mask_corrected = cv2.cvtColor(rgb_colored_mask, cv2.COLOR_BGR2RGB)
    
    combined_image = 0.7 * original_image + 0.3 * rgb_colored_mask_corrected
    combined_image = combined_image.astype('uint8')
    combined_image = cv2.cvtColor(combined_image, cv2.COLOR_RGB2BGR)
    
    rgb_colored_mask = cv2.cvtColor(rgb_colored_mask, cv2.COLOR_RGB2BGR)
    cv2.imwrite(os.path.join(output_masks_directory, filename), rgb_colored_mask)
    cv2.imwrite(os.path.join(output_overlaps_directory, filename), combined_image)
    
    print("Processed image:", image_path)


Processed image: /kaggle/input/bkai-igh-neopolyp/test/test/f62f215f0da4ad3a7ab8df9da7386835.jpeg
Processed image: /kaggle/input/bkai-igh-neopolyp/test/test/aeeb2b535797395305af926a6f23c5d6.jpeg
Processed image: /kaggle/input/bkai-igh-neopolyp/test/test/2ed9fbb63b28163a745959c03983064a.jpeg
Processed image: /kaggle/input/bkai-igh-neopolyp/test/test/3c84417fda8019410b1fcf0625f608b4.jpeg
Processed image: /kaggle/input/bkai-igh-neopolyp/test/test/8fa8625605da2023387fd56c04414eaa.jpeg
Processed image: /kaggle/input/bkai-igh-neopolyp/test/test/cb1b387133b51209db6dcdda5cc8a788.jpeg
Processed image: /kaggle/input/bkai-igh-neopolyp/test/test/a3657e4314fe384eb2ba3adfda6c1899.jpeg
Processed image: /kaggle/input/bkai-igh-neopolyp/test/test/c695325ded465efde988dfb96d081533.jpeg
Processed image: /kaggle/input/bkai-igh-neopolyp/test/test/0fca6a4248a41e8db8b4ed633b456aaa.jpeg
Processed image: /kaggle/input/bkai-igh-neopolyp/test/test/6f4d4987ea3b4bae5672a230194c5a08.jpeg
Processed image: /kaggle/input

In [19]:
import numpy as np
import pandas as pd
import cv2
import os

def rle_to_string(runs):
    return ' '.join(str(x) for x in runs)

def rle_encode_one_mask(mask):
    pixels = mask.flatten()
    pixels[pixels > 225] = 255
    pixels[pixels <= 225] = 0
    use_padding = False
    if pixels[0] or pixels[-1]:
        use_padding = True
        pixel_padded = np.zeros([len(pixels) + 2], dtype=pixels.dtype)
        pixel_padded[1:-1] = pixels
        pixels = pixel_padded
    
    rle = np.where(pixels[1:] != pixels[:-1])[0] + 2
    if use_padding:
        rle = rle - 1
    rle[1::2] = rle[1::2] - rle[:-1:2]
    return rle_to_string(rle)

def rle2mask(mask_rle, shape=(3,3)):
    '''
    mask_rle: run-length as string formated (start length)
    shape: (width,height) of array to return 
    Returns numpy array, 1 - mask, 0 - background

    '''
    s = mask_rle.split()
    starts, lengths = [np.asarray(x, dtype=int) for x in (s[0:][::2], s[1:][::2])]
    starts -= 1
    ends = starts + lengths
    img = np.zeros(shape[0]*shape[1], dtype=np.uint8)
    for lo, hi in zip(starts, ends):
        img[lo:hi] = 1
    return img.reshape(shape).T

def mask2string(dir):
    ## mask --> string
    strings = []
    ids = []
    ws, hs = [[] for i in range(2)]
    for image_id in os.listdir(dir):
        id = image_id.split('.')[0]
        path = os.path.join(dir, image_id)
        print(path)
        img = cv2.imread(path)[:,:,::-1]
        h, w = img.shape[0], img.shape[1]
        for channel in range(2):
            ws.append(w)
            hs.append(h)
            ids.append(f'{id}_{channel}')
            string = rle_encode_one_mask(img[:,:,channel])
            strings.append(string)
    r = {
        'ids': ids,
        'strings': strings,
    }
    return r


MASK_DIR_PATH = '/kaggle/working/test_mask' # change this to the path to your output mask folder
dir = MASK_DIR_PATH
res = mask2string(dir)
df = pd.DataFrame(columns=['Id', 'Expected'])
df['Id'] = res['ids']
df['Expected'] = res['strings']

df.to_csv(r'output.csv', index=False)

/kaggle/working/test_mask/692195f853af7f8a4df1ec859759b7c8.jpeg
/kaggle/working/test_mask/a15fc656702fa602bb3c7abacdbd7e6a.jpeg
/kaggle/working/test_mask/d3694abb47953b0e4909384b57bb6a05.jpeg
/kaggle/working/test_mask/314fe384eb2ba3adfda6c1899fdc9837.jpeg
/kaggle/working/test_mask/05b78a91391adc0bb223c4eaf3372eae.jpeg
/kaggle/working/test_mask/7330398846f67b5df7cdf3f33c3ca4d5.jpeg
/kaggle/working/test_mask/a9d45c3dbc695325ded465efde988dfb.jpeg
/kaggle/working/test_mask/f7fdb2d45b21960c94b0aab4c024a573.jpeg
/kaggle/working/test_mask/0398846f67b5df7cdf3f33c3ca4d5060.jpeg
/kaggle/working/test_mask/425b976973f13dd311a65d2b46d0a608.jpeg
/kaggle/working/test_mask/e4a17af18f72c8e6166a915669c99390.jpeg
/kaggle/working/test_mask/c7e610b1531871f2fd85a04faeeb2b53.jpeg
/kaggle/working/test_mask/e9082ea2c193ac8d551c149b60f29653.jpeg
/kaggle/working/test_mask/2d9e593b6be1ac29adbe86f03d900fd1.jpeg
/kaggle/working/test_mask/be86f03d900fd197cd955fa095f97845.jpeg
/kaggle/working/test_mask/3dd311a65d2b46

In [None]:
%%sh
killall5 -9