In [1]:
import os
import sys
import warnings
warnings.filterwarnings("ignore")
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
sys.path.append("/home/pervinco/BKAI_MetaPolyp")

import cv2
import yaml
import random
import numpy as np
import tensorflow as tf
import albumentations as A
import matplotlib.pyplot as plt

from glob import glob
from data.batch_preprocess import *
from utils.utils import decode_mask, decode_image, visualize

from data.BKAIDataset import BKAIDataset
from data.BalancedBKAIDataset import BalancedBKAIDataset

In [2]:
# Enable on-demand GPU memory allocation and pick a tf.distribute strategy
# based on how many GPUs TensorFlow can see.
gpus = tf.config.experimental.list_physical_devices('GPU')

# Start from TensorFlow's current (default) strategy so that `strategy` is
# always bound, even when no GPU is visible or the configuration below fails.
strategy = tf.distribute.get_strategy()

try:
    if len(gpus) > 1:
        print("Activate Multi GPU")
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        strategy = tf.distribute.MirroredStrategy(cross_device_ops=tf.distribute.HierarchicalCopyAllReduce())
    elif len(gpus) == 1:
        print("Activate Sigle GPU")
        tf.config.experimental.set_memory_growth(gpus[0], True)
        strategy = tf.distribute.experimental.CentralStorageStrategy()
    else:
        # Previously this case crashed with IndexError on `gpus[0]`, which the
        # RuntimeError handler did not catch.
        print("No GPU detected; using the default strategy")
except RuntimeError as e:
    # Reported and not re-raised, as before; `strategy` keeps the default
    # assigned above.
    print(e)

Activate Sigle GPU
INFO:tensorflow:ParameterServerStrategy (CentralStorageStrategy if you are using a single machine) with compute_devices = ['/job:localhost/replica:0/task:0/device:GPU:0'], variable_device = '/job:localhost/replica:0/task:0/device:GPU:0'


In [3]:
# Parse the project's YAML configuration file.
with open("/home/pervinco/BKAI_MetaPolyp/config.yaml", "r") as f:
    config = yaml.safe_load(f)

# Derive the image / mask / ground-truth folders from the configured data root.
data_dir = config["data_dir"]
image_dir, mask_dir, gt_dir = (
    f"{data_dir}/train",
    f"{data_dir}/train_mask",
    f"{data_dir}/train_gt",
)

In [4]:
# Collect every entry of each dataset folder, sorted by path.
image_files, mask_files, gt_files = (
    sorted(glob(f"{folder}/*")) for folder in (image_dir, mask_dir, gt_dir)
)


In [5]:
def compute_class_distribution(mask_files, num_classes, size=(256, 256)):
    """Count how many pixels of each class occur across a set of mask images.

    Every mask is read with OpenCV, resized to ``size``, converted to class
    ids by ``encode_mask`` and then tallied per class id.

    Args:
        mask_files: Iterable of mask image paths.
        num_classes: Number of class ids to count (ids ``0 .. num_classes - 1``).
        size: Target size handed to ``cv2.resize``. Defaults to the previously
            hard-coded ``(256, 256)``.

    Returns:
        ``np.ndarray`` of shape ``(num_classes,)`` holding the pixel count of
        each class id, accumulated over all masks.

    Raises:
        FileNotFoundError: If OpenCV cannot read one of the mask files.
    """
    # int64 so that large datasets / larger sizes cannot overflow the counters.
    distribution = np.zeros(num_classes, dtype=np.int64)

    for mask_file in mask_files:
        mask = cv2.imread(mask_file)
        if mask is None:
            # cv2.imread signals an unreadable file by returning None.
            raise FileNotFoundError(f"Could not read mask image: {mask_file}")

        # NOTE(review): cv2.imread yields BGR and the BGR->RGB conversion was
        # disabled in the original code - confirm the channel order that
        # encode_mask expects.
        # Nearest-neighbour keeps label colours intact; the default bilinear
        # interpolation blends colours at class boundaries.
        mask = cv2.resize(mask, size, interpolation=cv2.INTER_NEAREST)
        mask = encode_mask(mask)

        for class_id in range(num_classes):
            distribution[class_id] += (mask == class_id).sum()

    return distribution


def plot_class_distribution(distribution, class_names=None):
    """Show a bar chart of per-class pixel counts.

    Args:
        distribution: Sequence of pixel counts, one entry per class.
        class_names: Optional sequence of labels used as bar positions. When
            omitted or empty, the bars are placed at ``0 .. len(distribution) - 1``.
    """
    # Explicit None/length test: `if class_names:` raises ValueError when the
    # labels arrive as a NumPy array with more than one element.
    has_names = class_names is not None and len(class_names) > 0
    positions = class_names if has_names else np.arange(len(distribution))

    plt.figure(figsize=(10, 6))
    plt.bar(positions, distribution)

    plt.ylabel('Number of Pixels')
    plt.xlabel('Class')
    plt.title('Class Distribution in Semantic Segmentation')
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

In [6]:
def calculate_batch_distribution(dataloader, max_batches=5, output_dir="./images", num_classes=None):
    """Print per-batch class pixel counts and save a preview image per sample.

    For each of the first ``max_batches`` batches, the batch index and the
    image/mask shapes are printed, class ids are taken as the argmax over the
    last mask axis and counted, and one PNG per sample (image | colour mask |
    blended overlay, side by side) is written to ``output_dir``. The counts
    for the batch are printed once the batch is processed.

    Args:
        dataloader: Iterable yielding ``(images, masks)`` batches. Each image
            must expose ``.numpy()``.
        max_batches: Number of batches to inspect; ``None`` consumes the whole
            iterable. Defaults to the previously hard-coded 5.
        output_dir: Folder for the preview PNGs; created if missing.
        num_classes: Number of class ids to count. Defaults to
            ``config["num_classes"]`` from the notebook-level config.

    Returns:
        None. Results are printed and written to disk.
    """
    # Local import so the function does not rely on an extra notebook-level import.
    from itertools import islice

    if num_classes is None:
        num_classes = config["num_classes"]

    os.makedirs(output_dir, exist_ok=True)

    # islice stops before requesting batch `max_batches`, so no extra batch is
    # generated just to be thrown away (the old `if i == 5: break` did that).
    for i, (images, masks) in enumerate(islice(dataloader, max_batches)):
        print(i, images.shape, masks.shape)

        distribution = np.zeros(num_classes, dtype=np.int64)
        for j, (image, mask) in enumerate(zip(images, masks)):
            image = image.numpy()
            image = decode_image(image)
            image = image.astype(np.uint8)
            # Swap the channel order before writing with OpenCV; presumably the
            # pipeline yields RGB while cv2.imwrite expects BGR - TODO confirm.
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            # Class id per pixel = index of the largest value on the last axis.
            prob_mask = np.argmax(mask, -1)
            counts = np.bincount(prob_mask.ravel(), minlength=num_classes)
            # Ids >= num_classes are ignored, matching the per-class equality count.
            distribution += counts[:num_classes]

            decoded_mask = decode_mask(prob_mask)
            decoded_mask = decoded_mask.astype(np.uint8)
            decoded_mask = cv2.cvtColor(decoded_mask, cv2.COLOR_BGR2RGB)

            overlay = cv2.addWeighted(image, 0.7, decoded_mask, 0.3, 0)
            result = np.hstack((image, decoded_mask, overlay))

            cv2.imwrite(os.path.join(output_dir, f"batch{i}_no{j}.png"), result)

        print(distribution,"\n")

In [7]:
# image = image_files[0]
# mask = mask_files[0]

# image = cv2.imread(image)
# mask = cv2.imread(mask)

# image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# mask = cv2.cvtColor(mask, cv2.COLOR_BGR2RGB)
# visualize([image], [mask])

In [8]:
# size = config["img_size"] - 76

# test_transform = A.Compose([
#     # A.OneOf([A.RandomResizedCrop(height=size, width=size, p=0.5),
#     #          A.CenterCrop(height=size, width=size, p=0.5)
#     # ], p=1),
#     A.RandomResizedCrop(height=size, width=size, p=1),
#     # A.PadIfNeeded(p=1.0, min_height=config["img_size"], min_width=config["img_size"], pad_height_divisor=None, pad_width_divisor=None, border_mode=0, value=(0, 0, 0), mask_value=None)
# ])

# transformed = test_transform(image=image, mask=mask)
# transformed_image, transformed_mask = transformed["image"], transformed["mask"]
# visualize([transformed_image], [transformed_mask])

In [9]:
# gt_dist = compute_class_distribution(gt_files, num_classes=3)
# print(gt_dist)

In [10]:
# total_pixels = np.sum(gt_dist)
# class_weights = total_pixels / (len(gt_dist) * gt_dist)
# print(class_weights)

In [11]:
# normalized_class_weights = class_weights / np.sum(class_weights)
# print(normalized_class_weights)

In [12]:
# dist = compute_class_distribution(mask_files, num_classes=3)
# print(dist)

In [13]:
# Instantiate the balanced training dataset and print its length.
dataset = BalancedBKAIDataset(config, split="train")
print(len(dataset))

# Wrap the dataset in a tf.data pipeline. Both outputs (images, masks) are
# declared with the same spec: (batch, img_size, img_size, 3) float32.
dataloader = tf.data.Dataset.from_generator(
    lambda: dataset,
    output_signature=tuple(
        tf.TensorSpec(shape=(None, config["img_size"], config["img_size"], 3), dtype=tf.float32)
        for _ in range(2)
    ),
)

# Inspect the class balance of the first few batches and dump preview images.
calculate_batch_distribution(dataloader)

800


0 (16, 256, 256, 3) (16, 256, 256, 3)
[966230  35584  46762] 

1 (16, 256, 256, 3) (16, 256, 256, 3)
[977310  22695  48571] 

2 (16, 256, 256, 3) (16, 256, 256, 3)
[980971  25792  41813] 

3 (16, 256, 256, 3) (16, 256, 256, 3)
[954942  26641  66993] 

4 (16, 256, 256, 3) (16, 256, 256, 3)
[992992  23993  31591] 

