In [1]:
import sys
import cv2
import torch
import torchvision
import numpy as np 
import pandas as pd
from torch import nn
from PIL import Image
from tqdm import tqdm
import matplotlib.pyplot as plt
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import segmentation_models_pytorch as smp
from torchvision.transforms import transforms
from sklearn.model_selection import train_test_split

In [2]:
DEVICE = 'cpu'
IMAGE_HEIGHT = 320
IMAGE_WIDTH = 320
BATCH_SIZE = 8
ENCODER = 'efficientnet-b7'
WEIGHTS = 'imagenet'

In [4]:
data_frame = pd.read_csv('train.csv')
data_frame.tail(5)

Unnamed: 0,masks,images
138,white masked images\95.png,training samples\95.jpg
139,white masked images\96.png,training samples\96.jpg
140,white masked images\97.png,training samples\97.jpg
141,white masked images\98.png,training samples\98.jpg
142,white masked images\99.png,training samples\99.jpg


In [5]:
data_frame.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 143 entries, 0 to 142
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   masks   143 non-null    object
 1   images  143 non-null    object
dtypes: object(2)
memory usage: 2.4+ KB


In [14]:
def mask_to_rgb(mask, label=False):
    color_map={
        0: (0, 0, 0), # Background
        1: (255, 0, 0), # Class 1
        2: (0, 255, 0), # Class 2
        3: (0, 0, 255), # Class 3
        4: (0, 128, 128), # Class 4
        5: (128, 0, 128), # Class 5
    }
    color_to_class = {
      (255, 0, 0): "Horse",  # Red pixel represents Horse
      (0, 255, 0): "Bench",  # Green pixel represents Bench
      (0, 0, 255): "Water Dispenser",  # Blue pixel represents Water dispenser
      (0, 128, 128): "Trash bin",  # Blue pixel represents Dust bin
      (128, 0, 128): "Stop Sign",  # Blue pixel represents stop sign
    }
    if isinstance(mask, torch.Tensor):
        mask = mask.detach().cpu().numpy()
    
    # Count the number of occurrences of each RGB color in the predicted mask.
    counts = {}
    for row in range(320):
        for column in range(320):
            class_index = mask[0][row][column]
            if class_index != 0:
                rgb = color_map[class_index]
                if rgb in counts:
                    counts[rgb] += 1
                else:
                    counts[rgb] = 1
                    
    # Determine the RGB color with the highest count that is not the background color.
    max_count = 0
    max_rgb = None
    for rgb, count in counts.items():
        if count > max_count and rgb != (0,0,0):
            max_count = count
            max_rgb = rgb
    
    # Replace all non-background class indexes in the mask with the chosen RGB color.
    if max_rgb is not None:
        rgb_mask = np.zeros((320, 320, 3), dtype=np.uint8)
        for row in range(320):
            for column in range(320):
                class_index = mask[0][row][column]
                if class_index != 0:
                    rgb = color_map[class_index]
                    if rgb == max_rgb:
                        rgb_mask[row][column] = rgb
                    else:
                        rgb_mask[row][column] = max_rgb
                else:
                    rgb_mask[row][column] = (0,0,0)
    else:
        # If all non-background colors have zero occurrences, use the original function.
        rgb_mask = np.zeros((320, 320, 3), dtype=np.uint8)
        for row in range(320):
            for column in range(320):
                class_index = mask[0][row][column]
                rgb_mask[row][column] = color_map[class_index]
  
    if label:
        return rgb_mask, color_to_class.get(max_rgb)    
    return rgb_mask


def show_image(image, mask, pred_image=None):
    image = image.permute(1, 2, 0).squeeze().numpy()
    mask = mask_to_rgb(mask)

    if pred_image is not None:
        pred_image = mask_to_rgb(pred_image)

    image = image / np.max(image)
    mask = mask / np.max(mask)
    
    if pred_image is not None:
        pred_image = pred_image / np.max(pred_image)

    if pred_image is None:
        f, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5))

        ax1.set_title('IMAGE')
        ax1.imshow(image, cmap='gray')

        ax2.set_title('GROUND TRUTH')
        ax2.imshow(mask)

    else:
        f, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(10, 5))

        ax1.set_title('IMAGE')
        ax1.imshow(image, cmap='gray')

        ax2.set_title('GROUND TRUTH')
        ax2.imshow(mask)

        ax3.set_title('MODEL OUTPUT')
        ax3.imshow(pred_image)

    plt.show()

        
def classIndexMask(mask):
    # Define a dictionary mapping RGB color values to class indices
    color_to_class = {
      (0, 0, 0): 0,  # Black pixel represents background
      (255, 0, 0): 1,  # Red pixel represents Horse
      (0, 255, 0): 2,  # Green pixel represents Bench
      (0, 0, 255): 3,  # Blue pixel represents Water dispenser
      (0, 128, 128): 4,  # light blue pixel represents Dust bin
      (128, 0, 128): 5,  # pink pixel represents stop sign
    }

    # Load a masked image in RGB format
    mask_image = Image.open(mask)
    mask_image = mask_image.resize((320, 320))

    # Convert the RGB image to a numpy array
    mask_array = np.array(mask_image)

    # Create a new numpy array to hold the class index mask
    class_indices = np.zeros((mask_array.shape[0], mask_array.shape[1]), dtype=np.uint8)

    # Iterate over each RGB color value and map it to a class index
    for color, class_idx in color_to_class.items():
      # Find the indices in the numpy array where the RGB color value matches the dictionary key
      color_indices = np.where(np.all(mask_array == color, axis=-1))
      # Assign the corresponding class index to the pixels at those indices
      class_indices[color_indices] = class_idx
    return class_indices

In [15]:
training_data, testing_data = train_test_split(data_frame, test_size=0.2, random_state=42)

validation_data, test_data = train_test_split(testing_data, test_size=0.5, random_state=42)

In [25]:
class SegmentationDataset(Dataset):
    
    def __init__(self, df):
        
        self.df=df
        self.transforms = transforms.Resize((IMAGE_WIDTH, IMAGE_HEIGHT))
        
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self ,idx):
        row = self.df.iloc[idx]
        image_path = row['images']
        mask_path = row['masks']
        image_path = dir+row.images.replace('\\', '/')
        mask_path = dir+row.masks.replace('\\', '/')
        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        mask = classIndexMask(mask_path)
        mask =np.expand_dims(mask ,axis=-1)#(h,w,1)
        #(h,w,c) -> (c,h,w)
        image = np.transpose(image ,(2,0,1)).astype(np.float32)
        mask = np.transpose(mask ,(2,0,1)).astype(np.float32)
        original_image = self.transforms(torch.Tensor(image))
        original_image = original_image/255.0 # normalizing original image tensor [0,1] range
        return original_image, torch.Tensor(mask)

In [26]:
train_data_set = SegmentationDataset(training_data)
valid_data_set = SegmentationDataset(validation_data)
test_data_set = SegmentationDataset(test_data)

In [27]:
print('Size of Training dataset: {}'.format(train_data_set.__len__()))
print('Size of Testing dataset: {}'.format(test_data_set.__len__()))
print('Size of Validation dataset: {}'.format(valid_data_set.__len__()))

Size of Training dataset: 114
Size of Testing dataset: 15
Size of Validation dataset: 14


In [28]:
train_loader = DataLoader(train_data_set, batch_size=8, shuffle=True)

val_loader = DataLoader(valid_data_set, batch_size=8, shuffle=True)

test_loader = DataLoader(test_data_set, batch_size=8, shuffle=True)

In [29]:
print('Total number of batches in train data loader: {}'.format(len(train_loader)))
print('Total number of batches in test data loader: {}'.format(len(test_loader)))
print('Total number of batches in validation data loader: {}'.format(len(val_loader)))

Total number of batches in train data loader: 15
Total number of batches in test data loader: 2
Total number of batches in validation data loader: 2


In [30]:
for image , mask in train_loader:
    break
print (f'One batch image shape: {image.shape}')
print (f'One batch image shape: {mask.shape}')

TypeError: unsupported operand type(s) for +: 'builtin_function_or_method' and 'str'

In [24]:
!conda install -c conda-forge opencv=3.2.0

'conda' is not recognized as an internal or external command,
operable program or batch file.
