In [7]:
import pandas as pd
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import torch
import random
import plotly.graph_objects as go
import time
from torchvision.transforms import ToTensor
from torchvision import datasets
import colorsys
import os





In [2]:
data=pd.read_csv("train.csv")

In [3]:
print(data.columns)
np.any(data.isna())# pas d'annotation manquante
np.any(data.groupby(["id","cell_type"]).count().groupby("id").count()!=1)# each image has only one type of cells


Index(['id', 'annotation', 'width', 'height', 'cell_type', 'plate_time',
       'sample_date', 'sample_id', 'elapsed_timedelta'],
      dtype='object')


False

In [4]:
import numpy as np


def get_binary_mask(metadata,width,height):
    """
    ref : https://ccshenyltw.medium.com/run-length-encode-and-decode-a33383142e6b

    """
    mask=np.zeros((height*width))
    


    for rle in metadata["annotation"]:
        
        list_rle=rle.split()
        start_pixels=np.asarray(list_rle[0::2],dtype=int)#convert string to int
        number_of_pixels=np.asarray(list_rle[1::2],dtype=int)
        
        for start_pixel,number_of_pixel in zip(start_pixels,number_of_pixels):
            
            mask[start_pixel:start_pixel+number_of_pixel]+=1
       
    return mask.reshape((height,width))


id_image="c4121689002f"
metadata=data.loc[(data["id"]==id_image)]
width,height=metadata[["width","height"]].iloc[0]
mask=get_binary_mask(metadata,width,height)



# definition of the dataset

In [5]:
sample_ids=data.sample_id.unique()
number_of_sample=len(sample_ids)

test_fraction=0.0025
test_right_bound=int(test_fraction*number_of_sample)

test_sample_ids,train_sample_ids=sample_ids[:test_right_bound],sample_ids[test_right_bound:]

train_data=data.loc[data.sample_id.isin(train_sample_ids)]
test_data=data.loc[data.sample_id.isin(test_sample_ids)]



In [111]:


def rle_to_mask(rle,height,width):
     #custom function, not efficient (for loop)
    rle=rle.split(" ")
    mask=np.zeros((height*width))
    start_pixels=np.asarray(rle[0::2],dtype=int)-1
    number_of_pixels=np.asarray(rle[1::2],dtype=int)
    for start_pixel,number_of_pixel in zip(start_pixels,number_of_pixels):
        mask[start_pixel:start_pixel+number_of_pixel]=1
        
        mask=mask.astype(np.uint8)

    return mask.reshape((height,width))#reshape the mask 

# fonction found on kaggle 
# ref: https://www.kaggle.com/inversion/run-length-decoding-quick-start
def rle_decode(mask_rle, shape, color=1):
    '''
    mask_rle: run-length as string formated (start length)
    shape: (height, width, channels) of array to return
    color: color for the mask
    Returns numpy array (mask)
    '''
    s = mask_rle.split()

    starts = list(map(lambda x: int(x) - 1, s[0::2]))
    lengths = list(map(int, s[1::2]))
    ends = [x + y for x, y in zip(starts, lengths)]
    img = np.zeros(shape[0] * shape[1], dtype=np.float32)
    for start, end in zip(starts, ends):
        img[start : end] = color

    return img.reshape(shape)
        
def image_plus_mask(image,mask,alpha_where_mask=0.4):
    """
    mask : array (number of instances,height,width) of 0 and 1
    image : array (height,with,3) (3 channels for color)
    """

    number_of_instances=mask.shape[0]
    get_random_rgb_color=lambda :255*np.array(colorsys.hsv_to_rgb(random.uniform(0,1),random.uniform(0,1),1))
    color_of_mask=np.stack([get_random_rgb_color() for i in range(number_of_instances)])#get the color of each element of the mask
    
    colored_mask=mask.transpose(1,2,0)@color_of_mask#resulting color for the mask
    alpha_where_mask=alpha_where_mask*mask.sum(axis=0)#all the cases where there is a mask
    alpha_where_mask=np.stack([alpha_where_mask,alpha_where_mask,alpha_where_mask],axis=-1)#stack the transparancy values for broadcasting the result
    

    return (1-alpha_where_mask)*image+alpha_where_mask*colored_mask


    
def get_mask(list_rle,shape):
    mask=np.zeros((shape[0],shape[1]),dtype=np.float64)

    for indice_label,rle in enumerate(list_rle):
        not_null_mask=rle_decode(rle,(shape[0],shape[1]),color=1+indice_label)
        mask=mask+(mask==0)*not_null_mask#add elements not already flag as elements

    return mask



def show_interactive_img_and_mask(img,masks):

    """
    img : heightxwidth
    masks : channelxheightxwidth
    """
    img_width = img.shape[1]
    img_height = img.shape[0]
    
    image=np.stack([img,img,img],axis=-1)
    image=image_plus_mask(image,masks)


    #see https://plotly.com/python/images/
    fig=go.Figure()
    scale_factor=0.5

    # Add invisible scatter trace.
    # This trace is added to help the autoresize logic work.
    fig.add_trace(
        go.Scatter(
            x=[0, img_width * scale_factor],
            y=[0, img_height * scale_factor],
            mode="markers",
            marker_opacity=0
        )
    )

    # Configure axes
    fig.update_xaxes(
        visible=False,
        range=[0, img_width * scale_factor]
    )

    fig.update_yaxes(
        visible=False,
        range=[0, img_height * scale_factor],
        # the scaleanchor attribute ensures that the aspect ratio stays constant
        scaleanchor="x"
    )

    # Add image
    fig.add_layout_image(dict(
            x=0,
            sizex=img_width,
            y=0,
            sizey=img_height,
            xref="x",
            yref="y",
            opacity=1.0,
            layer="below",
            
            source=Image.fromarray(image.astype(np.uint8)))
    )


    #set initial axes
    fig.update_xaxes(showgrid=False, range=(0, img_width))
    fig.update_yaxes(showgrid=False, scaleanchor='x', range=(img_height, 0))


    fig.update_layout(
        width=img_width * scale_factor,
        height=img_height * scale_factor,
        margin={"l": 0, "r": 0, "t": 0, "b": 0},
    )

    # Disable the autosize on double click because it adds unwanted margins around the image
    # More detail: https://plotly.com/python/configuration-options/
    fig.show(config={'doubleClick': 'reset'})
    return fig



In [55]:
def pil_loader(path: str) -> Image.Image:
    # from pytorch https://pytorch.org/vision/stable/_modules/torchvision/datasets/folder.html
    # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
    with open(path, 'rb') as f:
        img = Image.open(f)
        return img.convert('RGB')




# representation of the data

insight :
- limit computation when getting the file
- using a batch
- using a custum dataset from torch
- implement transform to the dataset
- using ren lenght encoded datas

Done : 
- visualisation tool : added an image of random color with some transparancy
- change the representation of the masks : the get methode return the image & mask of size (height,width,n_labels) (idea came from kaggle, better for nn construction ) 
- adapted all visualisation for thoses new representation :
    - move the decoding part to the __getitem__ methode
    - change the visualisation of the data to match the new representation
- decoding function too slow : 
    - found a better implementation of rle decoding 
- decoding the data is still very slow. Is it a problem ?
    - not a problem if we use gpu for training & cpu is sufficient
    - if it become a botteneck, we could use some other strategies to solve the problem
- issue with the dataloader : variable lenght for the mask. we have to define our own collate function.
    - first collide function based on the article
- issues of performances : 
    - get a memorry error (5min run time) : decrease the batch size & close internet pages to increase available cpus
    - 30s per batch (12min total)-> memory is insuficient (cpu)
    - change the dtype for the mask to speed up the process
    - savin mask in order to speed up the process ? 
    - test shows that the speeds up the process. However, it takes too much place on disk (more than 10Go), x2 times for first loop. 
    - changing the saving file : representation with a mask with value correspounding to the value of the number of the class : speed up the process
    - other representation of the data 
        - oter représentation than a mask (has the same output than the nn) :  https://www.kaggle.com/ctawong/cell-instance-segmentation-detectron2-mask-rcnn/ 



    


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


In [79]:
class Dataset(torch.utils.data.Dataset):

    def __init__(self, data,transform=None):
      #' Initialization'
      self.image_ids=data["id"].unique()
      
      self.transform = transform
     
      self.rles=data.groupby("id").agg({"annotation":lambda x: list(x)})["annotation"]
      self.width=data["width"].iloc[0] 
      self.height=data["height"].iloc[0]
   
    def __len__(self):
      'Denotes the total number of samples'
      return len(self.image_ids)
      
    
    def __getitem__(self, index):
        'Generates one sample of data'
        # Select sample
        ID = self.image_ids[index]
        # Load data and get label
        save_location=f"numpy_saves/{ID}.npy"
        X =  np.array(pil_loader(f'train/{ID}.png'))[:,:,0]#1 channel is sufficient to get all the datas
        if os.path.isfile(save_location):
          mask=np.load(save_location)
       
        else:
          mask = get_mask(self.rles[ID],(self.height,self.width)) 
          np.save(save_location,mask)
          
        obj_ids = np.unique(mask)
        # first id is the background, so remove it
        obj_ids = obj_ids[1:]

        # split the color-encoded mask into a set
        # of binary masks
        masks = mask == obj_ids[ :,None, None]
        
        masks = torch.as_tensor(masks, dtype=torch.bool)# convert to tensor

        if self.transform:
            X = self.transform(X)
        return  X,masks
    
    def show(self,index):
        
        img,masks=self[index]#get the data to plot
        masks=np.array(masks)
        img=(255*np.array(img)).astype(np.uint8)#conversion to the right format of datas for the visualisation
        img=img[0,:,:]#due to the way toTensor works, we get (1,height,width) tensors
        show_interactive_img_and_mask(img,masks)#plot the data


# CUDA for PyTorch
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
torch.backends.cudnn.benchmark = True

# Parameters
max_epochs = 100
t=time.time()
test_dataset=Dataset(test_data,transform=ToTensor())
train_dataset=Dataset(train_data,transform=ToTensor())
print(" time for creation of the dataset : ", time.time()-t)
t=time.time()

 time for creation of the dataset :  0.10402488708496094


In [112]:
train_dataset.show(0)

In [113]:
def my_collate(batch):
    data = [item[0] for item in batch]#no changement for data
    target = [item[1] for item in batch]# target is shaped (height,width,number of chanels : not good)
    
    return [data, target]


test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=16,shuffle=True,drop_last=False,pin_memory=False,collate_fn=my_collate)
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=16,shuffle=True,drop_last=False,pin_memory=False,collate_fn=my_collate)
(len(test_dataloader))

1

In [116]:
for i in test_dataloader:#without loading 
    pass

In [117]:
for i in test_dataloader:#without loading 
    assert False

AssertionError: 

In [None]:

    for i,batch in enumerate(test_dataloader):
        pass
    print("all saving done",time.time()-t0)
    t0=time.time()
    
print(time.time()-t0)



all saving done 18.883190393447876
all saving done 0.3210256099700928
0.013002157211303711


In [32]:
masks=test_dataset[0][1]
masks.shape


torch.Size([1, 520, 704])

In [18]:

plt.Figure(figsize=(50,50))
test_dataset.show(0)

ValueError: operands could not be broadcast together with shapes (1,520,3) (520,704,3) 

# main code

In [28]:
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
import torchvision
model=torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
model.eval()
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
torch.backends.cudnn.benchmark = True
model.eval()
x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
predictions = model(x)
predictions

(2, 2, 3)