In [None]:
import cv2
import pandas as pd
import numpy as np
import os
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import h5py

In [None]:
class config:
    """Locations of the Sartorius cell instance segmentation input data."""
    # Root folder; every other path below is derived from it.
    DIRECTORY_PATH = "../input/sartorius-cell-instance-segmentation"
    TRAIN_CSV = f"{DIRECTORY_PATH}/train.csv"
    TRAIN_PATH = f"{DIRECTORY_PATH}/train"
    TEST_PATH = f"{DIRECTORY_PATH}/test"
    TRAIN_SEMI_SUPERVISED_PATH = f"{DIRECTORY_PATH}/train_semi_supervised"

In [None]:
def getImagePaths(path):
    """
    Collect the full path of every file found under a directory tree.

    parameters: path(string) - Root directory to walk
    returns: image_names(list of string) - One joined path per file found
    """
    image_names = []
    for root, _subdirs, files in os.walk(path):
        # tqdm wraps each directory's file list, so one progress bar is
        # shown per visited directory.
        image_names.extend(os.path.join(root, name) for name in tqdm(files))
    return image_names

In [None]:
# Build the file lists for the train, test and semi-supervised folders
# (evaluated in that order).
train_images_path, test_images_path, train_semi_supervised_path = (
    getImagePaths(folder)
    for folder in (
        config.TRAIN_PATH,
        config.TEST_PATH,
        config.TRAIN_SEMI_SUPERVISED_PATH,
    )
)

In [None]:
# Load the training annotations table; later cells read its `id`,
# `annotation` and `cell_type` columns.
df_train = pd.read_csv(config.TRAIN_CSV)

In [None]:
def rle_decode(mask_rle, shape=(520, 704)):
    '''
    Decode a run-length-encoded mask string into a binary mask array.

    mask_rle: run-length as a string of space-separated "start length"
              pairs; starts are 1-based offsets into the flattened
              (row-major) mask
    shape: shape of the array to return; defaults to (520, 704), the
           size this function previously had hard-coded
    Returns a float32 numpy array of the given shape holding 1 inside
    every run and 0 elsewhere

    '''
    tokens = mask_rle.split()

    # Even positions are starts (shifted to 0-based), odd positions are lengths.
    starts = [int(tok) - 1 for tok in tokens[0::2]]
    lengths = [int(tok) for tok in tokens[1::2]]

    # Fill a flat buffer first: the runs index the flattened mask.
    flat = np.zeros(int(np.prod(shape)), dtype=np.float32)
    for start, length in zip(starts, lengths):
        flat[start : start + length] = 1

    return flat.reshape(shape)

In [None]:
def store(image_id):
    """
    Write one training image and its combined annotation mask to
    '{image_id}.hdf5' in the current working directory.

    image_id: value of the `id` column in df_train; also the file stem of
              the PNG inside config.TRAIN_PATH

    The output file holds two datasets:
      'image' - the array returned by cv2.imread for the PNG
      'mask'  - 520x704 array, the sum of all decoded annotations for this
                id (pixels covered by several annotations end up above 1)

    Raises FileNotFoundError if the image cannot be read.
    """
    annos = df_train.loc[df_train.id == image_id, 'annotation'].tolist()

    image_path = f"{config.TRAIN_PATH}/{image_id}.png"
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports an unreadable/missing file by returning None
        # instead of raising; fail before an output file is created.
        raise FileNotFoundError(f"Could not read image: {image_path}")

    mask = np.zeros((520, 704))
    for anno in annos:
        mask += rle_decode(anno)

    # The context manager closes the file even if a write fails.
    with h5py.File(f'{image_id}.hdf5', 'w') as f:
        f.create_dataset('image', data=image)
        f.create_dataset('mask', data=mask)

In [None]:
# Smoke test: store the first image id, then read the file back.
ids = df_train.id.unique()
store(ids[0])
# Reopen the file that was just written (named after ids[0]) rather than a
# hard-coded filename; the context manager closes it even if a read fails.
with h5py.File(f'./{ids[0]}.hdf5', 'r') as hf:
    image = np.array(hf.get('image'))
    mask = np.array(hf.get('mask'))

In [None]:
# Display the image array read back from the HDF5 file.
# NOTE(review): this array originates from cv2.imread, which presumably
# yields BGR channel order while imshow expects RGB — harmless if the
# source images are grayscale; TODO confirm.
plt.imshow(image)

In [None]:
# Display the combined annotation mask read back from the HDF5 file.
plt.imshow(mask)

In [None]:
# Write one HDF5 file per training image id.
ids = df_train.id.unique()
for image_id in ids:
    store(image_id)
# Re-read the first stored file as a check; the name is derived from ids[0]
# instead of being hard-coded, and the context manager guarantees the
# handle is closed.
with h5py.File(f'./{ids[0]}.hdf5', 'r') as hf:
    image = np.array(hf.get('image'))
    mask = np.array(hf.get('mask'))

In [None]:
# Integer code assigned to each cell type name.
dct = dict(shsy5y=1, astro=2, cort=3)
# Direct dict lookup per row: an unknown cell type raises KeyError.
df_train['type'] = df_train['cell_type'].apply(lambda name: dct[name])

In [None]:
# Distinct values of the per-image mean of the numeric `type` code.
# NOTE(review): presumably a sanity check that each image contains a single
# cell type (a non-integer mean would reveal a mix) — note that codes 1 and 3
# mixed equally would still average to 2; TODO confirm intent.
df_train.groupby('id')['type'].mean().unique()