# Requirements

In [None]:
import os
import cv2
import glob
import matplotlib
import numpy as np 
import pandas as pd
from tqdm import tqdm
import tifffile as tiff 
import matplotlib.pyplot as plt

# Setting up Wandb

In [None]:
%%capture
# Install (or upgrade to) the latest wandb client via pip; the %%capture cell
# magic on the first line keeps pip's output out of the notebook.
! pip install wandb --upgrade

In [None]:
import wandb
# Log this session in to Weights & Biases before any run is created.
wandb.login()

# Directory Path

In [None]:
# Directory prefix (with trailing slash) that image file names are appended to
# when the training TIFFs are read.
TRAIN_PATH = '../input/hubmap-organ-segmentation/train_images/'

# Dataset Exploration

In [None]:
# Load the training metadata CSV; the bare name on the last line makes the
# notebook display the DataFrame as the cell output.
train_df = pd.read_csv("../input/hubmap-organ-segmentation/train.csv")
train_df

### Function to label the bar graph

In [None]:
def autolabel(rects, labels=None, counts=None, axes=None):
    """Write a ``[label, count]`` annotation on top of every bar in ``rects``.

    Args:
        rects: Iterable of bar patches (e.g. the container returned by
            ``plt.bar``); each must provide ``get_x``, ``get_width`` and
            ``get_height``.
        labels: Per-bar labels, indexed like ``rects``. Defaults to the
            notebook-level variable ``x``.
        counts: Per-bar counts, indexed like ``rects``. Defaults to the
            notebook-level variable ``y``.
        axes: Object whose ``text`` method draws the annotation. Defaults to
            the notebook-level variable ``ax``.

    Integer labels are drawn rotated by 90 degrees; every other label is drawn
    horizontally.
    """
    # Fall back to the variables the plotting cells define, so the existing
    # one-argument calls ``autolabel(bar_plot)`` keep working unchanged.
    labels = x if labels is None else labels
    counts = y if counts is None else counts
    axes = ax if axes is None else axes

    # Iterate the bars that were passed in, not whatever the global
    # ``bar_plot`` currently points to.
    for idx, rect in enumerate(rects):
        label = labels[idx]
        rotation = 90 if isinstance(label, int) else 0
        axes.text(rect.get_x() + rect.get_width()/2., 1.0*rect.get_height(),
                  [label, counts[idx]],
                  ha='center', va='bottom', rotation=rotation)

### Organs Distribution

In [None]:
# Bar chart of how many training samples there are per organ.
fig, ax = plt.subplots(figsize=(15, 5))

# Take labels and counts from the SAME value_counts result so that each label
# is guaranteed to be paired with its own count, rather than relying on
# unique() and value_counts() happening to return the same order.
organ_counts = train_df['organ'].value_counts(sort=False)
x = list(organ_counts.index)
y = list(organ_counts)

bar_plot = plt.bar(x, y)
autolabel(bar_plot)
plt.xlabel('organ')
plt.ylabel('count')
plt.show()

### Age Distribution

In [None]:
# Bar chart of how many training samples there are per age.
fig, ax = plt.subplots(figsize=(15, 5))

# Take labels and counts from the SAME value_counts result so that each age is
# guaranteed to be paired with its own count. Ages are converted to Python
# ints, which is what autolabel checks for when choosing the label rotation.
age_counts = train_df['age'].value_counts(sort=False)
x = list(map(int, age_counts.index))
y = list(age_counts)

bar_plot = plt.bar(x, y)
autolabel(bar_plot)
plt.xlabel('age')
plt.ylabel('count')
plt.show()

### Gender Distribution

In [None]:
# Bar chart of how many training samples there are per value of 'sex'.
fig, ax = plt.subplots(figsize=(10, 5))

# Take labels and counts from the SAME value_counts result so that each label
# is guaranteed to be paired with its own count, rather than relying on
# unique() and value_counts() happening to return the same order.
sex_counts = train_df['sex'].value_counts(sort=False)
x = list(sex_counts.index)
y = list(sex_counts)

bar_plot = plt.bar(x, y)
autolabel(bar_plot)
plt.xlabel('sex')
plt.ylabel('count')
plt.show()

# Original Image

In [None]:
# Read one sample image (identified by its numeric id) and report its shape.
image_id_1 = 10044
image_1_path = f"{TRAIN_PATH}{image_id_1}.tiff"
image_1 = tiff.imread(image_1_path)
print(image_1.shape)

In [None]:
# Display the sample image on a 10x10 inch figure with the axes hidden.
plt.figure(figsize=(10, 10))
plt.imshow(image_1)
plt.axis("off")

# Mask to RLE & RLE to Mask

In [None]:
# https://www.kaggle.com/paulorzp/rle-functions-run-length-encode-decode
def mask2rle(img):
    """Run-length encode a binary mask.

    img: numpy array, 1 - mask, 0 - background.
    Returns the encoding as a space-separated string of "start length" pairs,
    where starts are 1-based positions in the transposed, flattened mask.
    """
    # Pad with a zero on both ends so runs touching either end of the
    # flattened mask still produce a start and an end transition.
    padded = np.concatenate([[0], img.T.flatten(), [0]])
    # 1-based positions at which the value changes: run starts and run ends
    # alternate.
    transitions = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Turn every "end" position into a run length (end - start).
    transitions[1::2] -= transitions[::2]
    return ' '.join(map(str, transitions))

def rle2mask(mask_rle, shape=(1600,256)):
    """Decode a run-length string into a binary mask.

    mask_rle: run-length encoding as a space-separated string of
        "start length" pairs (starts are 1-based).
    shape: (width, height) of the mask; the returned array is the transpose,
        i.e. it has shape (height, width).
    Returns a numpy uint8 array, 1 - mask, 0 - background.
    """
    tokens = mask_rle.split()
    # Even positions hold the 1-based starts, odd positions the run lengths.
    run_starts = np.asarray(tokens[0::2], dtype=int) - 1
    run_lengths = np.asarray(tokens[1::2], dtype=int)
    run_ends = run_starts + run_lengths

    flat_mask = np.zeros(shape[0]*shape[1], dtype=np.uint8)
    for begin, end in zip(run_starts, run_ends):
        flat_mask[begin:end] = 1
    return flat_mask.reshape(shape).T

In [None]:
# Decode the mask for the sample image. The RLE is looked up by image id rather
# than by row position, so the mask belongs to image_1 regardless of where that
# image's row sits in the CSV. rle2mask expects shape as (width, height).
rle_1 = train_df.loc[train_df["id"] == image_id_1, "rle"].iloc[0]
mask_1 = rle2mask(rle_1, (image_1.shape[1], image_1.shape[0]))
mask_1.shape

In [None]:
# Display the decoded mask on its own, semi-transparent, with the axes hidden.
plt.figure(figsize=(10,10))
plt.imshow(mask_1, cmap='coolwarm', alpha=0.5)
plt.axis("off")

# Combining Mask Image and Original Image

In [None]:
# Draw the image first, then the semi-transparent mask on top of it in the
# same figure, with the axes hidden.
plt.figure(figsize=(10,10))
plt.imshow(image_1)
plt.imshow(mask_1, cmap='coolwarm', alpha=0.5)
plt.axis("off")

# Adding data to wandb artifacts

In [None]:
# Collect every file under TRAIN_PATH and attach each one to its metadata row.
image_files = sorted(glob.glob(TRAIN_PATH+ "*"))
image_file_df = pd.DataFrame(image_files, columns=['file_name'])
# Pair rows with files by image id (the file name without its extension)
# instead of by row position, so the result does not depend on the CSV order
# matching the sorted directory listing. Rows without a matching file get NaN.
path_by_id = {
    os.path.splitext(os.path.basename(path))[0]: path for path in image_files
}
train_data = train_df.assign(
    file_name=train_df['id'].astype(str).map(path_by_id)
)

In [None]:
# Log every training sample (image, decoded mask, overlay and metadata) to a
# W&B table stored in a dataset artifact.
run = wandb.init(project='HuBMAP-HPA', entity='cosmo3769')

data_artifact = wandb.Artifact(name='train', type='RLE-TO-MASK dataset')
data_table = wandb.Table(columns=['image_id', 
                               'image', 
                               'mask', 
                               'masked image', 
                               'organ', 
                               'data source', 
                               'image_height', 
                               'image_width', 
                               'pixel size', 
                               'tissue thickness',
                               'rle',
                               'age',
                               'sex'
                               ])

# iterrows() has no length, so give tqdm the total to get a real progress bar.
for _, row in tqdm(train_data.iterrows(), total=len(train_data)):

    img = tiff.imread(row.file_name)
    # `row` is a single record, so `row.rle` is already the full RLE string.
    # Indexing it would select one character and decode a wrong/empty mask.
    mask = rle2mask(row.rle, (img.shape[1], img.shape[0]))

    # Render the image with the mask overlaid and save it to disk, so the
    # overlay can be read back as an array for the table.
    masked_path = str(row.id) + "_masked.jpg"
    plt.figure(figsize=(10,10))
    plt.axis("off")
    plt.imshow(img)
    plt.imshow(mask, cmap='coolwarm', alpha=0.5)
    plt.savefig(masked_path)
    plt.close()

    data_table.add_data(
        row.id,
        wandb.Image(img), 
        wandb.Image(mask),
        # cv2 reads BGR; convert to RGB before handing the array to wandb.
        wandb.Image(cv2.cvtColor(cv2.imread(masked_path), cv2.COLOR_BGR2RGB)),
        row.organ,
        row.data_source,
        row.img_height,
        row.img_width,
        row.pixel_size,
        row.tissue_thickness,
        row.rle,
        row.age,
        row.sex
    )

data_artifact.add(data_table, 'train-RLE-TO-MASK')
run.log_artifact(data_artifact)
wandb.finish()

### Work in Progress .....