In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import glob as glob
%matplotlib inline

In [None]:
# Root directory of the input data. The trailing slash is required because
# every path below is built by plain string concatenation.
DATA_DIR = "../input/hubmap-organ-segmentation/"
# glob returns matches in a file-system dependent order; sorting makes the
# listing reproducible across runs and machines.
TRAIN_IMGS = sorted(glob.glob(DATA_DIR+"train_images/*.tiff"))
# Tabular data accompanying the images; later cells read its organ, sex, age,
# id, rle, img_height and img_width columns.
train_df = pd.read_csv(DATA_DIR+"train.csv")

In [None]:
#https://www.kaggle.com/code/pestipeti/decoding-rle-masks/notebook
def mask2rle(img):
    '''
    Encode a binary mask as a run-length string.

    img: numpy array, 1 - mask, 0 - background
    Returns a space-separated string of "start length" pairs, where starts
    are 1-based positions in the column-major flattening of img. An
    all-background mask yields an empty string.
    '''
    # Column-major flattening (equivalent to flattening the transpose).
    flat = img.flatten(order="F")
    # Pad with background on both sides so runs touching either end still
    # produce both a rising and a falling edge.
    padded = np.concatenate([[0], flat, [0]])
    # 1-based positions where the value changes: even slots are run starts,
    # odd slots are the position just past each run's end.
    edges = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Turn each end position into a run length.
    edges[1::2] -= edges[::2]
    return ' '.join(map(str, edges))


def rle2mask(mask_rle, shape=(3000,3000)):
    '''
    Decode a run-length string into a binary mask.

    mask_rle: space-separated "start length" pairs with 1-based starts
    shape: (width,height) of the mask; the flat buffer is reshaped to this
           and then transposed, so the returned array is (height,width)
    Returns numpy uint8 array, 1 - mask, 0 - background
    '''
    tokens = mask_rle.split()
    # Even tokens are starts (converted to 0-based), odd tokens are lengths.
    begins = np.asarray(tokens[0::2], dtype=int) - 1
    stops = begins + np.asarray(tokens[1::2], dtype=int)
    flat = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    # Paint every run into the flat buffer.
    for first, past_last in zip(begins, stops):
        flat[first:past_last] = 1
    return flat.reshape(shape).T

In [None]:
# Sorted array of the distinct organ labels found in the training table.
organs = np.unique(train_df["organ"])

In [None]:
# Show one randomly chosen image per organ with its decoded mask overlaid.
fig, ax = plt.subplots(1,len(organs),figsize=(15,8))
# plt.subplots returns a bare Axes (not an array) when there is a single
# column; normalise so the indexing below works for any number of organs.
axes = np.atleast_1d(ax)

for i, organ in enumerate(organs):
    organ_df = train_df[train_df['organ']==organ].reset_index()
    # Random row within this organ's subset (unseeded, so it changes per run).
    idx = np.random.randint(organ_df.shape[0])
    image = plt.imread(f"{DATA_DIR}train_images/{organ_df.id[idx]}.tiff")
    # rle2mask takes shape=(width, height): it reshapes the flat buffer to
    # that shape and transposes, producing a (height, width) mask that lines
    # up with the image. Passing (height, width) would transpose the mask
    # for any non-square image.
    mask = rle2mask(organ_df.rle[idx],shape=(organ_df.img_width[idx],organ_df.img_height[idx]))
    axes[i].imshow(image)
    # Semi-transparent overlay so the underlying image stays visible.
    axes[i].imshow(mask,alpha=0.4)
    axes[i].set_title(organ_df.organ[idx])
    axes[i].axis("off")

In [None]:
# Count male and female samples per organ, in the same order as `organs`.
males = []
females = []
for organ in organs:
    sex_counts = train_df.loc[train_df['organ']==organ, 'sex'].value_counts()
    # .get(..., 0) covers organs where one sex is absent; attribute access
    # (sex_counts.Male) would raise AttributeError for an organ with no
    # male samples.
    males.append(sex_counts.get('Male', 0))
    females.append(sex_counts.get('Female', 0))

In [None]:
# Grouped bar chart: one pair of bars (males, females) per organ.
bar_positions = np.arange(len(organs))
bar_width = 0.4
# Shift each series half a bar-width left/right of the tick so the pair
# sits side by side around the organ label.
for offset, counts, series_label in ((-0.2, males, 'males'), (0.2, females, 'females')):
    plt.bar(bar_positions + offset, counts, bar_width, label=series_label)
plt.xticks(bar_positions, organs)
plt.title("Number of Males and Females")
plt.legend()
plt.show()

In [None]:
# Age distribution per organ. plt.show() is called inside the loop so each
# organ's histogram is rendered before the next one is drawn.
for organ in organs:
    organ_df = train_df.loc[train_df['organ'] == organ].reset_index()
    # Fixed 0-100 range with 10 bins keeps the bin edges identical across organs.
    plt.hist(organ_df['age'], bins=10, range=(0, 100), edgecolor="black")
    plt.xlabel("Age")
    plt.ylabel("Count")
    plt.title(f"Age histogram for {organ}")
    plt.show()