In [1]:
import pandas as pd
import cv2
import numpy as np

In [2]:
class config:
    """Static settings used by the cells that build the training table."""

    # Prefix prepended to every image path that is constructed.
    root = '/kaggle/input/blood-vessel-segmentation'

    # CSV that is read with pd.read_csv as the base table.
    data = '/kaggle/input/sennet-hoa-kidney-13-dense-full-kidney-masks/train_rles.csv'

    # Rows whose group is not listed here are dropped from the table.
    groups = [
        'kidney_1_dense',
        'kidney_3_dense',
        'kidney_3_sparse',
        'kidney_2',
    ]

In [3]:
# Image directory (relative to config.root) for each group name.
# Every group follows the pattern train/<folder>/images.
# NOTE(review): 'kidney_3_dense' resolves to the kidney_3_sparse image folder --
# presumably the dense set ships without its own images; confirm against the dataset.
dirs = {
    group: f'train/{folder}/images'
    for group, folder in [
        ('kidney_1_dense', 'kidney_1_dense'),
        ('kidney_1_voi', 'kidney_1_voi'),
        ('kidney_2', 'kidney_2'),
        ('kidney_3_dense', 'kidney_3_sparse'),
        ('kidney_3_sparse', 'kidney_3_sparse'),
    ]
}

In [4]:
def get_image_size(path):
    """Return the size of the image at ``path`` as ``[height, width]``.

    The image is decoded as single-channel grayscale, so the shape has exactly
    two entries.

    Args:
        path: Filesystem path of the image to read.

    Returns:
        list: ``[height, width]`` taken from the decoded array's shape.

    Raises:
        FileNotFoundError: If OpenCV cannot read the file.
    """
    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread signals failure by returning None rather than raising; without
    # this check the caller only sees "'NoneType' object has no attribute 'shape'".
    if img is None:
        raise FileNotFoundError(f"cv2.imread could not read image: {path}")
    return list(img.shape)

def compute_stats(path):
    """Compute summary intensity statistics for the image at ``path``.

    The image is decoded as grayscale, divided by 255.0 and stored as float32
    before the statistics are taken, so 8-bit pixel values map onto [0, 1].

    Args:
        path: Filesystem path of the image to read.

    Returns:
        tuple: ``(min, max, mean, std)`` of the scaled image, as numpy scalars.

    Raises:
        FileNotFoundError: If OpenCV cannot read the file.
    """
    image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread returns None on failure; dividing None by 255.0 would otherwise
    # surface as an uninformative TypeError that hides which file is at fault.
    if image is None:
        raise FileNotFoundError(f"cv2.imread could not read image: {path}")
    image = np.asarray(image / 255.0, dtype=np.float32)
    stats = (
        image.min(),
        image.max(),
        image.mean(),
        image.std()
    )
    return stats

In [5]:
# Base table: each row carries an `id` of the form "<group>_<image>".
df = pd.read_csv(config.data)

# Split the id on its last underscore: the prefix is the group, the suffix the image name.
df['group'] = df['id'].apply(lambda x: "_".join(x.split('_')[:-1]))
df['image'] = df['id'].apply(lambda x: x.split('_')[-1])

# Keep only the configured groups *before* building paths or touching the disk:
# groups that are about to be discarded then need no entry in `dirs` and cost no
# image read. `.copy()` detaches the filtered slice so the column assignment
# below writes to an independent frame.
df = df.loc[df.group.isin(config.groups)].copy()
df['path'] = [
    f"{config.root}/{dirs[group]}/{image}.tif"
    for group, image in zip(df['group'], df['image'])
]

# Image size is read once per group (from that group's first row) and then
# attached to every row of the group via the merge.
# NOTE(review): this assumes all images within a group share one size -- confirm.
group_sizes = (
    df.drop_duplicates(subset = ['group'])
        .set_index('group')['path']
        .apply(get_image_size)
        .apply(pd.Series)
        .rename(columns = {0: 'height', 1: 'width'})
)
df = df.merge(group_sizes, on = 'group', how = 'left')
df = df.rename(columns = {'rle': 'vessels', 'kidney_rle': 'kidney'})

# Per-image intensity statistics, one row per image, sharing df's index so the
# column-wise concat below lines the rows up.
stats_df = (df.path
            .apply(compute_stats).apply(pd.Series)
            .rename(columns = {0: 'min', 1: 'max', 2: 'mean', 3: 'std'})
           )

df = pd.concat((df, stats_df), axis = 1)
# Left-pad the image name with zeros to at least four characters.
df['image'] = df['image'].apply(lambda x: str(x).zfill(4))

In [6]:
# Write the assembled table to train-data.csv in the working directory;
# index = False keeps the DataFrame index out of the file.
df.to_csv('train-data.csv', index = False)