In [1]:
import os

# Make the `angionet` package importable: if it is missing from the kernel,
# install it with pip from a GitHub repository, using credentials stored as
# Kaggle secrets.
try:
    import angionet
except ImportError:
    from kaggle_secrets import UserSecretsClient

    secrets = UserSecretsClient()

    GITHUB_TOKEN = secrets.get_secret("github-token")
    USERNAME = secrets.get_secret("github-username")
    URL = f"https://{USERNAME}:{GITHUB_TOKEN}@github.com/{USERNAME}/sennet-segmentation.git"

    # os.system reports failure only through its return value. Check it so a
    # failed install stops here instead of surfacing later as an ImportError.
    # The URL is deliberately left out of the message because it embeds the token.
    if os.system(f"pip install -q git+{URL}") != 0:
        raise RuntimeError(
            "pip could not install the angionet package from GitHub "
            "(non-zero exit status)"
        )

In [2]:
import concurrent.futures

import pandas as pd
import numpy as np

from angionet.functional import decode, extract_patches
from angionet.preprocessing import prepare_input

In [3]:
class config:
    """Notebook settings; the class object itself is passed to `prepare_input`."""

    # Kaggle input locations: competition data and the prepared sample table.
    root = "/kaggle/input/blood-vessel-segmentation"
    data = "/kaggle/input/sennet-data-preparation/train-data.csv"

    # Patch settings. NOTE(review): meanings are presumed from the names
    # (patch size, step between patches, padding mode, fill value) — confirm
    # against angionet.preprocessing.prepare_input.
    dim = 512
    stride = 412
    padding = "reflect"
    fill = -722.5

In [4]:
# Load the sample table, drop rows whose `vessels` value is the '1 0'
# placeholder, and renumber the remaining rows from zero.
df = (
    pd.read_csv(config.data)
    .loc[lambda frame: frame.vessels != '1 0']  # Filter empty masks
    .reset_index(drop=True)
)

# Create one directory per distinct group, relative to the working directory.
for group in df['group'].unique():
    os.makedirs(group, exist_ok=True)

In [5]:
# Announce that dataset processing is about to start.
print("Dataset processing...")

Dataset processing...


In [6]:
# Run `prepare_input` once per row of `df` on a pool of 8 worker threads.
# Each call receives the row's path, its [vessels, kidney] pair, its group,
# and the notebook `config`.
with concurrent.futures.ThreadPoolExecutor(max_workers = 8) as executor:
    # A list (not a set) keeps the futures in row order.
    futures = [
        executor.submit(
            prepare_input,
            sample.path,
            [sample.vessels, sample.kidney],
            sample.group,
            config
        )
        for _, sample in df.iterrows()
    ]

    # Collect in submission order so `results` lines up with the rows of `df`
    # on every run, regardless of which worker finishes first. `result()`
    # blocks until that task is done and re-raises any exception it hit.
    results = [future.result() for future in futures]

In [7]:
# Assemble the per-patch index table from the collected `prepare_input` results.
patches = (
    pd.DataFrame.from_records(
        np.concatenate(results),
        columns=['path', 'vessels_pixels', 'kidney_pixels'],
    )
    # Pixel counts are cast to float first, then to int.
    # NOTE(review): presumably they arrive as strings after concatenation — confirm.
    .astype({'vessels_pixels': 'float', 'kidney_pixels': 'float'})
    .astype({'vessels_pixels': 'int', 'kidney_pixels': 'int'})
    .assign(
        # First path component.
        group=lambda frame: frame['path'].map(lambda p: p.split('/')[0]),
        # Second path component, up to its first '-'.
        image=lambda frame: frame['path'].map(lambda p: p.split('/')[1].split('-')[0]),
        id=lambda frame: frame['group'] + "_" + frame['image'],
        axis='HxW',
    )
    .sort_values(['group', 'image'])
)

In [8]:
# Write the patch table to the working directory, without the DataFrame index column.
patches.to_csv('patches-data.csv', index = False)

In [9]:
# Report the number of rows in the patch table.
# NOTE(review): assumes one saved .npz file per row — confirm in prepare_input.
print(f"Saved {len(patches)} .npz files")

Saved 15833 .npz files
