In [1]:
import os

# Make `angionet` importable: if it is missing, install it with pip from the
# sennet-segmentation GitHub repository, authenticating with Kaggle secrets.
try:
    import angionet
except ImportError:
    import subprocess
    import sys

    from kaggle_secrets import UserSecretsClient

    secrets = UserSecretsClient()

    GITHUB_TOKEN = secrets.get_secret("github-token")
    USERNAME = secrets.get_secret("github-username")
    URL = f"https://{USERNAME}:{GITHUB_TOKEN}@github.com/{USERNAME}/sennet-segmentation.git"

    # Pass an argument list (no shell) so the credential-bearing URL is never
    # parsed by a shell, and run pip through the kernel's own interpreter so
    # the package lands in the environment this notebook imports from.
    exit_status = subprocess.run(
        [sys.executable, "-m", "pip", "install", "-q", f"git+{URL}"]
    ).returncode
    if exit_status != 0:
        # The command line is deliberately left out of the message: it embeds
        # the GitHub token, which must not end up in the notebook output.
        raise RuntimeError(
            f"pip install from the sennet-segmentation repository failed "
            f"(exit status {exit_status})"
        )

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
aiobotocore 2.7.0 requires botocore<1.31.65,>=1.31.16, but you have botocore 1.34.2 which is incompatible.[0m[31m
[0m

In [2]:
import concurrent.futures

import pandas as pd
import numpy as np
import cv2

from angionet.functional import decode, extract_patches
from angionet.preprocessing import prepare_input

In [3]:
class config:
    # Plain namespace of settings. The class object itself is handed to
    # `prepare_input`, so every attribute name and value below must stay as is.

    # Competition dataset directory (not read directly by the cells shown here).
    root = "/kaggle/input/blood-vessel-segmentation"
    # CSV index of training samples; loaded with `pd.read_csv(config.data)`.
    data = "/kaggle/input/sennet-data-preparation/train-data.csv"

    # NOTE(review): the four values below are only consumed inside
    # `prepare_input`; their meaning is inferred from the names - confirm there.
    dim = 512  # presumably the patch side length
    stride = 412  # presumably the step between patches (less than `dim`)
    padding = "reflect"  # presumably the border padding mode
    fill = -722.5  # presumably a fill value; origin of the constant not shown

In [4]:
# Load the sample index, keep only rows whose `vessels` value differs from
# '1 0', and renumber the remaining rows from zero.
df = (
    pd.read_csv(config.data)
    .loc[lambda frame: frame.vessels != '1 0']  # Filter empty masks
    .reset_index(drop = True)
)

# Each group gets two output directories: images/<group> for the re-encoded
# TIFFs and a top-level <group> directory.
# NOTE(review): the top-level directory is presumably where `prepare_input`
# writes its patch files - confirm against angionet.
for group_name in df["group"].unique():
    for directory in (f"images/{group_name}", group_name):
        os.makedirs(directory, exist_ok = True)

In [5]:
# Re-encode every source image as 8-bit grayscale under
# images/<group>/<image, zero-padded to 4 digits>.tif and remember the new paths.
paths = []
for _, entry in df.iterrows():
    dst = f"images/{entry.group}/{entry.image:04d}.tif"
    image = cv2.imread(entry.path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread signals an unreadable or missing file by returning None
    # rather than raising; fail here with the offending path.
    if image is None:
        raise FileNotFoundError(f"cv2.imread could not read {entry.path!r}")
    image = np.asarray(image, dtype = 'uint8')
    # cv2.imwrite reports failure through its return value; without this check
    # a path to a file that was never written would be recorded below.
    if not cv2.imwrite(dst, image):
        raise OSError(f"cv2.imwrite could not write {dst!r}")
    paths.append(dst)

# `data` mirrors `df` but points at the re-encoded files and stores the image
# number as a zero-padded string, matching the file names written above.
data = df.copy()
data['path'] = paths
data['image'] = data['image'].apply(lambda x: str(x).zfill(4))

In [6]:
# Progress marker: announces the start of dataset processing.
print("Dataset processing...")

Dataset processing...


In [7]:
# Run `prepare_input` once per row of `data` on a pool of 8 worker threads.
# Each call receives the image path, the [vessels, kidney] pair, the group name
# and the shared `config` class.
with concurrent.futures.ThreadPoolExecutor(max_workers = 8) as executor:
    futures = [
        executor.submit(
            prepare_input,
            sample.path,
            [sample.vessels, sample.kidney],
            sample.group,
            config,
        )
        for _, sample in data.iterrows()
    ]

    # Gather results in submission order rather than completion order, so the
    # order of `results` (and of the rows derived from it) does not depend on
    # thread timing and is the same on every run. All tasks are already
    # queued, so they still execute concurrently; `.result()` re-raises any
    # exception raised by the corresponding worker call.
    results = [future.result() for future in futures]

In [8]:
# Flatten the per-image results into one table with a row per record.
pixel_columns = ['vessels_pixels', 'kidney_pixels']

patches = pd.DataFrame.from_records(
    np.concatenate(results),
    columns = ['path'] + pixel_columns,
)

# Convert the two count columns to integers, going through float first.
patches[pixel_columns] = patches[pixel_columns].astype('float').astype('int')

# Derive identifiers from the path: the first '/'-separated component is the
# group, and the second component up to its first '-' is the image.
patches = (
    patches
    .assign(
        group = lambda frame: frame['path'].map(lambda p: p.split('/')[0]),
        image = lambda frame: frame['path'].map(
            lambda p: p.split('/')[1].split('-')[0]
        ),
        id = lambda frame: frame['group'] + "_" + frame['image'],
        axis = 'HxW',
    )
    .sort_values(by = ['group', 'image'])
)

In [9]:
# Write the patch table to patches-data.csv in the working directory,
# without the DataFrame index column.
patches.to_csv('patches-data.csv', index = False)

In [10]:
# Reports the number of rows in `patches`. The message assumes one .npz file
# per row (presumably written by `prepare_input` - TODO confirm).
print(f"Saved {len(patches)} .npz files")

Saved 15833 .npz files
