In [1]:
import os

# Install the private `angionet` package on demand, i.e. only when it cannot
# already be imported in this kernel.
try:
    import angionet
except ImportError:
    import subprocess
    import sys

    from kaggle_secrets import UserSecretsClient

    secrets = UserSecretsClient()

    GITHUB_TOKEN = secrets.get_secret("github-token")
    USERNAME = secrets.get_secret("github-username")
    URL = f"https://{USERNAME}:{GITHUB_TOKEN}@github.com/{USERNAME}/sennet-segmentation.git"

    # Run pip through this kernel's interpreter with an argument list (no shell),
    # so the credentials are never parsed by a shell and the package lands in the
    # environment the notebook is actually running in.
    completed = subprocess.run(
        [sys.executable, "-m", "pip", "install", "-q", f"git+{URL}"],
        check=False,
    )
    if completed.returncode != 0:
        # The command line embeds the GitHub token, so it is deliberately kept
        # out of the exception message (which would end up in the cell output).
        raise RuntimeError(
            "pip install of sennet-segmentation failed "
            f"with exit code {completed.returncode}"
        )

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
aiobotocore 2.7.0 requires botocore<1.31.65,>=1.31.16, but you have botocore 1.34.2 which is incompatible.[0m[31m
[0m

In [2]:
import gc
from pathlib import Path
import concurrent.futures

import pandas as pd
import numpy as np
import cv2

from angionet.datasets import VolumeDataset
from angionet.utils import load_volume, save_volume
from angionet.preprocessing import prepare_input

In [3]:
class config:
    # Plain namespace of run settings. The class object itself is handed to
    # `prepare_input`, so every attribute name below is part of that contract.

    # Input locations on the Kaggle filesystem.
    root: str = "/kaggle/input/blood-vessel-segmentation"
    data: str = "/kaggle/input/sennet-data-preparation/train-data.csv"

    # Patch geometry -- presumably patch side length and step in pixels;
    # TODO confirm against `prepare_input`.
    dim: int = 800
    stride: int = 600

    # Border handling -- presumably the padding mode and the fill value used
    # by `prepare_input`; TODO confirm.
    padding: str = "reflect"
    fill: float = -722.5

In [4]:
# Load the table stored at `config.data`; its `group` column drives the
# per-group loops further down.
df = pd.read_csv(config.data)

In [5]:
# Load each group's volume, print its intensity statistics (scaled by 1/255),
# and collect whatever `save_volume` returns for that group.
data = []
for group in df.group.unique():
    dataset = VolumeDataset(df, group)
    volume = load_volume(dataset)
    image = volume['image']

    report = [f"Volume statistics: {group}"]
    for label, reducer in (
        ("min", image.min),
        ("max", image.max),
        ("mean", image.mean),
        ("std", image.std),
    ):
        report.append(f"  {label}: {(reducer() / 255):.4f}")
    print("\n".join(report))

    saved = save_volume("images/" + group, volume, (2, 0, 1)) # WxDxH
    data.append(saved)

    # Force a collection between groups; volumes are large.
    gc.collect()

Volume statistics: kidney_1_dense
  min: 0.0000
  max: 1.0000
  mean: 0.3529
  std: 0.0421
Volume statistics: kidney_3_dense
  min: 0.0745
  max: 0.9647
  mean: 0.2978
  std: 0.0117


In [6]:
def compute_stats(path):
    """Return ``(min, max, mean, std)`` of an image read as grayscale.

    Parameters
    ----------
    path : str or os.PathLike
        Location of the image file to read with ``cv2.imread``.

    Returns
    -------
    tuple
        ``(min, max, mean, std)`` computed over every pixel of the image.

    Raises
    ------
    OSError
        If ``cv2.imread`` cannot load the file. OpenCV signals a missing or
        undecodable file by returning ``None`` instead of raising, which would
        otherwise surface here as an opaque ``AttributeError``.
    """
    image = cv2.imread(os.fspath(path), cv2.IMREAD_GRAYSCALE)
    if image is None:
        raise OSError(f"Could not read image: {path!r}")
    return (
        image.min(),
        image.max(),
        image.mean(),
        image.std()
    )

In [7]:
# NOTE: `data` is rebound here from a list of frames to a single DataFrame, so
# this cell cannot be re-run on its own; re-run the export loop first.
data = pd.concat(data, axis = 0)
data = data.loc[data.vessels != '1 0'] # Filter empty masks
# Reset the index right after filtering: this yields a fresh frame, so the
# column assignments below do not write into a slice of the concatenated
# frame, and it gives `stats` a clean 0..n-1 index to align on.
data = data.reset_index(drop = True)
data['group'] = data['path'].apply(lambda x: x.split('/')[-2])
data['image'] = data['path'].apply(lambda x: Path(x).stem)
data['id'] = data['group'] + "_" + data['image']
data['axis'] = 'DxH'

# Build the stats frame from a plain list of (min, max, mean, std) tuples.
# Passing the named Series returned by `.apply` straight to `pd.DataFrame`
# together with `columns=` treats it as one column called 'path' and then
# reindexes to the requested columns, which produces all-NaN statistics.
stats = pd.DataFrame(
    data['path'].map(compute_stats).tolist(),
    columns = ['min', 'max', 'mean', 'std'],
    index = data.index,
)

data = pd.concat((data, stats), axis = 1)
data.to_csv("images/train_rles.csv", index = False)

# One output directory per group, created in the current working directory.
for group in df.group.unique():
    os.makedirs(group, exist_ok = True)

In [8]:
# Progress marker printed before the parallel `prepare_input` stage.
print("Dataset processing...")

Dataset processing...


In [9]:
# Run `prepare_input` once per row of `data` on a pool of 8 threads.
# Futures are kept in a list and drained in submission order, so `results`
# follows the row order of `data` on every run. Draining a set with
# `as_completed` made the order depend on thread timing, and with it the row
# order of the table built from `results`.
with concurrent.futures.ThreadPoolExecutor(max_workers = 8) as executor:
    futures = [
        executor.submit(
            prepare_input,
            sample.path,
            [sample.vessels, sample.kidney],
            sample.group,
            config
        )
        for sample in data.itertuples(index = False)
    ]

    # `result()` blocks until that task is done and re-raises its exception.
    results = [future.result() for future in futures]

In [10]:
# Flatten the per-image results into one table of patch records.
count_columns = ['vessels_pixels', 'kidney_pixels']
patches = pd.DataFrame.from_records(
    np.concatenate(results),
    columns = ['path', *count_columns],
)

# Pixel counts go through float first, then int.
patches[count_columns] = patches[count_columns].astype('float').astype('int')

# Paths are split on '/': the first part is the group, and the second part up
# to its first '-' is the source image name.
path_parts = patches['path'].map(lambda p: p.split('/'))
patches['group'] = path_parts.map(lambda parts: parts[0])
patches['image'] = path_parts.map(lambda parts: parts[1].split('-')[0])
patches['id'] = patches['group'] + "_" + patches['image']
patches['axis'] = 'DxH'

patches = patches.sort_values(['group', 'image'])

In [11]:
# Persist the patch table as CSV, without the DataFrame index column.
patches.to_csv('patches-data.csv', index = False)

In [12]:
# Report the number of rows in `patches` (the message labels them .npz files).
print(f"Saved {len(patches)} .npz files")

Saved 7821 .npz files
