In [1]:
import SimpleITK as sitk
from dotenv import load_dotenv
from dotenv import dotenv_values
from glob import glob
from pathlib import Path
import os
import numpy as np
import yaml
from tqdm import tqdm
import matplotlib.pyplot as plt
from tqdm import tqdm

from acouslic_utils import *

In [2]:
# Machine-specific settings are read from the local .env file; DS_RAW (the
# dataset root directory) is the key used by the cells below.
# NOTE(review): printing the whole mapping would expose any secret later added
# to .env in the saved notebook output - consider printing only DS_RAW.
env = dotenv_values(".env")
print(env)

# Project-wide constants (frame/sweep counts, label ids, directory names).
# A malformed consts.yaml is allowed to raise right here: swallowing the
# YAMLError would leave `consts` as None and only fail later with an
# unrelated-looking TypeError when it is first subscripted.
with open('consts.yaml') as f:
    consts = yaml.safe_load(f)
consts

OrderedDict([('DS_RAW', '/mnt/Enterprise2/PUBLIC_DATASETS/acouslic-ai/')])


{'OPTIMAL': 1,
 'SUBOPTIMAL': 2,
 'TOTAL_FRAMECOUNT': 840,
 'TOTAL_SWEEPS': 6,
 'singlestack_dir': 'single_stack'}

In [13]:
def create_sitk_img_from_arr(arr: np.ndarray, orig_sitk_img: "sitk.Image", override_spacing=(0.28, 0.28, 0.28)):
    """Build a SimpleITK image from a numpy array with an explicit spacing.

    Args:
        arr: Array handed to ``sitk.GetImageFromArray``.
        orig_sitk_img: Reference image. Only its direction matrix is copied
            onto the result; its origin and spacing are not transferred.
        override_spacing: Sequence of per-axis spacings. It is applied in
            reversed order (presumably because it is given in the array's axis
            order, which SimpleITK indexes the other way round - confirm
            against the callers). The argument itself is never modified.

    Returns:
        The newly created ``sitk.Image``.
    """
    sitk_img = sitk.GetImageFromArray(arr)
    # Reverse into a fresh list instead of calling .reverse() on the argument:
    # an in-place reverse would mutate the shared default (and any list the
    # caller passes), flipping the axis order back and forth on every call.
    reversed_spacing = list(reversed(override_spacing))
    sitk_img.SetSpacing(reversed_spacing)
    sitk_img.SetDirection(orig_sitk_img.GetDirection())
    return sitk_img

In [21]:
def unstack_volume(img_path, output_path_template):
    """Split one multi-sweep volume into per-sweep volumes and write them to disk.

    The volume is cut along its last array axis into consecutive chunks of
    ``consts['TOTAL_FRAMECOUNT'] // consts['TOTAL_SWEEPS']`` frames.
    NOTE(review): this assumes ``sitk_to_numpy`` returns the frame axis last -
    confirm against that helper.

    Args:
        img_path: Path of the stacked volume to read.
        output_path_template: ``str.format`` template for each output file; it
            is filled with ``base_path`` (``env['DS_RAW']``), ``file_stem``
            (``get_stem(img_path)``) and ``sweep_id`` (1-based).

    Returns:
        List of the written file paths (str), in sweep order.
    """
    img = read_image(img_path)
    img_np = sitk_to_numpy(img)

    # Integer division keeps the frame count an int without a float round-trip.
    frames_per_sweep = consts['TOTAL_FRAMECOUNT'] // consts['TOTAL_SWEEPS']
    # Sweep boundaries 0, n, 2n, ..., TOTAL_FRAMECOUNT; neighbours form (start, end).
    sweep_bounds = range(0, consts['TOTAL_FRAMECOUNT'] + 1, frames_per_sweep)
    file_stem = get_stem(img_path)  # identical for every sweep of this volume

    new_paths = []
    for sweep_id, (start_frame, end_frame) in enumerate(zip(sweep_bounds, sweep_bounds[1:]), start=1):
        sweep_arr = img_np[:, :, start_frame:end_frame]
        # No spacing is passed, so the helper's default spacing is applied
        # rather than the spacing of the source image.
        sweep_sitk_img = create_sitk_img_from_arr(sweep_arr, img)
        # normpath collapses the doubled separator that appears when DS_RAW
        # ends with a slash and the template also starts its suffix with one.
        output_img_path = os.path.normpath(
            output_path_template.format(
                base_path=env['DS_RAW'],
                file_stem=file_stem,
                sweep_id=sweep_id,
            )
        )
        Path(output_img_path).parent.mkdir(parents=True, exist_ok=True)
        sitk.WriteImage(sweep_sitk_img, output_img_path)
        new_paths.append(output_img_path)
    return new_paths

In [19]:
# Accumulators for the paths of every per-sweep file written for images and masks.
new_image_paths, new_mask_paths = [], []


In [20]:

# Output path templates; placeholders are filled via str.format with
# base_path, file_stem and sweep_id. Images and masks share the same tail.
_UNSTACKED_TAIL = '/unstacked_fetal_ultrasound/{file_stem}/sweep_id/{sweep_id}.mha'
expected_img_path = '{base_path}/images' + _UNSTACKED_TAIL
expected_mask_path = '{base_path}/masks' + _UNSTACKED_TAIL

In [22]:
# Per-sweep outputs are written below images/ and masks/ inside a directory
# with this name, i.e. inside the very trees that are globbed here. They must
# be skipped, otherwise a re-run would treat generated sweep files as inputs.
UNSTACKED_DIRNAME = 'unstacked_fetal_ultrasound'

image_dir = Path(env['DS_RAW'])/'images'
image_paths = sorted(p for p in image_dir.rglob('*.mha') if UNSTACKED_DIRNAME not in p.parts)

mask_dir =  Path(env['DS_RAW'])/'masks'
mask_paths = sorted(p for p in mask_dir.rglob('*.mha') if UNSTACKED_DIRNAME not in p.parts)

# zip() stops at the shorter list without complaint, which would silently drop
# volumes (and, after sorting, could pair an image with the wrong mask).
# NOTE(review): pairing relies on both sorted lists lining up one-to-one -
# presumably image and mask files share file names; verify for this dataset.
if len(image_paths) != len(mask_paths):
    raise ValueError(
        f"Found {len(image_paths)} image volumes but {len(mask_paths)} mask volumes; "
        "cannot pair them by position."
    )

# Start from empty lists so that re-running this cell does not append
# duplicates to results left over from a previous execution.
new_image_paths = []
new_mask_paths = []

for img_path, mask_path in tqdm(zip(image_paths,mask_paths), desc="Unstacking mha files", total=len(image_paths)):
    new_image_paths.extend(unstack_volume(img_path,expected_img_path))
    new_mask_paths.extend(unstack_volume(mask_path,expected_mask_path))

Unstacking mha files:   0%|          | 0/318 [00:00<?, ?it/s]

Unstacking mha files:  98%|█████████▊| 312/318 [1:01:51<01:11, 11.89s/it]


In [25]:
# Sanity check: number of per-sweep files recorded for images and for masks;
# the two counts are expected to be equal.
len(new_image_paths), len(new_mask_paths)

(1872, 1872)

In [27]:
import pandas as pd

# One row per written sweep: the image file and the mask file at the same
# position in the two result lists.
df = pd.DataFrame({"image_path": new_image_paths, "mask_path": new_mask_paths})

# Write the index next to the dataset root taken from .env instead of a
# hard-coded absolute path, so the notebook also runs on other machines.
# The DataFrame index is written as the first (unnamed) CSV column.
csv_path = Path(env['DS_RAW']) / "acouslic_unstacked_sweeps.csv"
df.to_csv(csv_path)

/mnt/Enterprise2/PUBLIC_DATASETS/acouslic-ai//images/unstacked_fetal_ultrasound/0199616b-bdeb-4119-97a3-a5a3571bd641/sweep_id/1.mha
/mnt/Enterprise2/PUBLIC_DATASETS/acouslic-ai//images/unstacked_fetal_ultrasound/0199616b-bdeb-4119-97a3-a5a3571bd641/sweep_id/2.mha
/mnt/Enterprise2/PUBLIC_DATASETS/acouslic-ai//images/unstacked_fetal_ultrasound/0199616b-bdeb-4119-97a3-a5a3571bd641/sweep_id/3.mha
/mnt/Enterprise2/PUBLIC_DATASETS/acouslic-ai//images/unstacked_fetal_ultrasound/0199616b-bdeb-4119-97a3-a5a3571bd641/sweep_id/4.mha
/mnt/Enterprise2/PUBLIC_DATASETS/acouslic-ai//images/unstacked_fetal_ultrasound/0199616b-bdeb-4119-97a3-a5a3571bd641/sweep_id/5.mha
/mnt/Enterprise2/PUBLIC_DATASETS/acouslic-ai//images/unstacked_fetal_ultrasound/0199616b-bdeb-4119-97a3-a5a3571bd641/sweep_id/6.mha
