In [1]:
import json
import os
import random
from datetime import datetime
from pathlib import Path

import cv2
import h5py
import matplotlib.pyplot as plt
import numpy as np

# Input/output locations. Everything lives under one base directory, so
# pointing the notebook at another copy of the data means editing only
# ANGIOSTORE_DIR.
ANGIOSTORE_DIR = "/home/ubuntu/Angiostore"

# Angiogram image stacks (read by the image export)
angiographyDataFile = f"{ANGIOSTORE_DIR}/WebknossosAngiogramsRevisedUInt8List.h5"
# Multi-channel annotations (input of indicize_annotations)
annotationDataFile = f"{ANGIOSTORE_DIR}/WebknossosAnnotationsRevisedUnitized-5.h5"
# Single-channel label maps (output of indicize_annotations, read by the label export)
annotationIndicizedDataFile = f"{ANGIOSTORE_DIR}/WebknossosAnnotationsRevisedIndicized-3.h5"
# Root folder that receives imagesTr/, labelsTr/ and dataset.json
nnUNetRawFolder = f"{ANGIOSTORE_DIR}/nnUnet_raw"

In [2]:
def get_common_keys(angiographyDataFile, annotationDataFile):
    """
    Return the sorted top-level keys that occur in both HDF5 files.

    Both files are opened read-only. The number of keys in each file and the
    number of shared keys are printed before the shared keys are returned.

    Args:
        angiographyDataFile: Path to the angiography HDF5 file.
        annotationDataFile: Path to the annotation HDF5 file.

    Returns:
        list: Keys present in both files, in sorted order.
    """
    with h5py.File(angiographyDataFile, 'r') as angio_file:
        angio_names = set(angio_file.keys())
    with h5py.File(annotationDataFile, 'r') as anno_file:
        anno_names = set(anno_file.keys())

    shared_names = sorted(angio_names & anno_names)

    print(f"Angiography keys: {len(angio_names)}")
    print(f"Annotation keys: {len(anno_names)}")
    print(f"Common keys: {len(shared_names)}")

    return shared_names


In [3]:
# Dataset names that exist in both the angiography and the annotation file
common_keys = get_common_keys(angiographyDataFile, annotationDataFile)

# Show the shared names for a visual check before exporting anything
print(f"Common keys: {common_keys}")

Angiography keys: 160
Annotation keys: 129
Common keys: 129
Common keys: ['01_Case_CSF8U1R1_14', '01_Case_CSF8U1RQ_13', '03_Case_CSF8UKA1_2', '03_Case_CSF8UKA5_4', '03_Case_CSF8UKA9_1', '03_Case_CSF8UKAH_5', '06_Case_CSF8W343_9', '10_Case_CSF8WNXU_5', '10_Case_CSF8WNY6_1', '12_Case_CSF8XAD2_23', '12_Case_CSF8XADW_10', '13_Case_CSVS1XAP_1', '13_Case_CSVS1XAU_6', '13_Case_CSVS1XB4_2', '14_Case_CSVS23F6_14', '14_Case_CSVS23FO_5', '14_Case_CSVS23FS_4', 'Angios_005_rev', 'Angios_006_rev', 'Angios_007_rev', 'Angios_013_rev', 'Angios_015_rev', 'Angios_016_rev', 'Angios_021_rev', 'Angios_022_rev', 'Angios_030_rev', 'Angios_031_rev', 'Angios_032_rev', 'Angios_048_rev', 'Angios_061_rev', 'Angios_063_rev', 'Angios_065_rev', 'Angios_067_rev', 'Angios_072_rev', 'Angios_075_rev', 'Angios_079_rev', 'Angios_081_rev', 'Angios_082_rev', 'Angios_104_rev', 'Angios_114_rev', 'Angios_120_rev', 'Angios_128_rev', 'Angios_129_rev', 'Angios_133_rev', 'Angios_148_rev', 'Angios_157_rev', 'Angios_161_rev', 'Angios

In [4]:

def _frame_to_uint8(frame):
    """Return `frame` as uint8, rescaling non-uint8 input to the 0-255 range."""
    if frame.dtype == np.uint8:
        return frame
    peak = frame.max()
    # Frames whose maximum exceeds 1 are rescaled by their own maximum;
    # anything else is treated as already lying in [0, 1].
    scaled = frame / peak * 255 if peak > 1 else frame * 255
    return scaled.astype(np.uint8)


def export_angiography_to_nnunet(angiographyDataFile, nnUNetRawFolder, common_keys):
    """
    Export 5-frame sliding windows of every listed angiogram as PNG files.

    For each dataset in `common_keys`, every frame index that has two
    neighbours on both sides becomes the centre of one block. The five frames
    of a block are written to `<nnUNetRawFolder>/imagesTr` as
    `Angios_<block>_<frame>.png`, where `<block>` is a counter running over
    all datasets and `<frame>` is the position (0-4) inside the window.
    Datasets with fewer than five frames produce no blocks.

    Args:
        angiographyDataFile: Path to the HDF5 file holding the angiograms;
            the first axis of each dataset is used as the frame axis.
        nnUNetRawFolder: Root output folder; `imagesTr` is created inside it.
        common_keys: Names of the datasets to export, in export order.

    Returns:
        list: One `[dataset_name, center_idx]` pair per exported block, in
        block-counter order.

    Raises:
        OSError: If OpenCV reports that an image could not be written.
    """
    images_dir = os.path.join(nnUNetRawFolder, 'imagesTr')
    Path(images_dir).mkdir(parents=True, exist_ok=True)

    frameKeys = []
    blockCounter = 0

    with h5py.File(angiographyDataFile, 'r') as f:
        # Only the requested (common) datasets are exported
        for dataset_name in common_keys:
            print(f"Processing angiogram dataset: {dataset_name}")
            angio_data = f[dataset_name][:]
            n_frames = angio_data.shape[0]

            for center_idx in range(2, n_frames - 2):
                frameKeys.append([dataset_name, center_idx])
                # A contiguous slice selects the same five frames as indexing
                # with range(center_idx - 2, center_idx + 3), without copying.
                frames = angio_data[center_idx - 2:center_idx + 3]

                for frame_num, frame in enumerate(frames):
                    filename = f'Angios_{blockCounter:04d}_{frame_num:04d}.png'
                    filepath = os.path.join(images_dir, filename)
                    # cv2.imwrite signals failure through its return value
                    # instead of raising, so check it explicitly.
                    if not cv2.imwrite(filepath, _frame_to_uint8(frame)):
                        raise OSError(f"Failed to write image file: {filepath}")

                blockCounter += 1

    print(f"Exported {blockCounter} sets of 5 frames each")
    print(f"Total number of PNG files created: {blockCounter * 5}")
    return frameKeys


In [5]:
# Write the image blocks; the returned (dataset, center frame) pairs drive the label export
frameKeys = export_angiography_to_nnunet(
    angiographyDataFile=angiographyDataFile,
    nnUNetRawFolder=nnUNetRawFolder,
    common_keys=common_keys,
)


Processing angiogram dataset: 01_Case_CSF8U1R1_14
Processing angiogram dataset: 01_Case_CSF8U1RQ_13
Processing angiogram dataset: 03_Case_CSF8UKA1_2
Processing angiogram dataset: 03_Case_CSF8UKA5_4
Processing angiogram dataset: 03_Case_CSF8UKA9_1
Processing angiogram dataset: 03_Case_CSF8UKAH_5
Processing angiogram dataset: 06_Case_CSF8W343_9
Processing angiogram dataset: 10_Case_CSF8WNXU_5
Processing angiogram dataset: 10_Case_CSF8WNY6_1
Processing angiogram dataset: 12_Case_CSF8XAD2_23
Processing angiogram dataset: 12_Case_CSF8XADW_10
Processing angiogram dataset: 13_Case_CSVS1XAP_1
Processing angiogram dataset: 13_Case_CSVS1XAU_6
Processing angiogram dataset: 13_Case_CSVS1XB4_2
Processing angiogram dataset: 14_Case_CSVS23F6_14
Processing angiogram dataset: 14_Case_CSVS23FO_5
Processing angiogram dataset: 14_Case_CSVS23FS_4
Processing angiogram dataset: Angios_005_rev
Processing angiogram dataset: Angios_006_rev
Processing angiogram dataset: Angios_007_rev
Processing angiogram datase

In [None]:

def indicize_annotations(annotationDataFile, annotationIndicizedDataFile):
    """
    Collapse multi-channel annotation masks into one uint8 label map per dataset.

    Channels are taken along the first axis of each dataset. Only channel 1
    (catheter), channel 2 (vessel) and channel 3 (stenosis) are read. For
    0/1-valued channels the arithmetic below yields:

        catheter without vessel      -> 1
        vessel without stenosis      -> 2  (also where the catheter overlaps)
        vessel with stenosis         -> 0
        neither catheter nor vessel  -> 0

    Every dataset of the input file is written under the same name to the
    output file, which is opened in 'w' mode. The input shape, the output
    shape and the unique values stored are printed for each dataset.

    Args:
        annotationDataFile: Path to the multi-channel annotation HDF5 file.
        annotationIndicizedDataFile: Path of the HDF5 file to create.
    """
    with h5py.File(annotationDataFile, 'r') as src, \
         h5py.File(annotationIndicizedDataFile, 'w') as dst:

        for dataset_name in src:
            print(f"Processing dataset: {dataset_name}")

            # Load the whole dataset into memory
            data = src[dataset_name][:]
            print(f"Dataset shape: {data.shape}")

            catheter_ch, vessel_ch, stenosis_ch = data[1], data[2], data[3]
            # Vessel pixels count as 2 unless they are also marked as stenosis
            open_vessel = 2 * (vessel_ch - vessel_ch * stenosis_ch)
            # Remove the catheter contribution wherever a vessel is annotated
            transformed = catheter_ch + open_vessel - catheter_ch * vessel_ch
            print(f"Transformed shape: {transformed.shape}")

            # Allocate the uint8 target dataset, then fill it
            out_dset = dst.create_dataset(
                dataset_name,
                transformed.shape,
                dtype=np.uint8
            )
            out_dset[:] = transformed

            # Read back what was stored to report the label values present
            print(f"  Dataset {dataset_name} unique values: {np.unique(out_dset[:])}")

        print("Transformation complete!")

# Example usage:
# indicize_annotations(annotationDataFile, annotationIndicizedDataFile)

# Debugging aid: load the first dataset of the annotation file and report its shape
with h5py.File(annotationDataFile, 'r') as annotation_file:
    first_dataset_name = list(annotation_file)[0]
    first_dataset = annotation_file[first_dataset_name][:]

print(f"First dataset shape: {first_dataset.shape}")

In [None]:
# Convert the multi-channel annotations into single-channel label maps.
# Both paths are set in the configuration cell at the top of the notebook.
indicize_annotations(
    annotationDataFile=annotationDataFile,
    annotationIndicizedDataFile=annotationIndicizedDataFile,
)

In [None]:
def get_hdf5_keys(hdf5_file: str) -> list[str]:
    """
    Read the top-level keys of an HDF5 file, print them and return them.

    Args:
        hdf5_file: Path to the HDF5 file (opened read-only)

    Returns:
        list[str]: The file's top-level keys, in the order h5py reports them
    """
    with h5py.File(hdf5_file, 'r') as h5:
        names = list(h5.keys())

    # One header line, then one indented line per key
    print(f"Keys in {hdf5_file}:")
    for name in names:
        print(f"  {name}")
    return names

# List the datasets that ended up in the indicized annotation file
keys = get_hdf5_keys(hdf5_file=annotationIndicizedDataFile)


In [None]:
# Number of datasets in the indicized annotation file
len(keys)

In [None]:
def get_dataset_data(hdf5_file: str, key: str) -> np.ndarray:
    """
    Load one dataset of an HDF5 file fully into memory and print its shape.

    Args:
        hdf5_file: Path to the HDF5 file (opened read-only)
        key: Name of the dataset to read

    Returns:
        np.ndarray: The complete contents of the dataset
    """
    with h5py.File(hdf5_file, 'r') as h5:
        array = h5[key][:]

    print(f"Shape of dataset {key}: {array.shape}")
    return array

# Load one randomly chosen dataset from the indicized file for inspection.
# The pick differs between runs; the chosen key is printed with the shape.
key = random.choice(keys)
example_data = get_dataset_data(annotationIndicizedDataFile, key)
print(key, example_data.shape)


In [None]:
# Display one frame of the example label volume as an image.
# The dataset is picked at random, so clamp the frame index: a dataset with
# fewer than 31 frames would otherwise raise an IndexError at index 30.
frame_idx = min(30, len(example_data) - 1)

fig, ax = plt.subplots(figsize=(10, 10))
image = ax.imshow(example_data[frame_idx], cmap='gray')
fig.colorbar(image, ax=ax)
ax.set_title(f'example_data[{frame_idx}]')
ax.axis('on')
plt.show()


In [64]:

def export_annotations_to_nnunet(annotationDataFile, nnUNetRawFolder, frameKeys):
    """
    Export one label PNG per exported image block.

    For every `(dataset_name, center_idx)` pair in `frameKeys`, the frame
    `center_idx` of dataset `dataset_name` is read from the annotation file
    and written to `<nnUNetRawFolder>/labelsTr/Angios_<block>.png`, where
    `<block>` is the position of the pair in `frameKeys`. Passing the list
    returned by `export_angiography_to_nnunet` therefore gives every image
    block a label file with the same block number.

    Args:
        annotationDataFile: Path to the HDF5 file with the label maps; the
            first axis of each dataset is used as the frame axis.
        nnUNetRawFolder: Root output folder; `labelsTr` is created inside it.
        frameKeys: Sequence of `(dataset_name, center_idx)` pairs.

    Raises:
        OSError: If OpenCV reports that a label image could not be written.
    """
    labels_dir = os.path.join(nnUNetRawFolder, 'labelsTr')
    Path(labels_dir).mkdir(parents=True, exist_ok=True)

    blockCounter = 0
    previous_dataset = None

    with h5py.File(annotationDataFile, 'r') as f:
        for dataset_name, center_idx in frameKeys:
            # Announce each dataset once rather than once per frame, matching
            # the progress output of the image export.
            if dataset_name != previous_dataset:
                print(f"Processing annotation dataset: {dataset_name}")
                previous_dataset = dataset_name

            # Read only the centre frame of this block
            anno_data = f[dataset_name][center_idx]

            filename = f'Angios_{blockCounter:04d}.png'
            filepath = os.path.join(labels_dir, filename)
            # cv2.imwrite signals failure through its return value instead of
            # raising, so check it explicitly.
            if not cv2.imwrite(filepath, anno_data):
                raise OSError(f"Failed to write label file: {filepath}")

            blockCounter += 1

    print(f"Exported {blockCounter} label files")

    # Holds by construction (one increment per frameKeys entry); kept as a
    # sanity check on the label count.
    assert blockCounter == len(frameKeys), \
        f"Mismatch in number of cases: Angiography had {len(frameKeys)}, Annotations had {blockCounter}"


In [None]:
# Inspect the first 50 (dataset, center frame) pairs produced by the image export
preview = frameKeys[:50]
print("First 50 frame keys:")
for position, (name, center) in enumerate(preview):
    print(f"{position}: {name}, center_idx={center}")


In [None]:
# Write one label PNG per image block, read from the indicized annotation file
export_annotations_to_nnunet(
    annotationDataFile=annotationIndicizedDataFile,
    nnUNetRawFolder=nnUNetRawFolder,
    frameKeys=frameKeys,
)

In [None]:
# Number of exported blocks; this is the value written to dataset.json as numTraining
len(frameKeys)

In [None]:
# Build the nnU-Net dataset.json for the exported data and write it to the raw folder.

# Generation time, recorded in the free-text description
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

# Take the source file names from the configured paths so the description
# cannot drift from the files that were actually exported.
angiogramSource = os.path.basename(angiographyDataFile)
annotationSource = os.path.basename(annotationIndicizedDataFile)

# NOTE(review): the description says "every other frame", while
# export_angiography_to_nnunet writes consecutive frames of each dataset.
# Presumably the subsampling happened when the source file was built - confirm.
datasetJson = {
    "_comment": f"Dataset contains 5-channel angiography data at 7.5Hz (every other frame) with 5-frame neighborhoods. Labels include background (0), C (1), and V (2). Data sourced from {angiogramSource} and {annotationSource}. Generated on {timestamp}.",
    # One channel per frame of the 5-frame window, named by its position
    "channel_names": {
        "0": "0",
        "1": "1",
        "2": "2",
        "3": "3",
        "4": "4"
    },
    "labels": {
        "background": 0,
        "catheter": 1,
        "vessel": 2
    },
    # One training case per exported block
    "numTraining": len(frameKeys),
    "file_ending": ".png"
}

# Write the dataset.json file
dataset_json_path = os.path.join(nnUNetRawFolder, 'dataset.json')
with open(dataset_json_path, 'w') as f:
    json.dump(datasetJson, f, indent=4)

print(f"Created dataset.json at {dataset_json_path}")