In [1]:
import nibabel as nib
import os
from scipy import ndimage
from modules.edge_detection import detect_edges, scale_data, scale_semantic_map
import h5py
import matplotlib.pyplot as plt
import numpy as np
import json

In [2]:
# subjects_path = "MnM2/dataset"

# resolution = 256

# all_subjects = [d for d in os.listdir(subjects_path) if os.path.isdir(os.path.join(subjects_path, d))]

# sorted_subjects = sorted(all_subjects)

# processed_data_path = os.path.join("processed_data")
# os.makedirs(processed_data_path, exist_ok=True)

# # Dictionary to store the index mapping
# index_mapping = {}
# current_index = 0

# for sub in sorted_subjects:
#     subject_path = os.path.join(subjects_path, sub)
#     subject_files = [d for d in os.listdir(subject_path) if d.endswith(('.nii', '.nii.gz'))]
#     sorted_subject_files = sorted(subject_files)

#     print(sorted_subject_files)

#     normalized_images = []
#     edges_images = []
#     semantic_maps = []

#     for file in sorted_subject_files:
#         parts = file.split('_')
#         file_type = '_'.join(parts[1:-1])
#         file_type += '_' + parts[-1].split('.')[0]

#         if file_type == "LA_ED":
#             img = nib.load(os.path.join(subject_path, file))
#             data = img.get_fdata()

#             resized_data = ndimage.zoom(data, (resolution/data.shape[0], resolution/data.shape[1], 1))

#             scale = scale_data(resized_data)

#             edges = detect_edges(resized_data)

#             normalized_images.append(scale[:,:,0])

#             edges_images.append(edges)

#         elif file_type == "LA_ED_gt":
#             img = nib.load(os.path.join(subject_path, file))
#             data = img.get_fdata()

#             resized_data = scale_semantic_map(data, (resolution, resolution))

#             semantic_maps.append(resized_data[:,:,0])

#         elif file_type == "LA_ES":
#             img = nib.load(os.path.join(subject_path, file))
#             data = img.get_fdata()

#             resized_data = ndimage.zoom(data, (resolution/data.shape[0], resolution/data.shape[1], 1))

#             scale = scale_data(resized_data)

#             edges = detect_edges(resized_data)

#             normalized_images.append(scale[:,:,0])

#             edges_images.append(edges)

#         elif file_type == "LA_ES_gt":
#             img = nib.load(os.path.join(subject_path, file))
#             data = img.get_fdata()

#             resized_data = scale_semantic_map(data, (resolution, resolution))

#             semantic_maps.append(resized_data[:,:,0])

#         elif file_type == "SA_ED":
#             img = nib.load(os.path.join(subject_path, file))
#             data = img.get_fdata()

#             for i in range(data.shape[2]):
#                 resized_data = ndimage.zoom(data[:, :, i], (resolution/data.shape[0], resolution/data.shape[1]))
#                 edges = detect_edges(resized_data)

#                 scale = scale_data(resized_data)

#                 normalized_images.append(scale)

#                 edges_images.append(edges)

#         elif file_type == "SA_ED_gt":
#             img = nib.load(os.path.join(subject_path, file))
#             data = img.get_fdata()

#             for i in range(data.shape[2]):
#                 resized_data = scale_semantic_map(data[:, :, i], (resolution, resolution))

#                 semantic_maps.append(resized_data)

#         elif file_type == "SA_ES":
#             img = nib.load(os.path.join(subject_path, file))
#             data = img.get_fdata()

#             for i in range(data.shape[2]):
#                 resized_data = ndimage.zoom(data[:, :, i], (resolution/data.shape[0], resolution/data.shape[1]))

#                 edges = detect_edges(resized_data)

#                 scale = scale_data(resized_data)

#                 normalized_images.append(scale)

#                 edges_images.append(edges)

#         elif file_type == "SA_ES_gt":
#             img = nib.load(os.path.join(subject_path, file))
#             data = img.get_fdata()

#             for i in range(data.shape[2]):
#                 resized_data = scale_semantic_map(data[:, :, i], (resolution, resolution))

#                 semantic_maps.append(resized_data)

#     # Store the index range for this subject
#     start_idx = current_index
#     num_samples = len(normalized_images)
#     end_idx = start_idx + num_samples - 1
    
#     index_mapping[sub] = {
#         "start_index": start_idx,
#         "end_index": end_idx,
#         "num_samples": num_samples,
#         "file_name": f"{sub}_data.h5"
#     }
    
#     current_index = end_idx + 1

#     # After processing all files for a subject, create the compressed HDF5 file
#     hdf5_filename = f"{sub}_data.h5"
#     hdf5_filepath = os.path.join(processed_data_path, hdf5_filename)
#     with h5py.File(hdf5_filepath, 'w') as hf:
#         hf.create_dataset('images', data=np.array(normalized_images), dtype=np.float32, compression="gzip")
#         hf.create_dataset('edges', data=np.array(edges_images), dtype=np.uint8, compression="gzip")
#         hf.create_dataset('semantic_maps', data=np.array(semantic_maps), dtype=np.uint8, compression="gzip")

#     print(f"Created compressed HDF5 file for subject {sub}: {hdf5_filepath}")

# # Save the index mapping to a JSON file
# mapping_filepath = os.path.join(processed_data_path, "dataset_index_mapping.json")
# with open(mapping_filepath, 'w') as f:
#     json.dump(index_mapping, f, indent=2)

# print(f"Created index mapping file: {mapping_filepath}")
# print("Finished processing all subjects.")
        
        

In [3]:
# Build the per-subject autoencoder dataset: every short-axis (SA) end-diastole,
# end-systole and cine slice is resized to `resolution` x `resolution` and stored,
# together with its edge map and semantic map, in one gzip-compressed HDF5 file
# per subject. A JSON file records which global sample indices each subject owns.
subjects_path = "MnM2/dataset"

resolution = 256

# All-zero label map appended once per SA_CINE frame so that `semantic_maps`
# stays index-aligned with the images
# (presumably cine volumes ship without a "_gt" segmentation -- TODO confirm).
empty_map = np.zeros((resolution, resolution), dtype=np.uint8)


def _resize_and_extract(slice_2d):
    """Resize one 2D slice and derive the arrays stored for it.

    Args:
        slice_2d: 2D array holding a single image slice.

    Returns:
        Tuple ``(resized, scaled, edges)``: the slice zoomed to
        ``resolution`` x ``resolution``, the result of ``scale_data`` on the
        zoomed slice, and the result of ``detect_edges`` on the zoomed slice.
    """
    resized = ndimage.zoom(
        slice_2d,
        (resolution / slice_2d.shape[0], resolution / slice_2d.shape[1]),
    )
    # Keep this call order (edges first, then scaling); both receive the zoomed slice.
    slice_edges = detect_edges(resized)
    scaled = scale_data(resized)
    return resized, scaled, slice_edges


all_subjects = [d for d in os.listdir(subjects_path) if os.path.isdir(os.path.join(subjects_path, d))]

sorted_subjects = sorted(all_subjects)

processed_data_path = os.path.join("processed_data", "autoencoder")
os.makedirs(processed_data_path, exist_ok=True)

# Dictionary to store the index mapping (subject -> global sample index range)
index_mapping = {}
current_index = 0

for sub in sorted_subjects:
    subject_path = os.path.join(subjects_path, sub)
    subject_files = [d for d in os.listdir(subject_path) if d.endswith(('.nii', '.nii.gz'))]
    sorted_subject_files = sorted(subject_files)

    print(sorted_subject_files)

    normalized_images = []
    edges_images = []
    semantic_maps = []

    for file in sorted_subject_files:
        # "<subject>_SA_ED_gt.nii.gz" -> "SA_ED_gt": drop the subject prefix and the extension.
        parts = file.split('_')
        file_type = '_'.join(parts[1:-1])
        file_type += '_' + parts[-1].split('.')[0]

        if file_type in ("SA_ED", "SA_ES"):
            # 3D image volume: one sample per slice along axis 2.
            img = nib.load(os.path.join(subject_path, file))
            data = img.get_fdata()

            for i in range(data.shape[2]):
                resized_data, scale, edges = _resize_and_extract(data[:, :, i])

                normalized_images.append(scale)
                edges_images.append(edges)

        elif file_type in ("SA_ED_gt", "SA_ES_gt"):
            # Label volume: one semantic map per slice. Sorted file order puts each
            # "_gt" file directly after its image file, which is what keeps the maps
            # index-aligned with the images appended just before.
            img = nib.load(os.path.join(subject_path, file))
            data = img.get_fdata()

            for i in range(data.shape[2]):
                resized_data = scale_semantic_map(data[:, :, i], (resolution, resolution))

                semantic_maps.append(resized_data)

        elif file_type == "SA_CINE":
            # 4D volume indexed as [:, :, slice, frame]: one sample per (slice, frame).
            img = nib.load(os.path.join(subject_path, file))
            data = img.get_fdata()

            for slice_idx in range(data.shape[2]):
                for frame_idx in range(data.shape[3]):
                    resized_data, scale, edges = _resize_and_extract(data[:, :, slice_idx, frame_idx])

                    normalized_images.append(scale)
                    edges_images.append(edges)
                    semantic_maps.append(empty_map)

    num_samples = len(normalized_images)

    # Images and semantic maps are paired purely by position. A missing "_gt" file or
    # a slice-count difference breaks that pairing, so report it instead of staying silent.
    # The files are still written so that image/edge-only consumers keep working.
    if len(semantic_maps) != num_samples:
        print(
            f"WARNING: subject {sub} has {num_samples} images but "
            f"{len(semantic_maps)} semantic maps; 'semantic_maps' is not index-aligned with 'images'."
        )

    # Nothing to store: skip rather than record an index range whose end precedes
    # its start and write an empty HDF5 file.
    if num_samples == 0:
        print(f"Skipping subject {sub}: no SA_ED, SA_ES or SA_CINE slices found.")
        continue

    # Store the (inclusive) index range for this subject
    start_idx = current_index
    end_idx = start_idx + num_samples - 1
    hdf5_filename = f"{sub}_data.h5"

    index_mapping[sub] = {
        "start_index": start_idx,
        "end_index": end_idx,
        "num_samples": num_samples,
        "file_name": hdf5_filename
    }

    current_index = end_idx + 1

    # After processing all files for a subject, create the compressed HDF5 file
    hdf5_filepath = os.path.join(processed_data_path, hdf5_filename)
    with h5py.File(hdf5_filepath, 'w') as hf:
        hf.create_dataset('images', data=np.array(normalized_images), dtype=np.float32, compression="gzip")
        hf.create_dataset('edges', data=np.array(edges_images), dtype=np.uint8, compression="gzip")
        hf.create_dataset('semantic_maps', data=np.array(semantic_maps), dtype=np.uint8, compression="gzip")

    print(f"Created compressed HDF5 file for subject {sub}: {hdf5_filepath}")

# Save the index mapping to a JSON file
mapping_filepath = os.path.join(processed_data_path, "dataset_index_mapping.json")
with open(mapping_filepath, 'w') as f:
    json.dump(index_mapping, f, indent=2)

print(f"Created index mapping file: {mapping_filepath}")
print("Finished processing all subjects.")
        
        