In [None]:
from pathlib import Path
import pydicom
import numpy as np
import cv2
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches

In [None]:
# Annotation table for the dataset. Only its `name` column is used further down,
# to derive each image's file name and its train/val/test assignment.
# Add path to the labels file for the train/val/test dataset.
labels = pd.read_csv(filepath_or_buffer="./heart_detection_labels.csv")

In [None]:
# Input location: add path to the folder with train/val/test images.
ROOT_PATH = Path("./train_images/")

# Output location: add path to the folder where the processed images will be saved.
SAVE_PATH = Path("./processed_heart_detection/")

In [None]:
# Preprocess every labelled DICOM image and collect training-set statistics.
#
# For each entry of the `name` column of `labels` (in file order) the matching
# `.dcm` file under ROOT_PATH is read, divided by 255, resized to IMG_SIZE, cast
# to float16 and written with np.save to SAVE_PATH/<split>/<patient_id>.
# The split is purely positional: the first NUM_TRAIN rows are "train", rows up
# to NUM_TRAIN_VAL are "val", and all remaining rows are "test".
NUM_TRAIN = 400         # rows [0, NUM_TRAIN) -> "train"
NUM_TRAIN_VAL = 448     # rows [NUM_TRAIN, NUM_TRAIN_VAL) -> "val", the rest -> "test"
IMG_SIZE = (224, 224)   # target size handed to cv2.resize

# Running per-image mean and mean-of-squares over the training images.
# They start as Python floats and are fed float64 values so the totals are kept
# in double precision; summing the float16 image directly would return float16
# scalars and the running total would lose most of its significant digits.
train_sums = 0.0
train_sums_squared = 0.0
train_ids = []
val_ids = []
test_ids = []

for counter, patient_id in enumerate(labels["name"]):
    img_path = (ROOT_PATH / patient_id).with_suffix(".dcm")

    # dcmread is the supported reader; the read_file alias was removed in pydicom 3.0.
    # NOTE(review): dividing by 255 assumes 8-bit pixel data - confirm for this dataset.
    img = pydicom.dcmread(img_path).pixel_array / 255
    img_array = cv2.resize(img, IMG_SIZE).astype(np.float16)

    if counter < NUM_TRAIN:
        train_val_test = "train"
        train_ids.append(patient_id)
    elif counter < NUM_TRAIN_VAL:
        train_val_test = "val"
        val_ids.append(patient_id)
    else:
        train_val_test = "test"
        test_ids.append(patient_id)

    current_save_path = SAVE_PATH / train_val_test
    current_save_path.mkdir(parents=True, exist_ok=True)
    np.save(current_save_path / patient_id, img_array)

    if train_val_test == "train":
        # Statistics are taken over the float16-rounded values that were just
        # saved, but the reduction itself is carried out in float64.
        normalizer = img_array.shape[0] * img_array.shape[1]
        pixels = img_array.astype(np.float64)
        train_sums += pixels.sum() / normalizer
        train_sums_squared += np.square(pixels).sum() / normalizer

In [None]:
# Persist the patient ids of each split next to the processed images so the
# split membership can be reloaded later without re-running the preprocessing.
subject_files = {
    f"{SAVE_PATH}/train_subjects": train_ids,
    f"{SAVE_PATH}/val_subjects": val_ids,
    f"{SAVE_PATH}/test_subjects": test_ids,
}
for subjects_target, subject_ids in subject_files.items():
    np.save(subjects_target, subject_ids)

In [None]:
# Turn the accumulated per-image averages into dataset-level statistics:
# mean = E[x] and std = sqrt(E[x^2] - E[x]^2), averaged over the training images.
num_train_images = len(train_ids)
train_mean = train_sums / num_train_images
train_mean_of_squares = train_sums_squared / num_train_images
train_std = np.sqrt(train_mean_of_squares - (train_mean**2))

In [None]:
# Report the training-set mean and standard deviation computed above.
stats_report = f"Mean of Train Dataset: {train_mean}, STD: {train_std}"
print(stats_report)