In [None]:
import os
import shutil
import errno
import random
import re
import pickle
from tqdm import tqdm
import time
import numpy as np
import nibabel as nib
from skimage.feature import peak_local_max, canny
from skimage.transform import hough_circle

In [None]:
# Seed NumPy's legacy global RNG and the stdlib `random` module with the same
# fixed value so any random draws are repeatable across kernel restarts.
# NOTE(review): no visible cell draws random numbers — presumably this is kept
# for code outside this view; confirm it is still needed.
np.random.seed(42)
random.seed(42)

In [None]:
# Helper Functions
def save_data(data, filename, out_path):
    """Serialize ``data`` with pickle into ``out_path/filename``.

    Args:
        data: Any picklable object (the notebook passes a dict of patients).
        filename: Name of the pickle file to create inside ``out_path``.
        out_path: Destination directory; created (with parents) if missing.

    Returns:
        None. The destination path is printed once the file is written.
    """
    destination = os.path.join(out_path, filename)
    # The directory may not exist yet on a first run.
    os.makedirs(out_path, exist_ok=True)
    with open(destination, 'wb') as handle:
        # Highest protocol: most compact/fastest format this Python supports.
        pickle.dump(data, handle, protocol=pickle.HIGHEST_PROTOCOL)
    print(f"Saved to {destination}")

def copy(src, dest):
    """Copy ``src`` to ``dest``, whether ``src`` is a directory or a file.

    A recursive directory copy is attempted first. If that fails because
    ``src`` is not a directory (``errno.ENOTDIR``), a plain file copy is done
    instead. Any other ``OSError`` is reported on stdout and swallowed.

    Args:
        src: Path of the file or directory to copy.
        dest: Destination path.
    """
    try:
        shutil.copytree(src, dest)
    except OSError as err:
        if err.errno != errno.ENOTDIR:
            # Best effort: report the problem but do not abort the caller.
            print(f"Directory not copied. Error: {err}")
            return
        # ``src`` turned out to be a regular file.
        shutil.copy(src, dest)

def read_patient_cfg(path):
    """Read the ``Info.cfg`` file located in ``path`` into a dictionary.

    Every non-blank line must look like ``key: value``. Only the first
    ``": "`` on a line separates key from value, so values may themselves
    contain ``": "``. Blank lines are ignored.

    Args:
        path: Directory that contains ``Info.cfg``.

    Returns:
        dict mapping each key to its value; both are strings.

    Raises:
        FileNotFoundError: If ``Info.cfg`` does not exist in ``path``.
        ValueError: If a non-blank line has no ``": "`` separator.
    """
    cfg_path = os.path.join(path, 'Info.cfg')
    patient_info = {}
    with open(cfg_path) as f_in:
        for line_no, raw_line in enumerate(f_in, start=1):
            line = raw_line.rstrip()
            if not line:
                # Tolerate empty / whitespace-only lines (e.g. a trailing newline).
                continue
            key, sep, val = line.partition(": ")
            if not sep:
                raise ValueError(
                    f"Malformed line {line_no} in {cfg_path}: {line!r}"
                )
            patient_info[key] = val
    return patient_info

In [None]:
# Heart Metrics & ROI Extraction
def heart_metrics(seg_3Dmap, voxel_size, classes=(3, 1, 2)):
    """Compute the volume occupied by each requested label of a segmentation.

    For every label in ``classes`` the voxels equal to that label are counted
    and multiplied by the volume of one voxel; the result is divided by 1000.
    The default label order ``(3, 1, 2)`` is unpacked by the caller as
    LV, RV, myocardium.

    Args:
        seg_3Dmap: Array-like label map. It is only read, never modified.
        voxel_size: Sequence of voxel edge lengths; their product is the
            volume of a single voxel (assumed to be in mm, so that dividing
            mm^3 by 1000 yields mL — confirm against the image header).
        classes: Iterable of label values to measure, in output order.

    Returns:
        list with one volume per entry of ``classes``, in the same order.
    """
    seg = np.asarray(seg_3Dmap)
    voxel_volume = np.prod(voxel_size)
    volumes = []
    for label in classes:
        # Count matching voxels via a boolean mask instead of copying and
        # clipping the whole map; this is also correct for label 0.
        n_voxels = np.count_nonzero(seg == label)
        # np.float64 keeps the result in double precision even when the
        # voxel sizes are float32 scalars.
        volumes.append(np.float64(n_voxels) * voxel_volume / 1000.0)
    return volumes

def ejection_fraction(ed_vol, es_vol):
    """Return the ejection fraction in percent.

    Computed as ``(ed_vol - es_vol) / ed_vol * 100``.

    Args:
        ed_vol: End-diastolic volume.
        es_vol: End-systolic volume (same unit as ``ed_vol``).

    Returns:
        float percentage, or ``nan`` when ``ed_vol`` is zero: the ratio is
        undefined there, and raising ZeroDivisionError would abort a whole
        batch run for a single empty segmentation.
    """
    end_diastolic = float(ed_vol)
    if end_diastolic == 0.0:
        return float("nan")
    stroke_vol = ed_vol - es_vol
    return (float(stroke_vol) / end_diastolic) * 100

In [None]:
# Dataset Class
class Dataset:
    """Loader for one patient folder (``<directory>/<subdir>``).

    The folder is expected to contain an ``Info.cfg`` file plus NIfTI volumes
    named ``patientNNN_frameFF.nii.gz`` and ``patientNNN_frameFF_gt.nii.gz``.
    Everything that is read or derived is stored in ``self.patient_data``.
    """

    def __init__(self, directory, subdir):
        """Remember where the patient lives; nothing is read from disk yet.

        Args:
            directory: Root directory that holds the patient folders.
            subdir: Name of this patient's folder, e.g. ``patient001``.
        """
        self.directory = directory
        self.name = subdir
        self.patient_data = {}

    def _filename(self, file):
        """Return the full path of ``file`` inside this patient's folder."""
        return os.path.join(self.directory, self.name, file)

    def load_nii(self, img_path):
        """Load a NIfTI file from the patient folder.

        Args:
            img_path: File name relative to the patient folder.

        Returns:
            Tuple ``(data, affine, header)`` as provided by nibabel
            (``get_fdata()``, ``.affine``, ``.header``).
        """
        nimg = nib.load(self._filename(img_path))
        return nimg.get_fdata(), nimg.affine, nimg.header

    def read_patient_info_data(self):
        """Parse ``Info.cfg`` into ``self.patient_data`` (string keys/values).

        Each non-blank line must look like ``key: value``. Only the first
        ``": "`` splits key from value; blank lines are skipped.

        Raises:
            FileNotFoundError: If ``Info.cfg`` is missing.
            ValueError: If a non-blank line has no ``": "`` separator.
        """
        cfg_path = self._filename('Info.cfg')
        with open(cfg_path) as f_in:
            for line_no, raw_line in enumerate(f_in, start=1):
                line = raw_line.rstrip()
                if not line:
                    # Tolerate empty lines such as a trailing newline.
                    continue
                key, sep, val = line.partition(": ")
                if not sep:
                    raise ValueError(
                        f"Malformed line {line_no} in {cfg_path}: {line!r}"
                    )
                self.patient_data[key] = val

    def read_patient_data(self, mode='train'):
        """Load the ED/ES volumes, their ground truth and derived heart metrics.

        Populates ``self.patient_data`` with ``ED_VOL``, ``ES_VOL``,
        ``header``, ``HeartParams``, ``ED_GT`` and ``ES_GT`` in addition to
        the ``Info.cfg`` entries.

        Args:
            mode: Accepted for interface compatibility; this method does not
                currently branch on it.

        Raises:
            FileNotFoundError: If ``Info.cfg`` or one of the NIfTI files is
                missing.
            ValueError: If the folder name does not start with ``patient``
                followed by three digits, or ``Info.cfg`` is malformed.
            KeyError: If ``Info.cfg`` lacks the ``ED`` or ``ES`` entry.
        """
        self.read_patient_info_data()

        name_match = re.match(r"patient(\d{3})", self.name)
        if name_match is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError(
                f"Folder name {self.name!r} does not match 'patientNNN'"
            )
        patient_No = int(name_match.group(1))
        ED_frame = int(self.patient_data['ED'])
        ES_frame = int(self.patient_data['ES'])

        ed, aff, hdr = self.load_nii(f"patient{patient_No:03d}_frame{ED_frame:02d}.nii.gz")
        es, _, _ = self.load_nii(f"patient{patient_No:03d}_frame{ES_frame:02d}.nii.gz")

        self.patient_data['ED_VOL'] = ed
        self.patient_data['ES_VOL'] = es
        # Affine and header are taken from the ED image.
        self.patient_data['header'] = {'affine': aff, 'hdr': hdr}

        # Ground truth
        ed_gt, _, _ = self.load_nii(f"patient{patient_No:03d}_frame{ED_frame:02d}_gt.nii.gz")
        es_gt, _, _ = self.load_nii(f"patient{patient_No:03d}_frame{ES_frame:02d}_gt.nii.gz")

        # Voxel spacing of the ED image header is used for both phases.
        voxel_size = hdr.get_zooms()
        ed_lv, ed_rv, ed_myo = heart_metrics(ed_gt, voxel_size)
        es_lv, es_rv, es_myo = heart_metrics(es_gt, voxel_size)

        ef_lv = ejection_fraction(ed_lv, es_lv)
        ef_rv = ejection_fraction(ed_rv, es_rv)

        self.patient_data['HeartParams'] = {
            'EDV_LV': ed_lv, 'EDV_RV': ed_rv,
            'ESV_LV': es_lv, 'ESV_RV': es_rv,
            'ED_MYO': ed_myo, 'ES_MYO': es_myo,
            'EF_LV': ef_lv, 'EF_RV': ef_rv
        }

        self.patient_data['ED_GT'] = ed_gt
        self.patient_data['ES_GT'] = es_gt


In [None]:
# Convert NIfTI to Pickle
def convert_nii_np(data_path, mode='train'):
    """Load every patient folder under ``data_path`` into one dictionary.

    Only sub-directories whose name starts with ``patient`` followed by three
    digits are processed. This is the same pattern
    ``Dataset.read_patient_data`` needs to extract the patient number, so
    unrelated ``patient*`` folders no longer crash the run.

    Args:
        data_path: Directory that contains the ``patientNNN`` folders.
        mode: Label forwarded to ``Dataset.read_patient_data`` and shown in
            the progress-bar description.

    Returns:
        dict mapping folder name to that patient's ``patient_data`` dict.
        Patients with missing files are skipped with a printed warning.

    Raises:
        ValueError: If no patient folder is found in ``data_path``.
    """
    patient_folders = [
        entry for entry in sorted(os.listdir(data_path))
        if os.path.isdir(os.path.join(data_path, entry))
        and re.match(r"patient\d{3}", entry)
    ]

    if not patient_folders:
        raise ValueError(f"No patient folders found in {data_path}")

    patient_fulldata = {}
    for patient in tqdm(patient_folders, desc=f"Processing {mode} data"):
        dset = Dataset(data_path, patient)
        try:
            dset.read_patient_data(mode=mode)
        except FileNotFoundError:
            # Incomplete patient folder: report it and keep going.
            print(f" Warning: Missing data for {patient}, skipping.")
        else:
            patient_fulldata[dset.name] = dset.patient_data

    return patient_fulldata

In [None]:
# Main Processing
# NOTE(review): start_time is not read anywhere in the visible cells —
# presumably a later cell reports the elapsed time; confirm.
start_time = time.time()

# Paths (update these as needed).
# Each location can be overridden through an environment variable so the
# notebook runs on other machines without editing this cell; the defaults are
# the original hard-coded locations.
training_path = os.environ.get(
    'ACDC_TRAINING_PATH',
    r'D:\MSDS\Data Minning\Project\Code\ACDC_dataset\training',
)
testing_path = os.environ.get(
    'ACDC_TESTING_PATH',
    r'D:\MSDS\Data Minning\Project\Code\ACDC_dataset\testing',
)
# os.path.join yields the same backslash-separated path on Windows, and a
# proper nested directory (not a single folder containing a literal
# backslash in its name) on other platforms.
output_path = os.environ.get(
    'ACDC_OUTPUT_PATH',
    os.path.join('processed_acdc_dataset', 'pickled_classification'),
)
os.makedirs(output_path, exist_ok=True)

# --- Step 1: Process Training Data ---
full_train_data = convert_nii_np(training_path, mode='train')
save_data(full_train_data, 'train.pkl', output_path)

# --- Step 2: Process Testing Data ---
full_test_data = convert_nii_np(testing_path, mode='test')
save_data(full_test_data, 'test.pkl', output_path)

print(f"\nTraining Patients: {len(full_train_data)}")
print(f"Testing Patients: {len(full_test_data)}")