# 1. Loading Required Packages

In [None]:
# ===============================
# Standard Library Imports
# ===============================
import os
import gc
import sys

# ===============================
# Third-Party Imports
# ===============================
import numpy as np
import pandas as pd
from pathlib import Path

# TensorFlow / Keras
import tensorflow as tf

# ===============================
# Local Module Imports
# ===============================
from QHETI_Transformer import *
from QHETI_eval_pipeline.data_preparer import *
from QHETI_eval_pipeline.model_evaluator import *
from QHETI_eval_pipeline.evaluator import *
from QHETI_eval_pipeline.model_processor import *
from QHETI_eval_pipeline.feature_extractor import *

# 2. Checking if GPU is present

In [None]:
# Ask TensorFlow which GPU devices it can see
gpus = tf.config.list_physical_devices('GPU')
if not gpus:
    print("⚠️ No GPU found. Running on CPU.")
else:
    try:
        # Enable on-demand memory growth so TensorFlow does not reserve
        # all GPU memory at start-up
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print(f"✅ Using GPU: {[gpu.name for gpu in gpus]}")
    except RuntimeError as err:
        # Report the problem instead of aborting the notebook
        print(f"❌ RuntimeError: {err}")

# 3. Version Check

In [None]:
# CHECK FOR CORRECT KERAS AND PANDAS VERSIONS
# Keras is read through the already-imported `tf` namespace: this notebook has
# no explicit `import keras`, so a bare `keras` name would only resolve if one
# of the wildcard imports happened to leak it.
print("keras.__version__ = ", tf.keras.__version__) # 2.14.0

# Error will occur if pandas greater than specified due to loss of backward compatibility
# https://stackoverflow.com/questions/75953279/modulenotfounderror-no-module-named-pandas-core-indexes-numeric-using-metaflo
# pip install "pandas<2.0.0"
print("pd.__version__ = ", pd.__version__) # 1.5.3
print("np.__version__ = ", np.__version__) # 1.24.4
print("tf.__version__ = ", tf.__version__) # 2.14.0
# [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
# Filter on 'GPU' so the printed value matches its label; the unfiltered call
# lists every physical device (CPUs included).
print("tf.config.list_physical_devices('GPU') = ", tf.config.list_physical_devices('GPU'))
print("tf.test.is_built_with_cuda() = ", tf.test.is_built_with_cuda())  # True
# NOTE: is_built_with_cuda() only reports whether this TensorFlow build has
# CUDA support; it does not check that a GPU is actually present, so `device`
# can read "cuda" on a CPU-only machine.
device = "cuda" if tf.test.is_built_with_cuda() else "cpu"
print("device = ", device)

# 4. Data Settings (Feature Extraction / Evaluation)

In [None]:
# -------------------------------
# File paths
# -------------------------------
source_file_path = ""   # path of the input data file (fill in before running)
OUTPUT_PATH = ""        # directory where results are written (fill in before running)

# -------------------------------
# Feature information
# -------------------------------
# Feature names assigned to each of the four quadrants
quadrant_features = dict(Q1=[], Q2=[], Q3=[], Q4=[])

# Columns removed from every patient frame before processing
FEATURES_DROPPED = []

# -------------------------------
# Model training configuration
# -------------------------------
CLASS_VAR = "class"
MINORITY_CLASS = 0

NUM_CV_FOLDS = 3
FIRST_EPOCHS = ""
LAST_EPOCHS = [1000]
BATCH_SIZE = ""

# Layers to unfreeze during transfer learning
NUM_LAYERS_UNFROZEN_SOURCE = []
NUM_LAYERS_UNFROZEN_IND = []

bool_remove_target = False
augmentation_algo = ""   # placeholder

CONVENTIONAL_MODEL_TYPE_LIST = [
    "SVM", "DecisionTree", "K-NN", "LogisticRegression", "NaiveBayes",
]

# -------------------------------
# Evaluation metrics
# -------------------------------
EVALUATION_METRICS = [
    "Weighted Accuracy",
    "Sensitivity/Recall",
    "Specificity",
    "Precision_class0",
    "Precision_class1",
    "Precision_avg",
    "F1_class0",
    "F1_class1",
    "F1_avg",
    "auc_roc_score",
    "False_Discovery_Rate",
    "False_Negative_Rate",
    "False_Omission_Rate",
    "False_Positive_Rate",
    "Jaccard",
]

# -------------------------------
# Experiment groups
# -------------------------------
# Patients list removed for privacy
patient_grp = []

# ===============================
# Model Discovery Function (Proprietary)
# ===============================
def find_all_QHETI_individual_models(QHETI_individual_model_file_path, patient_group, NUM_LAYERS_UNFROZEN_POP):
    """
    Build a two-level lookup of individual models: layer configuration -> patient -> models.

    NOTE: The proprietary discovery logic is redacted for confidentiality, so
    as written every patient maps to an empty dictionary and
    ``QHETI_individual_model_file_path`` is not read.

    Args:
        QHETI_individual_model_file_path: Base location of the individual models.
        patient_group: Iterable of patient identifiers.
        NUM_LAYERS_UNFROZEN_POP: Iterable of layer configurations to scan.

    Returns:
        dict keyed by each entry of ``NUM_LAYERS_UNFROZEN_POP``; every value is
        a dict keyed by patient identifier holding that patient's model dict.
    """
    models_by_layer_config = {}

    for n_unfrozen in NUM_LAYERS_UNFROZEN_POP:
        # Label used in the progress message only; the result is keyed by n_unfrozen
        layer_label = f"{n_unfrozen}_unfrozen_source"
        models_by_patient = {}

        for target_id in patient_group:
            found_models = {}
            model_dir = ...  # Proprietary path construction logic

            # Proprietary file discovery and filtering logic
            # if ...:
            #     for file_name in ...:
            #         if file_name.endswith(".h5"):
            #             found_models[file_name] = ...

            print(f"Total number of Models in {target_id} ({layer_label}): {len(found_models)}")
            models_by_patient[target_id] = found_models

        models_by_layer_config[n_unfrozen] = models_by_patient

    return models_by_layer_config


# ===============================
# Function Call (Example)
# ===============================
# NOTE: Proprietary arguments and file path structure hidden
# find_all_QHETI_individual_models takes three positional parameters
# (model base path, patient group, layer-configuration list); supplying only
# two raises TypeError.
# NOTE(review): the original arguments are redacted. The 2nd and 3rd values
# below are assumed from the settings defined earlier in this cell
# (`patient_grp`, and `NUM_LAYERS_UNFROZEN_SOURCE` to match the
# "_unfrozen_source" label used inside the function) - confirm before use.
QHETI_Individual_Models_list = find_all_QHETI_individual_models(
    ...,                         # Proprietary model base path
    patient_grp,                 # Patient group (assumed)
    NUM_LAYERS_UNFROZEN_SOURCE,  # Layer configurations (assumed)
)

# 5. Load & Process Patient Data

In [None]:
# Load a dictionary that was stored in a .npy file (a single-element object array)
def load_data(source_file_path, allow_pickle=True):
    """
    Load a .npy file and return the single dictionary stored inside it.

    SECURITY: with ``allow_pickle=True`` (the default here) ``np.load``
    unpickles the file contents, which can execute arbitrary code. Only load
    files from a trusted source.

    Args:
        source_file_path: Path of the .npy file to read.
        allow_pickle: Forwarded to ``np.load``; must be True to read an
            object array such as a pickled dictionary.

    Returns:
        dict: The dictionary extracted from the loaded array.

    Raises:
        IOError: If the file cannot be opened or read.
        ValueError: If the loaded array does not hold exactly one element.
        TypeError: If the file does not yield a NumPy array, or the single
            element is not a dictionary.
    """
    try:
        loaded = np.load(source_file_path, allow_pickle=allow_pickle)
    except IOError as e:
        raise IOError(f"[ERROR] Failed to load data from {source_file_path}") from e
    print(f"[INFO] Loaded object of type: {type(loaded)}")

    if not isinstance(loaded, np.ndarray):
        # e.g. an .npz archive: release its file handle before failing
        close = getattr(loaded, "close", None)
        if callable(close):
            close()
        raise TypeError(
            f"[ERROR] Expected a NumPy array in {source_file_path}, got {type(loaded)}"
        )

    if loaded.size != 1:
        raise ValueError(
            f"[ERROR] Expected a single stored object in {source_file_path}, "
            f"got an array of shape {loaded.shape}"
        )

    # Unwrap the only element of the array
    datadict = loaded.item()
    print(f"[INFO] Extracted dictionary of type: {type(datadict)}")

    # Fail here rather than later with an obscure error when the payload is not a dict
    if not isinstance(datadict, dict):
        raise TypeError(
            f"[ERROR] Expected a dictionary in {source_file_path}, got {type(datadict)}"
        )

    return datadict
    
# Load the data dictionary from the path configured in `source_file_path`
datadict = load_data(source_file_path)

In [None]:
def process_patient_data(datadict):
    """
    Clean every patient's DataFrame and record its number of samples.

    For each entry of ``datadict`` the column labels are lower-cased and the
    columns listed in the module-level ``FEATURES_DROPPED`` are removed. Both
    steps modify the stored DataFrame in place, so ``datadict`` reflects them
    after the call. The frame is then copied into a float32 NumPy array whose
    shape is printed and whose row count is recorded.

    Args:
        datadict: Mapping of patient id -> pandas DataFrame.

    Returns:
        tuple: ``(sample_size_dict, p_ids, df)`` where
            - ``sample_size_dict`` maps each patient id to its row count,
            - ``p_ids`` is the keys view of ``datadict``,
            - ``df`` is the float32 array of the LAST patient processed, or
              ``None`` when ``datadict`` is empty.

    Raises:
        KeyError: From ``DataFrame.drop`` when a column in ``FEATURES_DROPPED``
            is not present in a frame (this includes re-running on frames
            whose columns were already dropped).
    """
    p_ids = datadict.keys()
    sample_size_dict = {}
    # Bound up front so an empty datadict returns None instead of raising
    # UnboundLocalError at the return statement.
    df = None
    print("patients: n =", len(p_ids), end="\n\n")

    for p_id in p_ids:
        patient_frame = datadict[p_id]
        patient_frame.columns = patient_frame.columns.str.lower()
        # Drop common unnecessary columns (in place: updates the frame held in datadict)
        patient_frame.drop(FEATURES_DROPPED, axis=1, inplace=True)

        # Convert data frame to NumPy array and cast to float32
        df = np.asarray(patient_frame).astype(np.float32)
        print(p_id, "shape:", df.shape)
        sample_size_dict[p_id] = df.shape[0]

    return sample_size_dict, p_ids, df


# Process every patient frame; `df` receives only the last patient's array
sample_size_dict, p_ids, df = process_patient_data(datadict)

# 6. Running the Evaluation Pipeline

In [None]:
# Evaluation driver: for each unfrozen-layer setting, build the pipeline
# objects and evaluate the patient group.
# NOTE(review): every `...` below is a redaction placeholder (the Ellipsis
# object), not working code; the loop body cannot run until the real
# constructors and arguments are restored.
if __name__ == "__main__":
    # Create output directory (proprietary path setup hidden)
    # One full pipeline is built per entry of NUM_LAYERS_UNFROZEN_SOURCE.
    for NUM_LAYERS in NUM_LAYERS_UNFROZEN_SOURCE:
        output_path = ...  # Proprietary output path construction
        # NOTE(review): the directory argument is redacted; presumably
        # `output_path` - confirm.
        os.makedirs(..., exist_ok=True)

        # Step 1: Initialize evaluator (metrics)
        evaluator = ...  # Proprietary evaluator class initialization

        # Step 2: Initialize model processor with evaluator
        model_processor = ...  # Proprietary ModelProcessor setup

        # Step 3: Set up data preparer (scaling + augmentation)
        data_preparer = ...  # Proprietary DataPreparer initialization

        # Step 4: Initialize transformer for image conversion
        # NOTE(review): named "qati" while the rest of the notebook uses the
        # "QHETI" prefix - confirm this is intentional.
        qati_transformer = ...  # Proprietary QATI transformer initialization

        # Step 5: Build the model evaluator
        model_evaluator = ...  # Proprietary ModelEvaluator initialization

        # Step 6: Inject processing function for parallel execution
        model_evaluator.process_model_fn = ...  # Proprietary processing function assignment

        # Step 7: Run evaluation for patient group and model types
        model_evaluator.evaluate_group(
            patient_group=...,
            CONVENTIONAL_MODEL_TYPE_LIST=...
        )
