In [None]:
# Essential imports for ensemble learning pipeline
import pandas as pd
import numpy as np
import cv2
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# PyTorch deep learning framework
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from torch.optim.lr_scheduler import ReduceLROnPlateau, StepLR
import torchvision
from torchvision import models, transforms
import timm  # For Xception and other models

# Data augmentation
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Machine learning utilities
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, precision_recall_curve, roc_curve, accuracy_score
from sklearn.utils.class_weight import compute_class_weight

# Genetic algorithm for hyperparameter optimization
import deap
from deap import base, creator, tools, algorithms
import random
from tqdm import tqdm
import time

# --- Step 1: FastMRI to DICOM ---
import datetime
import os
from pathlib import Path
import h5py
import numpy as np
import pydicom
from pydicom.dataset import Dataset, FileMetaDataset
from pydicom.uid import generate_uid
import xmltodict

import pydicom
import numpy as np
from PIL import Image
from pathlib import Path
import os

from pathlib import Path

In [2]:
def fastmri_to_dicom(filename: Path,
    reconstruction_name: str,
    outfolder: Path,
    flip_up_down: bool = False,
    flip_left_right: bool = False) -> None:
    fileparts = os.path.splitext(filename.name)
    patientName = fileparts[0]
    f = h5py.File(filename,'r')
    if not outfolder:
        outfolder = Path(patientName)
        outfolder.mkdir(parents=True, exist_ok=True)
    if 'ismrmrd_header' not in f.keys():
        raise Exception('ISMRMRD header not found in file')
    if reconstruction_name not in f.keys():
        raise Exception('Reconstruction name not found in file')
    head = xmltodict.parse(f['ismrmrd_header'][()])
    reconSpace = head['ismrmrdHeader']['encoding']['reconSpace']
    measurementInformation = head['ismrmrdHeader']['measurementInformation']
    acquisitionSystemInformation = head['ismrmrdHeader']['acquisitionSystemInformation']
    H1resonanceFrequency_Hz = head['ismrmrdHeader']['experimentalConditions']['H1resonanceFrequency_Hz']
    sequenceParameters = head['ismrmrdHeader']['sequenceParameters']
    pixelSizeX = float(reconSpace['fieldOfView_mm']['x'])/float(reconSpace['matrixSize']['x'])
    pixelSizeY = float(reconSpace['fieldOfView_mm']['y'])/float(reconSpace['matrixSize']['y'])
    img_data = f[reconstruction_name][:]
    slices = img_data.shape[0]
    if flip_left_right:
        img_data = img_data[:, :, ::-1]
    if flip_up_down:
        img_data = img_data[:, ::-1, :]
    image_max = 1024
    scale = image_max / np.percentile(img_data, 99.9)
    pixels_scaled = np.clip((scale * img_data), 0, image_max).astype('int16')
    windowWidth = 2 * (np.percentile(pixels_scaled, 99.9) - np.percentile(pixels_scaled, 0.1))
    windowCenter = windowWidth/2
    studyInstanceUid = generate_uid('999.')
    seriesInstanceUid = generate_uid('9999.')
    for s in range(0, slices):
        slice_filename = "%s_%03d.dcm"%(patientName, s)
        slice_full_path = outfolder/slice_filename
        slice_pixels = pixels_scaled[s,:,:]
        file_meta = FileMetaDataset()
        file_meta.MediaStorageSOPClassUID = '1.2.840.10008.5.1.4.1.1.4'
        file_meta.MediaStorageSOPInstanceUID = "1.2.3"
        file_meta.ImplementationClassUID = "1.2.3.4"
        file_meta.TransferSyntaxUID = '1.2.840.10008.1.2.1'
        ds = Dataset()
        dt = datetime.datetime.now()
        ds.ContentDate = dt.strftime('%Y%m%d')
        timeStr = dt.strftime('%H%M%S.%f')
        ds.SOPClassUID = '1.2.840.10008.5.1.4.1.1.4'
        ds.SOPInstanceUID = generate_uid('9999.')
        ds.ContentTime = timeStr
        ds.Modality = 'MR'
        ds.ModalitiesInStudy = ['', 'PR', 'MR', '']
        ds.StudyDescription = measurementInformation['protocolName']
        ds.PatientName = patientName
        ds.PatientID = patientName
        ds.PatientBirthDate = '19700101'
        ds.PatientSex = 'M'
        ds.PatientAge = '030Y'
        ds.PatientIdentityRemoved = 'YES'
        ds.MRAcquisitionType = '2D'
        ds.SequenceName = sequenceParameters['sequence_type']
        ds.SliceThickness = reconSpace['fieldOfView_mm']['z']
        ds.RepetitionTime = sequenceParameters['TR']
        ds.EchoTime = sequenceParameters['TE']
        ds.ImagingFrequency = H1resonanceFrequency_Hz
        ds.ImagedNucleus = '1H'
        ds.EchoNumbers = "1"
        ds.MagneticFieldStrength = acquisitionSystemInformation['systemFieldStrength_T']
        ds.SpacingBetweenSlices = reconSpace['fieldOfView_mm']['z']
        ds.FlipAngle = str(sequenceParameters['flipAngle_deg'])
        ds.PatientPosition = measurementInformation['patientPosition']
        ds.StudyInstanceUID = studyInstanceUid
        ds.SeriesInstanceUID = seriesInstanceUid
        ds.StudyID = measurementInformation['measurementID']
        ds.InstanceNumber = str(s+1)
        ds.ImagesInAcquisition = str(slices)
        ds.SamplesPerPixel = 1
        ds.PhotometricInterpretation = 'MONOCHROME2'
        ds.NumberOfFrames = "1"
        ds.Rows = slice_pixels.shape[0]
        ds.Columns = slice_pixels.shape[1]
        ds.PixelSpacing = [pixelSizeX, pixelSizeY]
        ds.PixelAspectRatio = [1, 1]
        ds.BitsAllocated = 16
        ds.BitsStored = 12
        ds.HighBit = 11
        ds.PixelRepresentation = 1 
        ds.SmallestImagePixelValue = 0
        ds.LargestImagePixelValue = 1024
        ds.BurnedInAnnotation = 'NO'
        ds.WindowCenter = str(windowCenter)
        ds.WindowWidth = str(windowWidth)
        ds.LossyImageCompression = '00'
        ds.StudyStatusID = 'COMPLETED'
        ds.ResultsID = ''
        # NOTE: The following method may need to be replaced with ds.PixelData assignment depending on pydicom version
        ds.set_pixel_data(slice_pixels, 'MONOCHROME2', 12)
        ds.file_meta = file_meta
        ds.is_implicit_VR = False
        ds.is_little_endian = True
        ds.save_as(slice_full_path, write_like_original=False)
# Example usage
# fastmri_to_dicom(Path("/mnt/f/datasets/demo_data/file1000002.h5"), "reconstruction_rss", Path("output"), True, False)

In [3]:
# --- Step 2: DICOM to PNG ---

def dicom_to_png(dicom_path, output_path=None, apply_windowing=True, normalize=True):
    """
    Convert a DICOM file to PNG format.
    Args:
        dicom_path (str or Path): Path to the DICOM file
        output_path (str or Path, optional): Output PNG file path. If None, uses same name as DICOM with .png extension
        apply_windowing (bool): Whether to apply DICOM windowing (window center/width)
        normalize (bool): Whether to normalize pixel values to 0-255 range
    Returns:
        str: Path to the saved PNG file
    """
    dicom_path = Path(dicom_path)
    if not dicom_path.exists():
        raise FileNotFoundError(f"DICOM file not found: {dicom_path}")
    ds = pydicom.dcmread(dicom_path)
    pixel_array = ds.pixel_array
    if ds.PixelRepresentation == 1:
        pixel_array = pixel_array.astype(np.int16)
    if hasattr(ds, 'RescaleSlope') and hasattr(ds, 'RescaleIntercept'):
        pixel_array = pixel_array * ds.RescaleSlope + ds.RescaleIntercept
    if apply_windowing and hasattr(ds, 'WindowCenter') and hasattr(ds, 'WindowWidth'):
        window_center = float(ds.WindowCenter) if isinstance(ds.WindowCenter, (int, float, str)) else float(ds.WindowCenter[0])
        window_width = float(ds.WindowWidth) if isinstance(ds.WindowWidth, (int, float, str)) else float(ds.WindowWidth[0])
        window_min = window_center - window_width / 2
        window_max = window_center + window_width / 2
        pixel_array = np.clip(pixel_array, window_min, window_max)
        pixel_array = (pixel_array - window_min) / (window_max - window_min) * 255
    elif normalize:
        pixel_min = np.min(pixel_array)
        pixel_max = np.max(pixel_array)
        if pixel_max > pixel_min:
            pixel_array = (pixel_array - pixel_min) / (pixel_max - pixel_min) * 255
        else:
            pixel_array = np.zeros_like(pixel_array)
    pixel_array = pixel_array.astype(np.uint8)
    if len(pixel_array.shape) == 3:
        print(f"Multi-frame DICOM detected with {pixel_array.shape[0]} frames")
        output_dir = output_path.parent if output_path else dicom_path.parent
        base_name = output_path.stem if output_path else dicom_path.stem
        saved_files = []
        for i, frame in enumerate(pixel_array):
            frame_output = output_dir / f"{base_name}_frame_{i:03d}.png"
            if len(frame.shape) == 2:
                img = Image.fromarray(frame, mode='L')
            else:
                img = Image.fromarray(frame)
            img.save(frame_output)
            saved_files.append(str(frame_output))
            print(f"Saved frame {i}: {frame_output}")
        return saved_files
    else:
        if output_path is None:
            output_path = dicom_path.with_suffix('.png')
        else:
            output_path = Path(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        if len(pixel_array.shape) == 2:
            img = Image.fromarray(pixel_array, mode='L')
        else:
            img = Image.fromarray(pixel_array)
        img.save(output_path)
        print(f"Saved: {output_path}")
        return str(output_path)

def process_directory(input_dir, output_dir=None, apply_windowing=True, normalize=True):
    input_dir = Path(input_dir)
    if output_dir is None:
        output_dir = input_dir / "png_output"
    else:
        output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    dicom_extensions = ['.dcm', '.dicom', '.ima', '']
    dicom_files = []
    for ext in dicom_extensions:
        if ext:
            dicom_files.extend(input_dir.glob(f"*{ext}"))
        else:
            for file in input_dir.iterdir():
                if file.is_file() and not file.suffix:
                    try:
                        pydicom.dcmread(file, stop_before_pixels=True)
                        dicom_files.append(file)
                    except:
                        continue
    if not dicom_files:
        print(f"No DICOM files found in {input_dir}")
        return
    print(f"Found {len(dicom_files)} DICOM files")
    for dicom_file in dicom_files:
        try:
            output_path = output_dir / f"{dicom_file.stem}.png"
            dicom_to_png(dicom_file, output_path, apply_windowing, normalize)
        except Exception as e:
            print(f"Error processing {dicom_file}: {e}")
# Example usage
# process_directory("output", "png-output", True, True)

In [4]:
# Step 1: Convert FastMRI HDF5 to DICOM slices
fastmri_h5_path = Path("/mnt/f/datasets/demo_data/file1000002.h5")  # Update path as needed
output_dicom_dir = Path("output")
fastmri_to_dicom(fastmri_h5_path, "reconstruction_rss", output_dicom_dir, flip_up_down=True, flip_left_right=False)

# Step 2: Convert DICOM slices to PNG images
output_png_dir = Path("png-output")
process_directory(output_dicom_dir, output_png_dir, apply_windowing=True, normalize=True)

print("Pipeline complete. DICOM files are in 'output/', PNG files are in 'png-output/'.")

FileNotFoundError: [Errno 2] Unable to synchronously open file (unable to open file: name = '/mnt/f/datasets/demo_data/file1000002.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

In [None]:
# Set random seeds for reproducibility
np.random.seed(42)
torch.manual_seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed(42)
    torch.cuda.manual_seed_all(42)
random.seed(42)

# Set deterministic behavior
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Environment setup complete.")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA device: {torch.cuda.get_device_name(0)}")
    print(f"CUDA memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
print(f"Using device: {device}")


In [None]:
# Load and process annotations from knee.csv
class DataProcessor:
    def __init__(self, csv_path, png_dir):
        self.csv_path = csv_path
        self.png_dir = Path(png_dir)
        self.df = None
        self.subject_labels = {}
        
    def load_annotations(self):
        """Load and process knee annotations"""
        self.df = pd.read_csv(self.csv_path)
        print(f"Loaded {len(self.df)} annotations")
        print(f"Unique files: {self.df['file'].nunique()}")
        print(f"Label distribution:")
        print(self.df['label'].value_counts())
        return self.df
    
    def create_target_labels(self):
        """Create target labels: ACL tear, Meniscus tear, Neither"""
        # Map detailed labels to our target classes
        acl_keywords = ['ACL', 'Anterior Cruciate', 'Anterior Cruciate Ligament', 'ACL High Grade Sprain', 'ACL Low Grade sprain']
        meniscus_keywords = ['Meniscus', 'Meniscus Tear']
        
        subject_conditions = {}
        
        for file_id in self.df['file'].unique():
            file_data = self.df[self.df['file'] == file_id]
            labels = file_data['label'].tolist()
            
            has_acl = any(any(keyword.lower() in label.lower() for keyword in acl_keywords) for label in labels)
            has_meniscus = any(any(keyword.lower() in label.lower() for keyword in meniscus_keywords) for label in labels)
            
            if has_acl and has_meniscus:
                subject_conditions[file_id] = 'Both'  # We'll handle this case
            elif has_acl:
                subject_conditions[file_id] = 'ACL_tear'
            elif has_meniscus:
                subject_conditions[file_id] = 'Meniscus_tear'
            else:
                subject_conditions[file_id] = 'Neither'
        
        self.subject_labels = subject_conditions
        print("\nSubject-level label distribution:")
        label_counts = pd.Series(list(subject_conditions.values())).value_counts()
        print(label_counts)
        
        return subject_conditions
    
    def get_bounding_boxes(self, file_id, slice_num):
        """Get bounding boxes for a specific file and slice"""
        slice_data = self.df[(self.df['file'] == file_id) & (self.df['slice'] == slice_num)]
        boxes = []
        for _, row in slice_data.iterrows():
            boxes.append({
                'x': row['x'], 'y': row['y'], 
                'width': row['width'], 'height': row['height'],
                'label': row['label']
            })
        return boxes
    
    def get_available_images(self):
        """Get list of available PNG images with their labels"""
        available_images = []
        
        for png_file in self.png_dir.glob('*.png'):
            # Extract file ID and slice from filename (e.g., file1000002_000.png)
            filename = png_file.stem
            parts = filename.split('_')
            if len(parts) >= 2:
                file_id = '_'.join(parts[:-1])  # Everything except last part
                slice_num = int(parts[-1])  # Last part is slice number
                
                if file_id in self.subject_labels:
                    available_images.append({
                        'path': str(png_file),
                        'file_id': file_id,
                        'slice': slice_num,
                        'label': self.subject_labels[file_id]
                    })
        
        print(f"\nFound {len(available_images)} available images")
        return available_images

# Initialize data processor
data_processor = DataProcessor('knee.csv', 'png-output')
annotations = data_processor.load_annotations()
subject_labels = data_processor.create_target_labels()
available_images = data_processor.get_available_images()

print(f"\nReady to process {len(available_images)} images from {len(subject_labels)} subjects")


In [None]:
# Data Augmentation for MRI Scans using Albumentations
class MRIAugmentation:
    def __init__(self, image_size=(224, 224)):
        self.image_size = image_size
        
    def get_train_augmentation(self):
        """Augmentation pipeline for training data - appropriate for MRI"""
        return A.Compose([
            A.Resize(self.image_size[0], self.image_size[1], always_apply=True),
            
            # Geometric transformations (conservative for medical images)
            A.HorizontalFlip(p=0.5),  # Anatomically valid
            A.Rotate(limit=10, p=0.3),  # Small rotations only
            A.ShiftScaleRotate(
                shift_limit=0.05, 
                scale_limit=0.05, 
                rotate_limit=5, 
                p=0.3
            ),
            
            # Intensity transformations (important for MRI)
            A.RandomBrightnessContrast(
                brightness_limit=0.1, 
                contrast_limit=0.1, 
                p=0.3
            ),
            A.RandomGamma(gamma_limit=(90, 110), p=0.2),
            
            # Noise and blur (simulate acquisition variations)
            A.GaussNoise(var_limit=(10.0, 50.0), p=0.2),
            A.GaussianBlur(blur_limit=(1, 3), p=0.1),
            
            # Elastic deformation (subtle tissue deformation)
            A.ElasticTransform(
                alpha=30,
                sigma=5,
                alpha_affine=3,
                p=0.1
            ),
            
            # Normalization
            A.Normalize(
                mean=[0.485],  # Grayscale
                std=[0.229],
                max_pixel_value=255.0
            )
        ])
    
    def get_val_augmentation(self):
        """Augmentation pipeline for validation/test data"""
        return A.Compose([
            A.Resize(self.image_size[0], self.image_size[1], always_apply=True),
            A.Normalize(
                mean=[0.485],
                std=[0.229],
                max_pixel_value=255.0
            )
        ])

# Initialize augmentation
augmentation = MRIAugmentation(image_size=(224, 224))
train_transform = augmentation.get_train_augmentation()
val_transform = augmentation.get_val_augmentation()

print("MRI-specific augmentation pipeline created.")
print("Train augmentations: Flip, Rotate, Brightness/Contrast, Noise, Blur")
print("Validation augmentations: Resize, Normalize only")


In [None]:
# Subject-level Data Splitting and Balancing
class SubjectLevelSplitter:
    def __init__(self, available_images, test_size=0.2, val_size=0.2, random_state=42):
        self.available_images = available_images
        self.test_size = test_size
        self.val_size = val_size
        self.random_state = random_state
        
    def split_subjects(self):
        """Split data at subject level to prevent data leakage"""
        # Group images by subject
        subject_data = {}
        for img_info in self.available_images:
            file_id = img_info['file_id']
            if file_id not in subject_data:
                subject_data[file_id] = []
            subject_data[file_id].append(img_info)
        
        # Get unique subjects and their labels
        subjects = list(subject_data.keys())
        subject_labels_list = [subject_data[subj][0]['label'] for subj in subjects]
        
        print(f"Total subjects: {len(subjects)}")
        print(f"Label distribution across subjects:")
        print(pd.Series(subject_labels_list).value_counts())
        
        # First split: train+val vs test
        train_val_subjects, test_subjects = train_test_split(
            subjects, 
            test_size=self.test_size, 
            stratify=subject_labels_list, 
            random_state=self.random_state
        )
        
        # Second split: train vs val
        train_val_labels = [subject_data[subj][0]['label'] for subj in train_val_subjects]
        train_subjects, val_subjects = train_test_split(
            train_val_subjects,
            test_size=self.val_size / (1 - self.test_size),  # Adjust for already removed test set
            stratify=train_val_labels,
            random_state=self.random_state
        )
        
        # Create final datasets
        train_data = []
        val_data = []
        test_data = []
        
        for subj in train_subjects:
            train_data.extend(subject_data[subj])
        for subj in val_subjects:
            val_data.extend(subject_data[subj])
        for subj in test_subjects:
            test_data.extend(subject_data[subj])
        
        print(f"\nData split completed:")
        print(f"Train: {len(train_data)} images from {len(train_subjects)} subjects")
        print(f"Val: {len(val_data)} images from {len(val_subjects)} subjects")
        print(f"Test: {len(test_data)} images from {len(test_subjects)} subjects")
        
        # Print label distribution for each split
        for split_name, split_data in [('Train', train_data), ('Val', val_data), ('Test', test_data)]:
            labels = [item['label'] for item in split_data]
            print(f"\n{split_name} label distribution:")
            print(pd.Series(labels).value_counts())
        
        return train_data, val_data, test_data
    
    def balance_classes(self, data, method='undersample'):
        """Balance classes in the dataset"""
        # Group by label
        label_groups = {}
        for item in data:
            label = item['label']
            if label not in label_groups:
                label_groups[label] = []
            label_groups[label].append(item)
        
        print(f"\nClass distribution before balancing:")
        for label, items in label_groups.items():
            print(f"{label}: {len(items)}")
        
        if method == 'undersample':
            # Undersample to the minority class size
            min_size = min(len(items) for items in label_groups.values())
            balanced_data = []
            
            for label, items in label_groups.items():
                np.random.shuffle(items)
                balanced_data.extend(items[:min_size])
                
        elif method == 'oversample':
            # Oversample to the majority class size
            max_size = max(len(items) for items in label_groups.values())
            balanced_data = []
            
            for label, items in label_groups.items():
                # Repeat items to reach max_size
                multiplier = max_size // len(items)
                remainder = max_size % len(items)
                
                extended_items = items * multiplier
                if remainder > 0:
                    np.random.shuffle(items)
                    extended_items.extend(items[:remainder])
                
                balanced_data.extend(extended_items)
        
        np.random.shuffle(balanced_data)
        
        print(f"\nClass distribution after {method}:")
        balanced_labels = [item['label'] for item in balanced_data]
        print(pd.Series(balanced_labels).value_counts())
        
        return balanced_data

# Perform subject-level splitting
splitter = SubjectLevelSplitter(available_images, test_size=0.2, val_size=0.2)
train_data, val_data, test_data = splitter.split_subjects()

# Balance training data (optional - you can choose undersample or oversample)
train_data_balanced = splitter.balance_classes(train_data, method='oversample')

print(f"\nFinal dataset sizes:")
print(f"Balanced Train: {len(train_data_balanced)}")
print(f"Val: {len(val_data)}")
print(f"Test: {len(test_data)}")


In [None]:
# PyTorch Dataset for Efficient Loading
class MRIDataset(Dataset):
    def __init__(self, data, transform=None, num_classes=3):
        self.data = data
        self.transform = transform
        self.num_classes = num_classes
        
        # Create label encoder
        self.label_encoder = LabelEncoder()
        all_labels = [item['label'] for item in data]
        self.label_encoder.fit(all_labels)
        
        # Pre-encode all labels
        self.encoded_labels = self.label_encoder.transform(all_labels)
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        item = self.data[idx]
        
        # Load image
        image = cv2.imread(item['path'], cv2.IMREAD_GRAYSCALE)
        if image is None:
            # Create dummy image if loading fails
            print(f"Warning: Could not load image {item['path']}")
            image = np.zeros((224, 224), dtype=np.uint8)
        
        # Convert to RGB (repeat grayscale across 3 channels)
        image_rgb = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
        
        # Apply augmentation
        if self.transform:
            augmented = self.transform(image=image_rgb)
            image_rgb = augmented['image']
        
        # Ensure correct format for PyTorch
        if isinstance(image_rgb, np.ndarray):
            if len(image_rgb.shape) == 3 and image_rgb.shape[2] == 3:
                # Convert from HWC to CHW format for PyTorch
                image_tensor = torch.from_numpy(image_rgb.transpose(2, 0, 1)).float()
            else:
                # Fallback: normalize and convert
                image_rgb = image_rgb.astype(np.float32) / 255.0
                if len(image_rgb.shape) == 2:
                    image_rgb = np.stack([image_rgb] * 3, axis=0)  # CHW format
                else:
                    image_rgb = image_rgb.transpose(2, 0, 1)  # HWC to CHW
                image_tensor = torch.from_numpy(image_rgb).float()
        else:
            # Already a tensor from albumentations
            image_tensor = image_rgb
        
        # Get label
        label = self.encoded_labels[idx]
        
        return image_tensor, label
    
    def get_labels(self):
        return [item['label'] for item in self.data]
    
    def get_class_weights(self):
        labels = self.get_labels()
        label_encoded = self.label_encoder.transform(labels)
        class_weights = compute_class_weight(
            'balanced',
            classes=np.unique(label_encoded),
            y=label_encoded
        )
        return torch.FloatTensor(class_weights).to(device)
    
    def get_weighted_sampler(self):
        """Create weighted sampler for balanced training"""
        labels = self.encoded_labels
        class_counts = np.bincount(labels)
        class_weights = 1.0 / class_counts
        sample_weights = class_weights[labels]
        
        return WeightedRandomSampler(
            weights=sample_weights,
            num_samples=len(sample_weights),
            replacement=True
        )

# Update augmentation to work with PyTorch
class MRIAugmentationPyTorch:
    def __init__(self, image_size=(224, 224)):
        self.image_size = image_size
        
    def get_train_augmentation(self):
        """Augmentation pipeline for training data - appropriate for MRI"""
        return A.Compose([
            A.Resize(self.image_size[0], self.image_size[1], always_apply=True),
            
            # Geometric transformations (conservative for medical images)
            A.HorizontalFlip(p=0.5),  # Anatomically valid
            A.Rotate(limit=10, p=0.3),  # Small rotations only
            A.ShiftScaleRotate(
                shift_limit=0.05, 
                scale_limit=0.05, 
                rotate_limit=5, 
                p=0.3
            ),
            
            # Intensity transformations (important for MRI)
            A.RandomBrightnessContrast(
                brightness_limit=0.1, 
                contrast_limit=0.1, 
                p=0.3
            ),
            A.RandomGamma(gamma_limit=(90, 110), p=0.2),
            
            # Noise and blur (simulate acquisition variations)
            A.GaussNoise(var_limit=(10.0, 50.0), p=0.2),
            A.GaussianBlur(blur_limit=(1, 3), p=0.1),
            
            # Elastic deformation (subtle tissue deformation)
            A.ElasticTransform(
                alpha=30,
                sigma=5,
                alpha_affine=3,
                p=0.1
            ),
            
            # Normalization for PyTorch
            A.Normalize(
                mean=[0.485, 0.456, 0.406],  # ImageNet means for RGB
                std=[0.229, 0.224, 0.225],   # ImageNet stds for RGB
                max_pixel_value=255.0
            ),
            ToTensorV2()  # Convert to PyTorch tensor
        ])
    
    def get_val_augmentation(self):
        """Augmentation pipeline for validation/test data"""
        return A.Compose([
            A.Resize(self.image_size[0], self.image_size[1], always_apply=True),
            A.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
                max_pixel_value=255.0
            ),
            ToTensorV2()
        ])

# Create PyTorch datasets and dataloaders
print("Creating PyTorch datasets and dataloaders...")

# Initialize augmentation
augmentation = MRIAugmentationPyTorch(image_size=(224, 224))
train_transform = augmentation.get_train_augmentation()
val_transform = augmentation.get_val_augmentation()

# Create datasets
train_dataset = MRIDataset(train_data_balanced, transform=train_transform)
val_dataset = MRIDataset(val_data, transform=val_transform)
test_dataset = MRIDataset(test_data, transform=val_transform)

# Create dataloaders
batch_size = 16

# Use weighted sampler for balanced training
train_sampler = train_dataset.get_weighted_sampler()

train_loader = DataLoader(
    train_dataset, 
    batch_size=batch_size, 
    sampler=train_sampler,
    num_workers=4,
    pin_memory=True if torch.cuda.is_available() else False
)

val_loader = DataLoader(
    val_dataset, 
    batch_size=batch_size, 
    shuffle=False,
    num_workers=4,
    pin_memory=True if torch.cuda.is_available() else False
)

test_loader = DataLoader(
    test_dataset, 
    batch_size=batch_size, 
    shuffle=False,
    num_workers=4,
    pin_memory=True if torch.cuda.is_available() else False
)

# Get class weights
class_weights = train_dataset.get_class_weights()

print(f"Train loader: {len(train_loader)} batches")
print(f"Val loader: {len(val_loader)} batches")
print(f"Test loader: {len(test_loader)} batches")
print(f"Class weights: {class_weights}")
print(f"Label encoding: {dict(zip(train_dataset.label_encoder.classes_, range(len(train_dataset.label_encoder.classes_))))}")


In [None]:
# Base Model Architectures in PyTorch
class BaseModelBuilder:
    def __init__(self, num_classes=3):
        self.num_classes = num_classes
    
    def build_resnext50(self, dropout_rate=0.5, freeze_backbone=True):
        """Build ResNeXt-50 model"""
        model = models.resnext50_32x4d(pretrained=True)
        
        # Freeze backbone if specified
        if freeze_backbone:
            for param in model.parameters():
                param.requires_grad = False
        
        # Replace classifier
        num_features = model.fc.in_features
        model.fc = nn.Sequential(
            nn.Dropout(dropout_rate),
            nn.Linear(num_features, 512),
            nn.ReLU(),
            nn.BatchNorm1d(512),
            nn.Dropout(dropout_rate),
            nn.Linear(512, self.num_classes)
        )
        
        return model
    
    def build_densenet201(self, dropout_rate=0.5, freeze_backbone=True):
        """Build DenseNet-201 model"""
        model = models.densenet201(pretrained=True)
        
        # Freeze backbone if specified
        if freeze_backbone:
            for param in model.features.parameters():
                param.requires_grad = False
        
        # Replace classifier
        num_features = model.classifier.in_features
        model.classifier = nn.Sequential(
            nn.Dropout(dropout_rate),
            nn.Linear(num_features, 512),
            nn.ReLU(),
            nn.BatchNorm1d(512),
            nn.Dropout(dropout_rate),
            nn.Linear(512, self.num_classes)
        )
        
        return model
    
    def build_efficientnet_b7(self, dropout_rate=0.5, freeze_backbone=True):
        """Build EfficientNet-B7 model"""
        model = models.efficientnet_b7(pretrained=True)
        
        # Freeze backbone if specified
        if freeze_backbone:
            for param in model.features.parameters():
                param.requires_grad = False
        
        # Replace classifier
        num_features = model.classifier[1].in_features
        model.classifier = nn.Sequential(
            nn.Dropout(dropout_rate),
            nn.Linear(num_features, 1024),
            nn.ReLU(),
            nn.BatchNorm1d(1024),
            nn.Dropout(dropout_rate),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(512, self.num_classes)
        )
        
        return model
    
    def build_xception(self, dropout_rate=0.5, freeze_backbone=True):
        """Build Xception model"""
        model = timm.create_model('xception41', pretrained=True)
        
        # Freeze backbone if specified
        if freeze_backbone:
            for param in model.parameters():
                param.requires_grad = False
            # Unfreeze classifier head
            for param in model.classifier.parameters():
                param.requires_grad = True
        
        # Replace classifier head
        num_features = model.classifier.in_features
        model.classifier = nn.Sequential(
            nn.Dropout(dropout_rate),
            nn.Linear(num_features, 512),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(512, self.num_classes)
        )
        
        return model
    
    def get_model_by_name(self, model_name, **kwargs):
        """Get model by name"""
        if model_name == 'resnext50':
            return self.build_resnext50(**kwargs)
        elif model_name == 'densenet201':
            return self.build_densenet201(**kwargs)
        elif model_name == 'efficientnet_b7':
            return self.build_efficientnet_b7(**kwargs)
        elif model_name == 'xception':
            return self.build_xception(**kwargs)
        else:
            raise ValueError(f"Unknown model name: {model_name}")

class PyTorchTrainer:
    def __init__(self, model, device, class_weights=None):
        self.model = model.to(device)
        self.device = device
        self.class_weights = class_weights
        
        # Loss function with class weights
        if class_weights is not None:
            self.criterion = nn.CrossEntropyLoss(weight=class_weights)
        else:
            self.criterion = nn.CrossEntropyLoss()
        
        self.training_history = {
            'train_loss': [], 'train_acc': [],
            'val_loss': [], 'val_acc': []
        }
    
    def train_epoch(self, train_loader, optimizer):
        """Train for one epoch"""
        self.model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        
        train_bar = tqdm(train_loader, desc='Training')
        for batch_idx, (data, target) in enumerate(train_bar):
            data, target = data.to(self.device), target.to(self.device)
            
            optimizer.zero_grad()
            output = self.model(data)
            loss = self.criterion(output, target)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
            _, predicted = torch.max(output.data, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
            
            # Update progress bar
            train_bar.set_postfix({
                'Loss': f'{running_loss/(batch_idx+1):.4f}',
                'Acc': f'{100.*correct/total:.2f}%'
            })
        
        epoch_loss = running_loss / len(train_loader)
        epoch_acc = correct / total
        
        return epoch_loss, epoch_acc
    
    def validate_epoch(self, val_loader):
        """Validate for one epoch"""
        self.model.eval()
        running_loss = 0.0
        correct = 0
        total = 0
        
        with torch.no_grad():
            for data, target in val_loader:
                data, target = data.to(self.device), target.to(self.device)
                output = self.model(data)
                loss = self.criterion(output, target)
                
                running_loss += loss.item()
                _, predicted = torch.max(output.data, 1)
                total += target.size(0)
                correct += (predicted == target).sum().item()
        
        epoch_loss = running_loss / len(val_loader)
        epoch_acc = correct / total
        
        return epoch_loss, epoch_acc
    
    def train(self, train_loader, val_loader, epochs=50, learning_rate=1e-4,
              patience=10, save_path=None):
        """Full training loop"""
        optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)
        scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.5, 
                                     patience=5, verbose=True)
        
        best_val_acc = 0.0
        patience_counter = 0
        
        print(f"Starting training for {epochs} epochs...")
        
        for epoch in range(epochs):
            print(f"\nEpoch {epoch+1}/{epochs}")
            print("-" * 30)
            
            # Train
            train_loss, train_acc = self.train_epoch(train_loader, optimizer)
            
            # Validate
            val_loss, val_acc = self.validate_epoch(val_loader)
            
            # Update learning rate
            scheduler.step(val_acc)
            
            # Save history
            self.training_history['train_loss'].append(train_loss)
            self.training_history['train_acc'].append(train_acc)
            self.training_history['val_loss'].append(val_loss)
            self.training_history['val_acc'].append(val_acc)
            
            print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}")
            print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")
            
            # Early stopping and model saving
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                patience_counter = 0
                if save_path:
                    torch.save(self.model.state_dict(), save_path)
                    print(f"Model saved to {save_path}")
            else:
                patience_counter += 1
                
            if patience_counter >= patience:
                print(f"Early stopping triggered after {epoch+1} epochs")
                break
        
        print(f"\nTraining completed. Best validation accuracy: {best_val_acc:.4f}")
        return self.training_history

# Initialize model builder
model_builder = BaseModelBuilder(num_classes=3)

print("PyTorch model architectures defined:")
print("- ResNeXt-50 (32x4d)")
print("- DenseNet-201")
print("- EfficientNet-B7")
print("- Xception-41")
print("\nAll models use transfer learning with ImageNet pretrained weights.")


In [None]:
# Genetic Algorithm for Hyperparameter Optimization (PyTorch)
class GeneticOptimizerPyTorch:
    def __init__(self, model_name, model_builder, train_loader, val_loader, device):
        self.model_name = model_name
        self.model_builder = model_builder
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.device = device
        self.setup_deap()
    
    def setup_deap(self):
        """Setup DEAP genetic algorithm framework"""
        # Create fitness class (maximize validation accuracy)
        if hasattr(creator, 'FitnessMax'):
            del creator.FitnessMax
        if hasattr(creator, 'Individual'):
            del creator.Individual
            
        creator.create("FitnessMax", base.Fitness, weights=(1.0,))
        creator.create("Individual", list, fitness=creator.FitnessMax)
        
        self.toolbox = base.Toolbox()
        
        # Hyperparameter ranges
        self.param_ranges = {
            'learning_rate': (1e-5, 1e-2),
            'dropout_rate': (0.2, 0.7),
            'weight_decay': (1e-6, 1e-3),
            'batch_size': [8, 16, 32]  # Limited options for memory
        }
        
        # Register genetic operators
        self.toolbox.register("learning_rate", random.uniform, *self.param_ranges['learning_rate'])
        self.toolbox.register("dropout_rate", random.uniform, *self.param_ranges['dropout_rate'])
        self.toolbox.register("weight_decay", random.uniform, *self.param_ranges['weight_decay'])
        self.toolbox.register("batch_size", random.choice, self.param_ranges['batch_size'])
        
        self.toolbox.register("individual", tools.initCycle, creator.Individual,
                            (self.toolbox.learning_rate, self.toolbox.dropout_rate,
                             self.toolbox.weight_decay, self.toolbox.batch_size), n=1)
        
        self.toolbox.register("population", tools.initRepeat, list, self.toolbox.individual)
        self.toolbox.register("evaluate", self.evaluate_individual)
        self.toolbox.register("mate", tools.cxTwoPoint)
        self.toolbox.register("mutate", self.mutate_individual, indpb=0.2)
        self.toolbox.register("select", tools.selTournament, tournsize=3)
    
    def evaluate_individual(self, individual):
        """Evaluate an individual (set of hyperparameters)"""
        learning_rate, dropout_rate, weight_decay, batch_size = individual
        
        try:
            # Build model with current hyperparameters
            model = self.model_builder.get_model_by_name(
                self.model_name, 
                dropout_rate=dropout_rate, 
                freeze_backbone=True
            )
            
            # Create trainer
            trainer = PyTorchTrainer(model, self.device, class_weights)
            
            # Create optimizer
            optimizer = optim.Adam(
                model.parameters(), 
                lr=learning_rate, 
                weight_decay=weight_decay
            )
            
            # Quick training for evaluation (reduced epochs)
            model.train()
            total_correct = 0
            total_samples = 0
            
            # Train for a few batches only
            for batch_idx, (data, target) in enumerate(self.train_loader):
                if batch_idx >= 5:  # Limit to 5 batches for speed
                    break
                    
                data, target = data.to(self.device), target.to(self.device)
                optimizer.zero_grad()
                output = model(data)
                loss = trainer.criterion(output, target)
                loss.backward()
                optimizer.step()
            
            # Quick validation
            model.eval()
            with torch.no_grad():
                for batch_idx, (data, target) in enumerate(self.val_loader):
                    if batch_idx >= 3:  # Limit validation batches
                        break
                        
                    data, target = data.to(self.device), target.to(self.device)
                    output = model(data)
                    _, predicted = torch.max(output.data, 1)
                    total_samples += target.size(0)
                    total_correct += (predicted == target).sum().item()
            
            val_accuracy = total_correct / total_samples if total_samples > 0 else 0.0
            
            # Clean up GPU memory
            del model, trainer, optimizer
            torch.cuda.empty_cache() if torch.cuda.is_available() else None
            
            return (val_accuracy,)
            
        except Exception as e:
            print(f"Error evaluating individual: {e}")
            # Clean up on error
            torch.cuda.empty_cache() if torch.cuda.is_available() else None
            return (0.0,)
    
    def mutate_individual(self, individual, indpb):
        """Mutate an individual"""
        if random.random() < indpb:
            individual[0] = random.uniform(*self.param_ranges['learning_rate'])
        if random.random() < indpb:
            individual[1] = random.uniform(*self.param_ranges['dropout_rate'])
        if random.random() < indpb:
            individual[2] = random.uniform(*self.param_ranges['weight_decay'])
        if random.random() < indpb:
            individual[3] = random.choice(self.param_ranges['batch_size'])
        
        return individual,
    
    def optimize(self, population_size=8, generations=3):
        """Run genetic algorithm optimization"""
        print(f"Starting genetic optimization for {self.model_name}...")
        print(f"Population size: {population_size}, Generations: {generations}")
        
        # Create initial population
        population = self.toolbox.population(n=population_size)
        
        # Track statistics
        stats = tools.Statistics(lambda ind: ind.fitness.values)
        stats.register("avg", np.mean)
        stats.register("min", np.min)
        stats.register("max", np.max)
        
        # Run genetic algorithm
        population, logbook = algorithms.eaSimple(
            population, self.toolbox,
            cxpb=0.7,
            mutpb=0.3,
            ngen=generations,
            stats=stats,
            verbose=True
        )
        
        # Get best individual
        best_individual = tools.selBest(population, 1)[0]
        best_params = {
            'learning_rate': best_individual[0],
            'dropout_rate': best_individual[1],
            'weight_decay': best_individual[2],
            'batch_size': int(best_individual[3]),
            'fitness': best_individual.fitness.values[0]
        }
        
        print(f"\nBest parameters for {self.model_name}:")
        for param, value in best_params.items():
            print(f"{param}: {value}")
        
        return best_params, logbook

print("PyTorch Genetic Algorithm optimizer defined.")
print("Ready to optimize hyperparameters for each base model.")
print("Note: Optimization is faster but less thorough due to PyTorch memory management.")


In [None]:
# Train Base Models with Optimized Hyperparameters (PyTorch)
class PyTorchModelTrainer:
    def __init__(self, model_builder, train_loader, val_loader, test_loader, class_weights, device):
        self.model_builder = model_builder
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.test_loader = test_loader
        self.class_weights = class_weights
        self.device = device
        self.trained_models = {}
        self.training_histories = {}
        self.best_params = {}
    
    def optimize_and_train_model(self, model_name, optimize_hyperparams=True, 
                                default_params=None, epochs=50):
        """Optimize hyperparameters and train a model"""
        print(f"\n{'='*50}")
        print(f"Training {model_name.upper()}")
        print(f"{'='*50}")
        
        if optimize_hyperparams:
            # Genetic algorithm optimization
            optimizer = GeneticOptimizerPyTorch(
                model_name, self.model_builder, 
                self.train_loader, self.val_loader, self.device
            )
            best_params, _ = optimizer.optimize(population_size=4, generations=2)  # Reduced for demo
            self.best_params[model_name] = best_params
        else:
            # Use default parameters
            best_params = default_params or {
                'learning_rate': 1e-4,
                'dropout_rate': 0.5,
                'weight_decay': 1e-4,
                'batch_size': 16
            }
            self.best_params[model_name] = best_params
        
        print(f"\nUsing parameters: {best_params}")
        
        # Build model with best parameters
        model = self.model_builder.get_model_by_name(
            model_name, 
            dropout_rate=best_params['dropout_rate'], 
            freeze_backbone=True
        )
        
        print(f"Model architecture for {model_name}:")
        total_params = sum(p.numel() for p in model.parameters())
        trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
        print(f"Total parameters: {total_params:,}")
        print(f"Trainable parameters: {trainable_params:,}")
        
        # Phase 1: Train with frozen backbone
        print("\nPhase 1: Training with frozen backbone...")
        trainer = PyTorchTrainer(model, self.device, self.class_weights)
        
        history1 = trainer.train(
            self.train_loader, self.val_loader,
            epochs=epochs // 2,
            learning_rate=best_params['learning_rate'],
            save_path=f'best_{model_name}_phase1.pth'
        )
        
        # Phase 2: Fine-tune with unfrozen backbone
        print("\nPhase 2: Fine-tuning with unfrozen backbone...")
        
        # Unfreeze backbone gradually
        if model_name == 'resnext50':
            # Unfreeze last few layers
            for param in model.layer4.parameters():
                param.requires_grad = True
        elif model_name == 'densenet201':
            # Unfreeze last dense block
            for param in model.features.denseblock4.parameters():
                param.requires_grad = True
        elif model_name == 'efficientnet_b7':
            # Unfreeze last few blocks
            for param in list(model.features.children())[-3:][0].parameters():
                param.requires_grad = True
        elif model_name == 'xception':
            # Unfreeze last few transformer blocks
            for param in model.encoder.layers[-2:].parameters():
                param.requires_grad = True
        
        # Create new trainer with lower learning rate
        trainer_ft = PyTorchTrainer(model, self.device, self.class_weights)
        
        history2 = trainer_ft.train(
            self.train_loader, self.val_loader,
            epochs=epochs // 2,
            learning_rate=best_params['learning_rate'] / 10,  # Lower LR for fine-tuning
            save_path=f'best_{model_name}_final.pth'
        )
        
        # Combine histories
        combined_history = {}
        for key in history1.keys():
            combined_history[key] = history1[key] + history2[key]
        
        # Store model and history
        self.trained_models[model_name] = model
        self.training_histories[model_name] = combined_history
        
        print(f"\n{model_name.upper()} training completed!")
        
        return model, combined_history
    
    def train_all_models(self, optimize_hyperparams=False, epochs=30):
        """Train all base models"""
        model_names = ['resnext50', 'densenet201', 'efficientnet_b7', 'xception']
        
        # Default parameters for quick training (if not optimizing)
        default_params = {
            'resnext50': {'learning_rate': 1e-4, 'dropout_rate': 0.4, 'weight_decay': 1e-4, 'batch_size': 16},
            'densenet201': {'learning_rate': 1e-4, 'dropout_rate': 0.6, 'weight_decay': 1e-4, 'batch_size': 16},
            'efficientnet_b7': {'learning_rate': 5e-5, 'dropout_rate': 0.5, 'weight_decay': 1e-4, 'batch_size': 8},
            'xception': {'learning_rate': 1e-4, 'dropout_rate': 0.3, 'weight_decay': 1e-4, 'batch_size': 16}
        }
        
        for model_name in model_names:
            try:
                self.optimize_and_train_model(
                    model_name, 
                    optimize_hyperparams=optimize_hyperparams,
                    default_params=default_params.get(model_name),
                    epochs=epochs
                )
                
                # Clear GPU memory between models
                torch.cuda.empty_cache() if torch.cuda.is_available() else None
                
            except Exception as e:
                print(f"Error training {model_name}: {e}")
                continue
        
        print(f"\nTraining completed for {len(self.trained_models)} models.")
        return self.trained_models, self.training_histories
    
    def evaluate_model(self, model, data_loader, model_name):
        """Evaluate a trained model"""
        model.eval()
        all_preds = []
        all_targets = []
        all_probs = []
        
        print(f"Evaluating {model_name}...")
        
        with torch.no_grad():
            for data, target in tqdm(data_loader, desc='Evaluating'):
                data, target = data.to(self.device), target.to(self.device)
                output = model(data)
                
                # Get probabilities and predictions
                probs = F.softmax(output, dim=1)
                _, preds = torch.max(output, 1)
                
                all_preds.extend(preds.cpu().numpy())
                all_targets.extend(target.cpu().numpy())
                all_probs.extend(probs.cpu().numpy())
        
        return np.array(all_targets), np.array(all_preds), np.array(all_probs)

# Initialize PyTorch trainer
trainer = PyTorchModelTrainer(
    model_builder, train_loader, val_loader, test_loader, 
    class_weights, device
)

print("PyTorch model trainer initialized.")
print("Ready to train base models with or without hyperparameter optimization.")
print(f"Training will be performed on: {device}")


In [None]:
# Model Evaluation and Metrics (PyTorch)
class PyTorchModelEvaluator:
    def __init__(self, label_encoder, device):
        self.label_encoder = label_encoder
        self.class_names = label_encoder.classes_
        self.device = device
    
    def evaluate_model(self, model, test_loader, model_name):
        """Comprehensive model evaluation"""
        print(f"\n{'='*50}")
        print(f"EVALUATING {model_name.upper()}")
        print(f"{'='*50}")
        
        model.eval()
        all_preds = []
        all_targets = []
        all_probs = []
        
        with torch.no_grad():
            for data, target in tqdm(test_loader, desc='Evaluating'):
                data, target = data.to(self.device), target.to(self.device)
                output = model(data)
                
                # Get probabilities and predictions
                probs = F.softmax(output, dim=1)
                _, preds = torch.max(output, 1)
                
                all_preds.extend(preds.cpu().numpy())
                all_targets.extend(target.cpu().numpy())
                all_probs.extend(probs.cpu().numpy())
        
        y_true = np.array(all_targets)
        y_pred = np.array(all_preds)
        y_pred_proba = np.array(all_probs)
        
        # Basic metrics
        accuracy = accuracy_score(y_true, y_pred)
        print(f"Test Accuracy: {accuracy:.4f}")
        
        # Classification report
        report = classification_report(
            y_true, y_pred, 
            target_names=self.class_names,
            output_dict=True
        )
        
        print("\nClassification Report:")
        print(classification_report(y_true, y_pred, target_names=self.class_names))
        
        # ROC AUC for multi-class
        try:
            auc_scores = []
            for i in range(len(self.class_names)):
                y_true_binary = (y_true == i).astype(int)
                auc = roc_auc_score(y_true_binary, y_pred_proba[:, i])
                auc_scores.append(auc)
                print(f"AUC for {self.class_names[i]}: {auc:.4f}")
            
            macro_auc = np.mean(auc_scores)
            print(f"Macro-averaged AUC: {macro_auc:.4f}")
        except Exception as e:
            print(f"Could not compute AUC: {e}")
            auc_scores = []
            macro_auc = 0
        
        # Confusion Matrix
        cm = confusion_matrix(y_true, y_pred)
        
        # Store results
        results = {
            'model_name': model_name,
            'accuracy': accuracy,
            'classification_report': report,
            'confusion_matrix': cm,
            'auc_scores': auc_scores,
            'macro_auc': macro_auc,
            'y_true': y_true,
            'y_pred': y_pred,
            'y_pred_proba': y_pred_proba
        }
        
        return results
    
    def plot_training_history(self, history, model_name):
        """Plot training history"""
        fig, axes = plt.subplots(2, 2, figsize=(15, 10))
        fig.suptitle(f'{model_name.upper()} Training History', fontsize=16)
        
        epochs = range(1, len(history['train_loss']) + 1)
        
        # Loss
        axes[0, 0].plot(epochs, history['train_loss'], label='Training')
        axes[0, 0].plot(epochs, history['val_loss'], label='Validation')
        axes[0, 0].set_title('Model Loss')
        axes[0, 0].set_xlabel('Epoch')
        axes[0, 0].set_ylabel('Loss')
        axes[0, 0].legend()
        axes[0, 0].grid(True)
        
        # Accuracy
        axes[0, 1].plot(epochs, history['train_acc'], label='Training')
        axes[0, 1].plot(epochs, history['val_acc'], label='Validation')
        axes[0, 1].set_title('Model Accuracy')
        axes[0, 1].set_xlabel('Epoch')
        axes[0, 1].set_ylabel('Accuracy')
        axes[0, 1].legend()
        axes[0, 1].grid(True)
        
        # Learning rate (if available)
        axes[1, 0].text(0.5, 0.5, 'Learning Rate\nScheduling Applied', 
                        ha='center', va='center', transform=axes[1, 0].transAxes,
                        fontsize=12)
        axes[1, 0].set_title('Learning Rate Schedule')
        
        # Best metrics summary
        best_train_acc = max(history['train_acc'])
        best_val_acc = max(history['val_acc'])
        final_train_loss = history['train_loss'][-1]
        final_val_loss = history['val_loss'][-1]
        
        metrics_text = f"Best Train Acc: {best_train_acc:.4f}\n"
        metrics_text += f"Best Val Acc: {best_val_acc:.4f}\n"
        metrics_text += f"Final Train Loss: {final_train_loss:.4f}\n"
        metrics_text += f"Final Val Loss: {final_val_loss:.4f}"
        
        axes[1, 1].text(0.1, 0.5, metrics_text, transform=axes[1, 1].transAxes,
                        fontsize=11, verticalalignment='center')
        axes[1, 1].set_title('Training Summary')
        axes[1, 1].axis('off')
        
        plt.tight_layout()
        plt.show()
    
    def plot_confusion_matrix(self, cm, model_name):
        """Plot confusion matrix"""
        plt.figure(figsize=(8, 6))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                   xticklabels=self.class_names,
                   yticklabels=self.class_names)
        plt.title(f'{model_name.upper()} - Confusion Matrix')
        plt.xlabel('Predicted')
        plt.ylabel('Actual')
        plt.tight_layout()
        plt.show()
    
    def plot_roc_curves(self, results_dict):
        """Plot ROC curves for all models"""
        plt.figure(figsize=(15, 5))
        
        for i, class_name in enumerate(self.class_names):
            plt.subplot(1, 3, i+1)
            
            for model_name, results in results_dict.items():
                if len(results['auc_scores']) > i:
                    y_true_binary = (results['y_true'] == i).astype(int)
                    y_pred_proba_binary = results['y_pred_proba'][:, i]
                    
                    fpr, tpr, _ = roc_curve(y_true_binary, y_pred_proba_binary)
                    auc = results['auc_scores'][i]
                    
                    plt.plot(fpr, tpr, label=f'{model_name} (AUC = {auc:.3f})')
            
            plt.plot([0, 1], [0, 1], 'k--', alpha=0.6)
            plt.xlabel('False Positive Rate')
            plt.ylabel('True Positive Rate')
            plt.title(f'ROC Curve - {class_name}')
            plt.legend()
            plt.grid(True, alpha=0.3)
        
        plt.tight_layout()
        plt.show()
    
    def compare_models(self, results_dict):
        """Compare all models performance"""
        comparison_data = []
        
        for model_name, results in results_dict.items():
            comparison_data.append({
                'Model': model_name.capitalize(),
                'Accuracy': results['accuracy'],
                'Macro AUC': results['macro_auc'],
                'Precision (Macro)': results['classification_report']['macro avg']['precision'],
                'Recall (Macro)': results['classification_report']['macro avg']['recall'],
                'F1-Score (Macro)': results['classification_report']['macro avg']['f1-score']
            })
        
        comparison_df = pd.DataFrame(comparison_data)
        
        print("\n" + "="*80)
        print("MODEL COMPARISON SUMMARY")
        print("="*80)
        print(comparison_df.to_string(index=False, float_format='%.4f'))
        
        # Plot comparison
        fig, axes = plt.subplots(1, 2, figsize=(15, 6))
        
        # Accuracy comparison
        axes[0].bar(comparison_df['Model'], comparison_df['Accuracy'], 
                   color=['skyblue', 'lightcoral', 'lightgreen', 'orange'])
        axes[0].set_title('Model Accuracy Comparison')
        axes[0].set_ylabel('Accuracy')
        axes[0].set_ylim(0, 1)
        for i, v in enumerate(comparison_df['Accuracy']):
            axes[0].text(i, v + 0.01, f'{v:.3f}', ha='center', va='bottom')
        
        # Multi-metric comparison
        metrics = ['Accuracy', 'Macro AUC', 'Precision (Macro)', 'Recall (Macro)', 'F1-Score (Macro)']
        x = np.arange(len(comparison_df))
        width = 0.15
        
        for i, metric in enumerate(metrics):
            axes[1].bar(x + i*width, comparison_df[metric], width, 
                       label=metric, alpha=0.8)
        
        axes[1].set_title('Multi-Metric Comparison')
        axes[1].set_ylabel('Score')
        axes[1].set_xlabel('Models')
        axes[1].set_xticks(x + width * 2)
        axes[1].set_xticklabels(comparison_df['Model'])
        axes[1].legend()
        axes[1].set_ylim(0, 1)
        
        plt.tight_layout()
        plt.show()
        
        return comparison_df

# Initialize PyTorch evaluator
evaluator = PyTorchModelEvaluator(train_dataset.label_encoder, device)

print("PyTorch model evaluator initialized.")
print("Ready to evaluate trained models and generate comprehensive metrics.")


In [None]:
# Sequential Ensemble Learning Model (PyTorch)
class PyTorchSequentialEnsemble:
    def __init__(self, base_models, model_names, label_encoder, device):
        self.base_models = base_models
        self.model_names = model_names
        self.label_encoder = label_encoder
        self.device = device
        self.ensemble_weights = None
        self.meta_model = None
        self.num_classes = len(label_encoder.classes_)
    
    def extract_features(self, data_loader, verbose=True):
        """Extract features from all base models"""
        if verbose:
            print("Extracting features from base models...")
        
        base_predictions = {}
        
        for name, model in zip(self.model_names, self.base_models):
            if verbose:
                print(f"Extracting features from {name}...")
            
            model.eval()
            predictions = []
            
            with torch.no_grad():
                for data, _ in tqdm(data_loader, desc=f'Extracting {name}', disable=not verbose):
                    data = data.to(self.device)
                    output = model(data)
                    probs = F.softmax(output, dim=1)
                    predictions.append(probs.cpu().numpy())
            
            base_predictions[name] = np.vstack(predictions)
        
        return base_predictions
    
    def simple_voting_ensemble(self, test_loader, voting_type='soft'):
        """Simple voting ensemble (soft or hard voting)"""
        print(f"\nPerforming {voting_type} voting ensemble...")
        
        base_predictions = self.extract_features(test_loader)
        
        if voting_type == 'soft':
            # Average probabilities
            ensemble_pred_proba = np.mean(
                [pred for pred in base_predictions.values()], axis=0
            )
            ensemble_pred = np.argmax(ensemble_pred_proba, axis=1)
        else:
            # Hard voting - majority vote
            hard_predictions = []
            for pred in base_predictions.values():
                hard_predictions.append(np.argmax(pred, axis=1))
            
            ensemble_pred = []
            for i in range(len(hard_predictions[0])):
                votes = [pred[i] for pred in hard_predictions]
                ensemble_pred.append(max(set(votes), key=votes.count))
            
            ensemble_pred = np.array(ensemble_pred)
            ensemble_pred_proba = None
        
        return ensemble_pred, ensemble_pred_proba
    
    def weighted_ensemble(self, val_loader, test_loader, method='accuracy'):
        """Weighted ensemble based on individual model performance"""
        print(f"\nCreating weighted ensemble based on {method}...")
        
        # Get validation predictions for weight calculation
        val_predictions = self.extract_features(val_loader)
        
        # Get true labels
        y_val_true = []
        for _, target in val_loader:
            y_val_true.extend(target.numpy())
        y_val_true = np.array(y_val_true)
        
        # Calculate weights based on performance
        weights = []
        for name, pred in val_predictions.items():
            y_pred = np.argmax(pred, axis=1)[:len(y_val_true)]
            
            if method == 'accuracy':
                weight = accuracy_score(y_val_true, y_pred)
            elif method == 'auc':
                try:
                    auc_scores = []
                    for i in range(self.num_classes):
                        y_true_binary = (y_val_true == i).astype(int)
                        auc = roc_auc_score(y_true_binary, pred[:len(y_val_true), i])
                        auc_scores.append(auc)
                    weight = np.mean(auc_scores)
                except:
                    weight = 0.5  # Fallback weight
            
            weights.append(weight)
            print(f"{name} weight ({method}): {weight:.4f}")
        
        # Normalize weights
        weights = np.array(weights)
        weights = weights / np.sum(weights)
        
        print(f"Normalized weights: {dict(zip(self.model_names, weights))}")
        self.ensemble_weights = weights
        
        # Get test predictions
        test_predictions = self.extract_features(test_loader)
        
        # Weighted average
        weighted_pred_proba = np.zeros_like(list(test_predictions.values())[0])
        for i, (name, pred) in enumerate(test_predictions.items()):
            weighted_pred_proba += weights[i] * pred
        
        ensemble_pred = np.argmax(weighted_pred_proba, axis=1)
        
        return ensemble_pred, weighted_pred_proba
    
    def meta_learner_ensemble(self, train_loader, val_loader, test_loader):
        """Meta-learner ensemble using a neural network"""
        print("\nTraining meta-learner ensemble...")
        
        # Extract features from training set
        print("Extracting training features...")
        train_predictions = self.extract_features(train_loader, verbose=False)
        
        # Prepare meta-training data
        X_meta_train = np.hstack([pred for pred in train_predictions.values()])
        
        # Get true labels for training
        y_meta_train = []
        for _, target in train_loader:
            y_meta_train.extend(target.numpy())
        y_meta_train = np.array(y_meta_train[:len(X_meta_train)])
        
        # Extract features from validation set
        print("Extracting validation features...")
        val_predictions = self.extract_features(val_loader, verbose=False)
        X_meta_val = np.hstack([pred for pred in val_predictions.values()])
        
        # Get true labels for validation
        y_meta_val = []
        for _, target in val_loader:
            y_meta_val.extend(target.numpy())
        y_meta_val = np.array(y_meta_val[:len(X_meta_val)])
        
        # Build meta-model
        meta_input_dim = X_meta_train.shape[1]
        
        class MetaLearner(nn.Module):
            def __init__(self, input_dim, num_classes):
                super(MetaLearner, self).__init__()
                self.layers = nn.Sequential(
                    nn.Linear(input_dim, 128),
                    nn.ReLU(),
                    nn.Dropout(0.3),
                    nn.Linear(128, 64),
                    nn.ReLU(),
                    nn.Dropout(0.3),
                    nn.Linear(64, num_classes)
                )
            
            def forward(self, x):
                return self.layers(x)
        
        self.meta_model = MetaLearner(meta_input_dim, self.num_classes).to(self.device)
        
        # Prepare data for PyTorch
        X_train_tensor = torch.FloatTensor(X_meta_train).to(self.device)
        y_train_tensor = torch.LongTensor(y_meta_train).to(self.device)
        X_val_tensor = torch.FloatTensor(X_meta_val).to(self.device)
        y_val_tensor = torch.LongTensor(y_meta_val).to(self.device)
        
        # Create data loaders for meta-training
        meta_train_dataset = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
        meta_val_dataset = torch.utils.data.TensorDataset(X_val_tensor, y_val_tensor)
        
        meta_train_loader = DataLoader(meta_train_dataset, batch_size=32, shuffle=True)
        meta_val_loader = DataLoader(meta_val_dataset, batch_size=32, shuffle=False)
        
        # Train meta-model
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(self.meta_model.parameters(), lr=0.001)
        scheduler = ReduceLROnPlateau(optimizer, patience=5, factor=0.5)
        
        print("Training meta-model...")
        meta_history = {'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': []}
        
        best_val_acc = 0.0
        patience_counter = 0
        
        for epoch in range(50):
            # Training
            self.meta_model.train()
            train_loss = 0
            train_correct = 0
            train_total = 0
            
            for X_batch, y_batch in meta_train_loader:
                optimizer.zero_grad()
                outputs = self.meta_model(X_batch)
                loss = criterion(outputs, y_batch)
                loss.backward()
                optimizer.step()
                
                train_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                train_total += y_batch.size(0)
                train_correct += (predicted == y_batch).sum().item()
            
            # Validation
            self.meta_model.eval()
            val_loss = 0
            val_correct = 0
            val_total = 0
            
            with torch.no_grad():
                for X_batch, y_batch in meta_val_loader:
                    outputs = self.meta_model(X_batch)
                    loss = criterion(outputs, y_batch)
                    
                    val_loss += loss.item()
                    _, predicted = torch.max(outputs.data, 1)
                    val_total += y_batch.size(0)
                    val_correct += (predicted == y_batch).sum().item()
            
            # Calculate metrics
            train_acc = train_correct / train_total
            val_acc = val_correct / val_total
            
            meta_history['train_loss'].append(train_loss / len(meta_train_loader))
            meta_history['train_acc'].append(train_acc)
            meta_history['val_loss'].append(val_loss / len(meta_val_loader))
            meta_history['val_acc'].append(val_acc)
            
            scheduler.step(val_acc)
            
            if epoch % 10 == 0:
                print(f"Epoch {epoch}: Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}")
            
            # Early stopping
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                patience_counter = 0
            else:
                patience_counter += 1
                
            if patience_counter >= 10:
                print(f"Early stopping at epoch {epoch}")
                break
        
        # Get test predictions
        print("Generating ensemble predictions...")
        test_predictions = self.extract_features(test_loader, verbose=False)
        X_meta_test = np.hstack([pred for pred in test_predictions.values()])
        X_test_tensor = torch.FloatTensor(X_meta_test).to(self.device)
        
        # Meta-model predictions
        self.meta_model.eval()
        with torch.no_grad():
            ensemble_output = self.meta_model(X_test_tensor)
            ensemble_pred_proba = F.softmax(ensemble_output, dim=1).cpu().numpy()
        
        ensemble_pred = np.argmax(ensemble_pred_proba, axis=1)
        
        return ensemble_pred, ensemble_pred_proba, meta_history
    
    def evaluate_ensemble(self, y_true, y_pred, y_pred_proba, ensemble_name):
        """Evaluate ensemble performance"""
        print(f"\n{'='*50}")
        print(f"ENSEMBLE EVALUATION: {ensemble_name.upper()}")
        print(f"{'='*50}")
        
        # Basic metrics
        accuracy = accuracy_score(y_true, y_pred)
        print(f"Ensemble Accuracy: {accuracy:.4f}")
        
        # Classification report
        report = classification_report(
            y_true, y_pred,
            target_names=self.label_encoder.classes_,
            output_dict=True
        )
        
        print("\nClassification Report:")
        print(classification_report(y_true, y_pred, target_names=self.label_encoder.classes_))
        
        # AUC scores
        if y_pred_proba is not None:
            try:
                auc_scores = []
                for i in range(self.num_classes):
                    y_true_binary = (y_true == i).astype(int)
                    auc = roc_auc_score(y_true_binary, y_pred_proba[:, i])
                    auc_scores.append(auc)
                    print(f"AUC for {self.label_encoder.classes_[i]}: {auc:.4f}")
                
                macro_auc = np.mean(auc_scores)
                print(f"Macro-averaged AUC: {macro_auc:.4f}")
            except Exception as e:
                print(f"Could not compute AUC: {e}")
                auc_scores = []
                macro_auc = 0
        else:
            auc_scores = []
            macro_auc = 0
        
        # Confusion matrix
        cm = confusion_matrix(y_true, y_pred)
        
        results = {
            'ensemble_name': ensemble_name,
            'accuracy': accuracy,
            'classification_report': report,
            'confusion_matrix': cm,
            'auc_scores': auc_scores,
            'macro_auc': macro_auc,
            'y_pred': y_pred,
            'y_pred_proba': y_pred_proba
        }
        
        return results

print("PyTorch Sequential Ensemble Learning class defined.")
print("Ready to create ensemble models from trained base models.")


In [None]:
# Execute Complete Pipeline (PyTorch)

# Configuration
QUICK_MODE = True  # Set to False for full optimization with genetic algorithm
EPOCHS = 10 if QUICK_MODE else 40
OPTIMIZE_HYPERPARAMS = False if QUICK_MODE else True

print(f"Running in {'QUICK' if QUICK_MODE else 'FULL'} mode")
print(f"Epochs: {EPOCHS}")
print(f"Hyperparameter optimization: {OPTIMIZE_HYPERPARAMS}")
print(f"Device: {device}")
print("\n" + "="*60)
print("STARTING COMPLETE ENSEMBLE LEARNING PIPELINE (PyTorch)")
print("="*60)

# Step 1: Train all base models
print("\nSTEP 1: Training Base Models")
print("-" * 40)

trained_models, training_histories = trainer.train_all_models(
    optimize_hyperparams=OPTIMIZE_HYPERPARAMS, 
    epochs=EPOCHS
)

print(f"\nTrained {len(trained_models)} base models successfully.")

# Step 2: Evaluate individual base models
print("\nSTEP 2: Evaluating Base Models")
print("-" * 40)

base_model_results = {}
for model_name, model in trained_models.items():
    try:
        # Get test predictions
        y_true, y_pred, y_pred_proba = trainer.evaluate_model(model, test_loader, model_name)
        
        # Create results dictionary in the same format as evaluator expects
        accuracy = accuracy_score(y_true, y_pred)
        report = classification_report(y_true, y_pred, target_names=evaluator.class_names, output_dict=True)
        
        # Calculate AUC scores
        auc_scores = []
        try:
            for i in range(len(evaluator.class_names)):
                y_true_binary = (y_true == i).astype(int)
                auc = roc_auc_score(y_true_binary, y_pred_proba[:, i])
                auc_scores.append(auc)
            macro_auc = np.mean(auc_scores)
        except:
            auc_scores = []
            macro_auc = 0
        
        cm = confusion_matrix(y_true, y_pred)
        
        results = {
            'model_name': model_name,
            'accuracy': accuracy,
            'classification_report': report,
            'confusion_matrix': cm,
            'auc_scores': auc_scores,
            'macro_auc': macro_auc,
            'y_true': y_true,
            'y_pred': y_pred,
            'y_pred_proba': y_pred_proba
        }
        
        base_model_results[model_name] = results
        
        # Print results
        print(f"\n{model_name.upper()} Results:")
        print(f"Accuracy: {accuracy:.4f}")
        print(f"Macro AUC: {macro_auc:.4f}")
        
        # Plot training history
        evaluator.plot_training_history(training_histories[model_name], model_name)
        
        # Plot confusion matrix
        evaluator.plot_confusion_matrix(results['confusion_matrix'], model_name)
        
        # Clear GPU memory
        torch.cuda.empty_cache() if torch.cuda.is_available() else None
        
    except Exception as e:
        print(f"Error evaluating {model_name}: {e}")
        import traceback
        traceback.print_exc()

# Compare base models
if base_model_results:
    print("\nSTEP 2.1: Base Model Comparison")
    comparison_df = evaluator.compare_models(base_model_results)
    
    # Plot ROC curves
    evaluator.plot_roc_curves(base_model_results)

print(f"\nBase model evaluation completed for {len(base_model_results)} models.")

# Memory management
if torch.cuda.is_available():
    print(f"GPU memory used: {torch.cuda.memory_allocated(device) / 1024**3:.2f} GB")
    print(f"GPU memory cached: {torch.cuda.memory_reserved(device) / 1024**3:.2f} GB")


In [None]:
# Step 3: Create and Evaluate Ensemble Models (PyTorch)
print("\nSTEP 3: Creating Ensemble Models")
print("-" * 40)

if len(trained_models) >= 2:  # Need at least 2 models for ensemble
    # Initialize ensemble
    model_list = list(trained_models.values())
    model_names = list(trained_models.keys())
    
    ensemble = PyTorchSequentialEnsemble(
        base_models=model_list,
        model_names=model_names,
        label_encoder=train_dataset.label_encoder,
        device=device
    )
    
    # Get true test labels for evaluation
    y_test_true = []
    for _, target in test_loader:
        y_test_true.extend(target.numpy())
    y_test_true = np.array(y_test_true)
    
    ensemble_results = {}
    
    # 3.1: Simple Soft Voting Ensemble
    print("\n3.1: Soft Voting Ensemble")
    try:
        soft_pred, soft_pred_proba = ensemble.simple_voting_ensemble(
            test_loader, voting_type='soft'
        )
        
        # Ensure same length
        min_len = min(len(y_test_true), len(soft_pred))
        y_test_true_trimmed = y_test_true[:min_len]
        soft_pred_trimmed = soft_pred[:min_len]
        soft_pred_proba_trimmed = soft_pred_proba[:min_len] if soft_pred_proba is not None else None
        
        soft_results = ensemble.evaluate_ensemble(
            y_test_true_trimmed, soft_pred_trimmed, soft_pred_proba_trimmed, "Soft Voting"
        )
        ensemble_results['soft_voting'] = soft_results
        
        # Plot confusion matrix
        evaluator.plot_confusion_matrix(soft_results['confusion_matrix'], "Soft Voting Ensemble")
        
        # Clear memory
        torch.cuda.empty_cache() if torch.cuda.is_available() else None
        
    except Exception as e:
        print(f"Error with soft voting ensemble: {e}")
        import traceback
        traceback.print_exc()
    
    # 3.2: Hard Voting Ensemble
    print("\n3.2: Hard Voting Ensemble")
    try:
        hard_pred, hard_pred_proba = ensemble.simple_voting_ensemble(
            test_loader, voting_type='hard'
        )
        
        # Ensure same length
        min_len = min(len(y_test_true), len(hard_pred))
        y_test_true_trimmed = y_test_true[:min_len]
        hard_pred_trimmed = hard_pred[:min_len]
        
        hard_results = ensemble.evaluate_ensemble(
            y_test_true_trimmed, hard_pred_trimmed, None, "Hard Voting"
        )
        ensemble_results['hard_voting'] = hard_results
        
        # Plot confusion matrix
        evaluator.plot_confusion_matrix(hard_results['confusion_matrix'], "Hard Voting Ensemble")
        
        # Clear memory
        torch.cuda.empty_cache() if torch.cuda.is_available() else None
        
    except Exception as e:
        print(f"Error with hard voting ensemble: {e}")
        import traceback
        traceback.print_exc()
    
    # 3.3: Weighted Ensemble (based on accuracy)
    print("\n3.3: Weighted Ensemble")
    try:
        weighted_pred, weighted_pred_proba = ensemble.weighted_ensemble(
            val_loader, test_loader, method='accuracy'
        )
        
        # Ensure same length
        min_len = min(len(y_test_true), len(weighted_pred))
        y_test_true_trimmed = y_test_true[:min_len]
        weighted_pred_trimmed = weighted_pred[:min_len]
        weighted_pred_proba_trimmed = weighted_pred_proba[:min_len] if weighted_pred_proba is not None else None
        
        weighted_results = ensemble.evaluate_ensemble(
            y_test_true_trimmed, weighted_pred_trimmed, weighted_pred_proba_trimmed, "Weighted Ensemble"
        )
        ensemble_results['weighted'] = weighted_results
        
        # Plot confusion matrix
        evaluator.plot_confusion_matrix(weighted_results['confusion_matrix'], "Weighted Ensemble")
        
        # Clear memory
        torch.cuda.empty_cache() if torch.cuda.is_available() else None
        
    except Exception as e:
        print(f"Error with weighted ensemble: {e}")
        import traceback
        traceback.print_exc()
    
    # 3.4: Meta-learner Ensemble (if not in quick mode)
    if not QUICK_MODE:
        print("\n3.4: Meta-learner Ensemble")
        try:
            meta_pred, meta_pred_proba, meta_history = ensemble.meta_learner_ensemble(
                train_loader, val_loader, test_loader
            )
            
            # Ensure same length
            min_len = min(len(y_test_true), len(meta_pred))
            y_test_true_trimmed = y_test_true[:min_len]
            meta_pred_trimmed = meta_pred[:min_len]
            meta_pred_proba_trimmed = meta_pred_proba[:min_len] if meta_pred_proba is not None else None
            
            meta_results = ensemble.evaluate_ensemble(
                y_test_true_trimmed, meta_pred_trimmed, meta_pred_proba_trimmed, "Meta-learner Ensemble"
            )
            ensemble_results['meta_learner'] = meta_results
            
            # Plot confusion matrix
            evaluator.plot_confusion_matrix(meta_results['confusion_matrix'], "Meta-learner Ensemble")
            
            # Plot meta-model training history
            plt.figure(figsize=(12, 4))
            
            epochs = range(1, len(meta_history['train_acc']) + 1)
            
            plt.subplot(1, 2, 1)
            plt.plot(epochs, meta_history['train_acc'], label='Training')
            plt.plot(epochs, meta_history['val_acc'], label='Validation')
            plt.title('Meta-model Accuracy')
            plt.xlabel('Epoch')
            plt.ylabel('Accuracy')
            plt.legend()
            plt.grid(True)
            
            plt.subplot(1, 2, 2)
            plt.plot(epochs, meta_history['train_loss'], label='Training')
            plt.plot(epochs, meta_history['val_loss'], label='Validation')
            plt.title('Meta-model Loss')
            plt.xlabel('Epoch')
            plt.ylabel('Loss')
            plt.legend()
            plt.grid(True)
            
            plt.tight_layout()
            plt.show()
            
            # Clear memory
            torch.cuda.empty_cache() if torch.cuda.is_available() else None
            
        except Exception as e:
            print(f"Error with meta-learner ensemble: {e}")
            import traceback
            traceback.print_exc()
    else:
        print("\n3.4: Meta-learner Ensemble (Skipped in QUICK_MODE)")
    
    print(f"\nEnsemble evaluation completed for {len(ensemble_results)} ensemble methods.")
    
    # Final memory cleanup
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        print(f"\nFinal GPU memory used: {torch.cuda.memory_allocated(device) / 1024**3:.2f} GB")
        print(f"Final GPU memory cached: {torch.cuda.memory_reserved(device) / 1024**3:.2f} GB")
    
else:
    print("Not enough trained models for ensemble learning.")
    ensemble_results = {}


In [None]:
# Step 4: Final Results Summary and Comparison (PyTorch)
print("\nSTEP 4: Final Results Summary")
print("="*60)

# Combine all results for comprehensive comparison
all_results = {}

# Add base model results
for model_name, results in base_model_results.items():
    all_results[f"Base: {model_name.capitalize()}"] = {
        'accuracy': results['accuracy'],
        'macro_auc': results['macro_auc'],
        'precision': results['classification_report']['macro avg']['precision'],
        'recall': results['classification_report']['macro avg']['recall'],
        'f1_score': results['classification_report']['macro avg']['f1-score']
    }

# Add ensemble results
for ensemble_name, results in ensemble_results.items():
    all_results[f"Ensemble: {ensemble_name.replace('_', ' ').title()}"] = {
        'accuracy': results['accuracy'],
        'macro_auc': results['macro_auc'],
        'precision': results['classification_report']['macro avg']['precision'],
        'recall': results['classification_report']['macro avg']['recall'],
        'f1_score': results['classification_report']['macro avg']['f1-score']
    }

# Create comprehensive comparison
if all_results:
    final_comparison = pd.DataFrame.from_dict(all_results, orient='index')
    final_comparison = final_comparison.round(4)
    
    print("\nCOMPREHENSIVE MODEL COMPARISON (PyTorch)")
    print("-" * 80)
    print(final_comparison.to_string(float_format='%.4f'))
    
    # Find best performing models
    best_accuracy = final_comparison['accuracy'].max()
    best_auc = final_comparison['macro_auc'].max()
    best_f1 = final_comparison['f1_score'].max()
    
    best_accuracy_model = final_comparison[final_comparison['accuracy'] == best_accuracy].index[0]
    best_auc_model = final_comparison[final_comparison['macro_auc'] == best_auc].index[0]
    best_f1_model = final_comparison[final_comparison['f1_score'] == best_f1].index[0]
    
    print(f"\nBEST PERFORMING MODELS:")
    print(f"Highest Accuracy: {best_accuracy_model} ({best_accuracy:.4f})")
    print(f"Highest AUC: {best_auc_model} ({best_auc:.4f})")
    print(f"Highest F1-Score: {best_f1_model} ({best_f1:.4f})")
    
    # Visualization
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    
    # Accuracy comparison
    axes[0, 0].barh(range(len(final_comparison)), final_comparison['accuracy'], 
                    color=['lightblue' if 'Base:' in idx else 'lightcoral' for idx in final_comparison.index])
    axes[0, 0].set_yticks(range(len(final_comparison)))
    axes[0, 0].set_yticklabels(final_comparison.index, fontsize=8)
    axes[0, 0].set_xlabel('Accuracy')
    axes[0, 0].set_title('Model Accuracy Comparison (PyTorch)')
    axes[0, 0].grid(True, alpha=0.3)
    
    # AUC comparison
    axes[0, 1].barh(range(len(final_comparison)), final_comparison['macro_auc'], 
                    color=['lightblue' if 'Base:' in idx else 'lightcoral' for idx in final_comparison.index])
    axes[0, 1].set_yticks(range(len(final_comparison)))
    axes[0, 1].set_yticklabels(final_comparison.index, fontsize=8)
    axes[0, 1].set_xlabel('Macro AUC')
    axes[0, 1].set_title('Model AUC Comparison (PyTorch)')
    axes[0, 1].grid(True, alpha=0.3)
    
    # F1-Score comparison
    axes[1, 0].barh(range(len(final_comparison)), final_comparison['f1_score'], 
                    color=['lightblue' if 'Base:' in idx else 'lightcoral' for idx in final_comparison.index])
    axes[1, 0].set_yticks(range(len(final_comparison)))
    axes[1, 0].set_yticklabels(final_comparison.index, fontsize=8)
    axes[1, 0].set_xlabel('F1-Score')
    axes[1, 0].set_title('Model F1-Score Comparison (PyTorch)')
    axes[1, 0].grid(True, alpha=0.3)
    
    # Multi-metric radar chart for top 3 models
    top_3_models = final_comparison.nlargest(3, 'accuracy')
    
    metrics = ['accuracy', 'macro_auc', 'precision', 'recall', 'f1_score']
    angles = np.linspace(0, 2 * np.pi, len(metrics), endpoint=False).tolist()
    angles += angles[:1]  # Complete the circle
    
    ax = plt.subplot(2, 2, 4, projection='polar')
    
    colors = ['red', 'blue', 'green']
    for i, (model_name, row) in enumerate(top_3_models.iterrows()):
        values = [row[metric] for metric in metrics]
        values += values[:1]  # Complete the circle
        
        ax.plot(angles, values, 'o-', linewidth=2, label=model_name[:20], color=colors[i])
        ax.fill(angles, values, alpha=0.25, color=colors[i])
    
    ax.set_xticks(angles[:-1])
    ax.set_xticklabels([m.replace('_', ' ').title() for m in metrics])
    ax.set_ylim(0, 1)
    ax.set_title('Top 3 Models - Multi-Metric Comparison')
    ax.legend(loc='upper right', bbox_to_anchor=(0.1, 0.1))
    ax.grid(True)
    
    plt.tight_layout()
    plt.show()

# Print hyperparameters used (if optimization was performed)
if trainer.best_params:
    print("\nOPTIMIZED HYPERPARAMETERS (PyTorch):")
    print("-" * 40)
    for model_name, params in trainer.best_params.items():
        print(f"\n{model_name.upper()}:")
        for param, value in params.items():
            if param != 'fitness':
                print(f"  {param}: {value}")
        if 'fitness' in params:
            print(f"  Validation Accuracy: {params['fitness']:.4f}")

# Summary statistics
print("\nPYTORCH PIPELINE EXECUTION SUMMARY:")
print("="*50)
print(f"Framework: PyTorch {torch.__version__}")
print(f"Device used: {device}")
print(f"Total base models trained: {len(trained_models)}")
print(f"Total ensemble methods tested: {len(ensemble_results)}")
print(f"Total evaluation metrics computed: {len(all_results)}")

if all_results:
    print(f"Best overall accuracy: {final_comparison['accuracy'].max():.4f}")
    print(f"Best overall AUC: {final_comparison['macro_auc'].max():.4f}")
    print(f"Best overall F1-score: {final_comparison['f1_score'].max():.4f}")

if QUICK_MODE:
    print("\nNote: Pipeline was run in QUICK mode. For production use, consider:")
    print("- Setting QUICK_MODE = False for full optimization")
    print("- Increasing EPOCHS for better convergence")
    print("- Running genetic algorithm optimization")
    print("- Using larger population sizes and more generations")
    print("- Enabling meta-learner ensemble")

# Final memory cleanup and statistics
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    print(f"\nFinal GPU memory statistics:")
    print(f"- Memory allocated: {torch.cuda.memory_allocated(device) / 1024**3:.2f} GB")
    print(f"- Memory cached: {torch.cuda.memory_reserved(device) / 1024**3:.2f} GB")
    print(f"- Max memory allocated: {torch.cuda.max_memory_allocated(device) / 1024**3:.2f} GB")
 
print("\n" + "="*60)
print("PyTorch ENSEMBLE LEARNING PIPELINE COMPLETED SUCCESSFULLY!")