In [1]:

!pip install fslpy==2.7.0

Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/share/pkg.7/python3/3.7.9/install/bin/python3.7 -m pip install --upgrade pip' command.[0m


In [2]:
import sys
# Record the interpreter version of the running kernel alongside the results.
print(sys.version)

3.7.9 (default, Oct 26 2020, 11:27:26) 
[GCC 4.8.5 20150623 (Red Hat 4.8.5-39)]


In [3]:
!pip install --user https://github.com/ANTsX/ANTsPy/releases/download/v0.1.8/antspyx-0.1.7-cp37-cp37m-linux_x86_64.whl

You should consider upgrading via the '/share/pkg.7/python3/3.7.9/install/bin/python3.7 -m pip install --upgrade pip' command.[0m


In [7]:
import csv
import logging
import os
import random
import shutil
import subprocess
from pathlib import Path

from tqdm import tqdm

def setup_logging(root_directory):
    """Configure root logging and return the 'SubsetCreator' logger.

    Creates ``<root_directory>/logs`` if needed and asks ``logging.basicConfig``
    to log at INFO level to both ``logs/subset_creation.log`` and the console.

    Args:
        root_directory: Directory under which the ``logs`` folder is created.

    Returns:
        logging.Logger: The logger named ``'SubsetCreator'``.
    """
    log_directory = Path(root_directory) / 'logs'
    log_directory.mkdir(parents=True, exist_ok=True)

    # Build the two handlers up front, then hand them to the root configuration.
    file_handler = logging.FileHandler(log_directory / 'subset_creation.log')
    console_handler = logging.StreamHandler()

    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=[file_handler, console_handler],
    )

    subset_logger = logging.getLogger('SubsetCreator')
    return subset_logger

def read_hemorrhage_labels(csv_path):
    """Read hemorrhage labels from a two-column CSV file.

    The first row is treated as a header and skipped. Every following row with
    exactly two cells is stored as ``{type: int(label)}``; rows with any other
    number of cells are ignored. Surrounding whitespace on the type name is
    stripped so that later comparisons against names such as ``'any'`` are not
    defeated by padding.

    Args:
        csv_path: Path to the CSV file.

    Returns:
        dict | None: Mapping of hemorrhage type to integer label. An empty
        (or header-only) file yields ``{}``. ``None`` is returned, after
        printing the error, when the file cannot be read or a label is not
        an integer.
    """
    labels = {}
    try:
        # newline='' lets the csv module do its own newline handling.
        with open(csv_path, 'r', newline='') as f:
            csv_reader = csv.reader(f)
            # Skip header row; the default avoids StopIteration on an empty file.
            next(csv_reader, None)
            for row in csv_reader:
                if len(row) == 2:
                    h_type, label = row
                    labels[h_type.strip()] = int(label)
        return labels
    except Exception as e:
        # Best-effort by design: callers treat None as "no usable labels".
        print(f"Error reading {csv_path}: {str(e)}")
        return None

def get_image_hemorrhage_status(image_id, root_path):
    """Report whether the labels for ``image_id`` flag any hemorrhage type.

    Scans the ``ID_*`` sub-directories of ``root_path`` whose name contains
    ``image_id`` and uses the first one that has a non-empty, readable
    ``hemorrhage_labels.csv``. The ``'any'`` row is ignored; the result is
    True when at least one other type has label 1.

    Args:
        image_id (str): Identifier looked up inside the directory names.
        root_path (Path): Directory containing the ``ID_*`` sub-directories.

    Returns:
        bool: True if a specific hemorrhage type is labelled 1, otherwise
        False (including when no usable label file is found).
    """
    matching_dirs = (
        candidate
        for candidate in root_path.glob('ID_*')
        if candidate.is_dir() and image_id in candidate.name
    )
    for candidate in matching_dirs:
        label_file = candidate / 'hemorrhage_labels.csv'
        if not label_file.exists():
            continue
        labels = read_hemorrhage_labels(label_file)
        if not labels:
            # Unreadable or empty label file: keep looking in other matches.
            continue
        for h_type, flag in labels.items():
            if h_type != 'any' and flag == 1:
                return True
        return False
    return False

def create_balanced_subset(root_directory, subset_size=10, hemorrhage_ratio=0.5, seed=42):
    """
    Create a balanced subset of the data

    Images matching ``*.nii*`` in ``<root_directory>/trainingImages`` are
    classified with ``get_image_hemorrhage_status`` and a seeded random sample
    of each class is copied into ``<root_directory>/subset_test`` together with
    a ``subset_info.txt`` listing the selection.

    Args:
        root_directory (str): Root directory containing original data
        subset_size (int): Number of images to include in subset
        hemorrhage_ratio (float): Desired ratio of hemorrhage cases (0-1)
        seed (int): Random seed for reproducibility

    Returns:
        Path: The subset directory.

    Raises:
        ValueError: If ``subset_size`` is negative or ``hemorrhage_ratio`` is
            outside the range 0-1.
    """
    if subset_size < 0:
        raise ValueError(f"subset_size must be non-negative, got {subset_size}")
    if not 0 <= hemorrhage_ratio <= 1:
        raise ValueError(f"hemorrhage_ratio must be between 0 and 1, got {hemorrhage_ratio}")

    logger = setup_logging(root_directory)
    root_path = Path(root_directory)

    # Setup directories
    train_dir = root_path / 'trainingImages'
    subset_dir = root_path / 'subset_test'
    subset_dir.mkdir(exist_ok=True)

    # Get all images and their hemorrhage status.
    # Sorted so the seeded sample below does not depend on directory listing order.
    logger.info("Analyzing images for hemorrhage status...")
    image_paths = sorted(train_dir.glob('*.nii*'))
    hemorrhage_cases = []
    non_hemorrhage_cases = []
    eq_suffix = '_Eq_1'
    for img_path in tqdm(image_paths, desc="Checking images"):
        image_id = img_path.stem.split('.')[0]  # Remove extension
        if image_id.endswith(eq_suffix):
            image_id = image_id[:-len(eq_suffix)]
        if get_image_hemorrhage_status(image_id, root_path):
            hemorrhage_cases.append(img_path)
        else:
            non_hemorrhage_cases.append(img_path)

    logger.info(f"Found {len(hemorrhage_cases)} hemorrhage cases and {len(non_hemorrhage_cases)} non-hemorrhage cases")

    # Calculate numbers for balanced subset. The small epsilon keeps products
    # such as 100 * 0.29 (28.999...) from being truncated to 28.
    num_hemorrhage = int(subset_size * hemorrhage_ratio + 1e-9)
    num_non_hemorrhage = subset_size - num_hemorrhage

    # Randomly select cases with a private generator so the module-level
    # random state used elsewhere is left untouched.
    rng = random.Random(seed)
    selected_hemorrhage = rng.sample(hemorrhage_cases, min(num_hemorrhage, len(hemorrhage_cases)))
    selected_non_hemorrhage = rng.sample(non_hemorrhage_cases, min(num_non_hemorrhage, len(non_hemorrhage_cases)))

    if len(selected_hemorrhage) < num_hemorrhage or len(selected_non_hemorrhage) < num_non_hemorrhage:
        logger.warning(
            f"Requested {num_hemorrhage} hemorrhage and {num_non_hemorrhage} non-hemorrhage images "
            f"but only {len(selected_hemorrhage)} and {len(selected_non_hemorrhage)} are available; "
            "the subset will be smaller than requested"
        )

    # Combine selections
    selected_images = selected_hemorrhage + selected_non_hemorrhage

    logger.info(f"Creating subset with {len(selected_images)} images "
               f"({len(selected_hemorrhage)} hemorrhage, {len(selected_non_hemorrhage)} non-hemorrhage)")

    # Save selection details and copy selected images to subset directory
    with open(subset_dir / 'subset_info.txt', 'w') as f:
        f.write("Subset Contents:\n\n")
        f.write("Hemorrhage Cases:\n")
        for img_path in selected_hemorrhage:
            f.write(f"{img_path.name}\n")
            shutil.copy2(img_path, subset_dir / img_path.name)

        f.write("\nNon-hemorrhage Cases:\n")
        for img_path in selected_non_hemorrhage:
            f.write(f"{img_path.name}\n")
            shutil.copy2(img_path, subset_dir / img_path.name)

    logger.info(f"\nSubset created in: {subset_dir}")
    logger.info(f"Total images in subset: {len(selected_images)}")
    logger.info(f"Hemorrhage cases: {len(selected_hemorrhage)}")
    logger.info(f"Non-hemorrhage cases: {len(selected_non_hemorrhage)}")

    return subset_dir

def run_segmentation_on_subset(subset_dir, deepbleed_dir=None, weights='weights'):
    """Run DeepBleed segmentation on the subset

    Each ``*.nii*`` file directly inside ``subset_dir`` is copied on its own
    into a freshly emptied ``deepbleed_input`` folder and ``predict.py`` is run
    on that folder, writing to ``deepbleed_output``. Failures are logged and
    do not stop the loop.

    Args:
        subset_dir: Directory holding the subset images.
        deepbleed_dir: Directory containing ``predict.py``. When None (the
            default) the command runs in the current working directory.
        weights: Value passed to ``--weights``; relative values are resolved
            by ``predict.py`` against the directory it runs in.
    """
    logger = logging.getLogger('DeepBleed')

    # Absolute paths stay valid even when predict.py runs from another directory.
    subset_dir = Path(os.path.abspath(str(subset_dir)))
    run_cwd = None if deepbleed_dir is None else str(deepbleed_dir)

    # Setup directories for DeepBleed
    input_dir = subset_dir / 'deepbleed_input'
    output_dir = subset_dir / 'deepbleed_output'

    input_dir.mkdir(exist_ok=True)
    output_dir.mkdir(exist_ok=True)

    # Process subset images
    for image_file in tqdm(sorted(subset_dir.glob('*.nii*')), desc="Processing subset"):
        try:
            # The glob also matches names such as "scan.nii.txt"; skip those.
            if image_file.suffix == '.txt':
                continue

            # Clear and recreate input directory so only this image is predicted
            if input_dir.exists():
                shutil.rmtree(str(input_dir))
            input_dir.mkdir()

            # Copy image to input directory
            shutil.copy2(image_file, input_dir / image_file.name)

            # Run DeepBleed prediction. An argument list (no shell) keeps paths
            # containing spaces or shell metacharacters intact.
            cmd = [
                "python3", "predict.py", "--verbose",
                "--indir", str(input_dir),
                "--outdir", str(output_dir),
                "--weights", str(weights),
            ]
            logger.info(f"Running: {' '.join(cmd)}")

            result = subprocess.run(cmd, cwd=run_cwd, capture_output=True, text=True)

            if result.returncode != 0:
                logger.error(f"Error processing {image_file.name}:")
                logger.error(result.stderr)
            else:
                logger.info(f"Successfully processed {image_file.name}")

        except Exception as e:
            logger.error(f"Error processing {image_file.name}: {str(e)}")

def main():
    """Prompt for the run settings, build the subset, then segment it.

    Reads the root directory, subset size and hemorrhage ratio from standard
    input (blank answers fall back to 10 and 0.5), calls
    ``create_balanced_subset`` and passes its result to
    ``run_segmentation_on_subset``. Any exception is printed together with its
    traceback instead of propagating.
    """
    try:
        root_dir = input("Enter the root directory path: ").strip()

        size_answer = input("Enter number of images for subset (default 10): ").strip()
        subset_size = int(size_answer if size_answer else "10")

        ratio_answer = input("Enter desired ratio of hemorrhage cases (0-1, default 0.5): ").strip()
        hemorrhage_ratio = float(ratio_answer if ratio_answer else "0.5")

        # Stage 1: build the balanced subset
        print("\nCreating balanced data subset...")
        subset_dir = create_balanced_subset(root_dir, subset_size, hemorrhage_ratio)

        # Stage 2: segment every image in it
        print("\nRunning DeepBleed segmentation on subset...")
        run_segmentation_on_subset(subset_dir)

    except Exception as e:
        import traceback
        print(f"Error: {str(e)}")
        traceback.print_exc()


if __name__ == "__main__":
    main()

Enter the root directory path: /projectnb/ec523kb/projects/hemorrhage-classification/stage_2_train_sorted_nifti_pruned
Enter number of images for subset (default 10): 10
Enter desired ratio of hemorrhage cases (0-1, default 0.5): 0.5


2024-12-10 01:27:06,778 - SubsetCreator - INFO - Analyzing images for hemorrhage status...
Checking images:   0%|          | 0/4492 [00:00<?, ?it/s]


Creating balanced data subset...


Checking images: 100%|██████████| 4492/4492 [01:09<00:00, 64.99it/s]
2024-12-10 01:28:15,970 - SubsetCreator - INFO - Found 1829 hemorrhage cases and 2663 non-hemorrhage cases
2024-12-10 01:28:15,971 - SubsetCreator - INFO - Creating subset with 10 images (5 hemorrhage, 5 non-hemorrhage)
2024-12-10 01:28:16,434 - SubsetCreator - INFO - 
Subset created in: /projectnb/ec523kb/projects/hemorrhage-classification/stage_2_train_sorted_nifti_pruned/subset_test
2024-12-10 01:28:16,434 - SubsetCreator - INFO - Total images in subset: 10
2024-12-10 01:28:16,435 - SubsetCreator - INFO - Hemorrhage cases: 5
2024-12-10 01:28:16,435 - SubsetCreator - INFO - Non-hemorrhage cases: 5
Processing subset:   0%|          | 0/10 [00:00<?, ?it/s]2024-12-10 01:28:16,449 - DeepBleed - INFO - Running: python3 predict.py --verbose --indir /projectnb/ec523kb/projects/hemorrhage-classification/stage_2_train_sorted_nifti_pruned/subset_test/deepbleed_input --outdir /projectnb/ec523kb/projects/hemorrhage-classificati


Running DeepBleed segmentation on subset...





In [5]:
# Check CUDA and GPU availability
import os
import tensorflow as tf
# Prints None when CUDA_VISIBLE_DEVICES is not set in the kernel's environment.
print("CUDA_VISIBLE_DEVICES:", os.environ.get('CUDA_VISIBLE_DEVICES'))
# Physical GPU devices as reported by TensorFlow.
print("GPUs available:", tf.config.list_physical_devices('GPU'))

CUDA_VISIBLE_DEVICES: 1
GPUs available: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [2]:
import os
import subprocess
from pathlib import Path

# Set paths
subset_dir = Path("/projectnb/ec523kb/projects/hemorrhage-classification/stage_2_train_sorted_nifti_pruned/subset_test")
deepbleed_dir = subset_dir.parent / 'deepbleed'
input_dir = subset_dir / 'deepbleed_input'
output_dir = subset_dir / 'deepbleed_output'
weights_dir = deepbleed_dir / 'weights'

print("Paths set up:")
print(f"Input directory: {input_dir}")
print(f"Output directory: {output_dir}")
print(f"Weights directory: {weights_dir}")

# Clone DeepBleed if not exists.
# This must happen BEFORE weights_dir is created: weights_dir lives inside
# deepbleed_dir, so creating it first would make this check always pass and
# the repository (and predict.py) would never be fetched.
if not deepbleed_dir.exists():
    print("\nCloning DeepBleed...")
    subprocess.run(
        ["git", "clone", "https://github.com/msharrock/deepbleed.git", str(deepbleed_dir)],
        check=True
    )

# Create directories
input_dir.mkdir(parents=True, exist_ok=True)
output_dir.mkdir(parents=True, exist_ok=True)
weights_dir.mkdir(parents=True, exist_ok=True)

# Download and set up weights
if not (weights_dir / 'weights.index').exists():
    print("\nDownloading weights...")
    weights_url = "https://www.dropbox.com/s/v2ptd9mfpo13gcb/mistie_2-20200122T175000Z-001.zip?dl=1"
    # Argument lists avoid shell globbing on the '?' in the URL, and cwd=
    # avoids changing the notebook's working directory for the download.
    subprocess.run(["wget", "-O", "weights.zip", weights_url],
                   cwd=str(weights_dir), check=True)
    # -o overwrites leftovers from an interrupted earlier attempt instead of prompting.
    subprocess.run(["unzip", "-o", "-j", "weights.zip"],
                   cwd=str(weights_dir), check=True)
    # Rename "_<part>" to "weights.<part>" so the files share the "weights" prefix.
    for part in ("data-00001-of-00002", "data-00000-of-00002", "index"):
        (weights_dir / f"_{part}").rename(weights_dir / f"weights.{part}")

# Go to DeepBleed directory
os.chdir(str(deepbleed_dir))
print(f"\nCurrent working directory: {os.getcwd()}")

# Run the prediction
print("\nRunning prediction...")
cmd = [
    "python3", "predict.py",
    "--indir", str(input_dir),
    "--outdir", str(output_dir),
    "--weights", f"{weights_dir}/weights",
]
print(f"Command: {' '.join(cmd)}")

result = subprocess.run(cmd, cwd=str(deepbleed_dir), capture_output=True, text=True)
print("\nOutput:", result.stdout)
if result.stderr:
    print("\nErrors:", result.stderr)
if result.returncode != 0:
    # stderr alone is ambiguous (warnings are written there too); report the status.
    print(f"\nPrediction failed with exit status {result.returncode}")

Paths set up:
Input directory: /projectnb/ec523kb/projects/hemorrhage-classification/stage_2_train_sorted_nifti_pruned/subset_test/deepbleed_input
Output directory: /projectnb/ec523kb/projects/hemorrhage-classification/stage_2_train_sorted_nifti_pruned/subset_test/deepbleed_output
Weights directory: /projectnb/ec523kb/projects/hemorrhage-classification/stage_2_train_sorted_nifti_pruned/deepbleed/weights

Current working directory: /projectnb/ec523kb/projects/hemorrhage-classification/stage_2_train_sorted_nifti_pruned/deepbleed

Running prediction...
Command: python3 predict.py --indir /projectnb/ec523kb/projects/hemorrhage-classification/stage_2_train_sorted_nifti_pruned/subset_test/deepbleed_input --outdir /projectnb/ec523kb/projects/hemorrhage-classification/stage_2_train_sorted_nifti_pruned/subset_test/deepbleed_output --weights /projectnb/ec523kb/projects/hemorrhage-classification/stage_2_train_sorted_nifti_pruned/deepbleed/weights/weights

Output: ANTsImage (LAI)
	 Pixel Type : fl

In [1]:
# List the GPUs on this node with their current memory use and utilisation.
!nvidia-smi

Tue Dec 10 14:52:06 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 565.57.01              Driver Version: 565.57.01      CUDA Version: 12.7     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla V100-SXM2-16GB           On  |   00000000:18:00.0 Off |                    0 |
| N/A   42C    P0             44W /  300W |       1MiB /  16384MiB |      0%   E. Process |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
|   1  Tesla V100-SXM2-16GB          

In [None]:
# Full pipeline: run DeepBleed on every training image, one worker per available GPU

In [None]:
import os
import subprocess
from pathlib import Path
from tqdm import tqdm
import shutil
import time
import tensorflow as tf
import multiprocessing as mp
from queue import Empty

def configure_gpu_memory():
    """Enable memory growth on every GPU TensorFlow reports (TF 2.1.0 API).

    Does nothing when no GPU is listed. A ``RuntimeError`` raised while
    applying the setting is printed rather than propagated.
    """
    detected_gpus = tf.config.experimental.list_physical_devices('GPU')
    if not detected_gpus:
        return

    try:
        for device in detected_gpus:
            tf.config.experimental.set_memory_growth(device, True)
    except RuntimeError as err:
        print(f"GPU memory configuration failed: {err}")

def process_single_image(gpu_queue, img_path, base_paths, result_queue):
    """Process a single image with the next available GPU

    Leases a GPU id from ``gpu_queue``, stages ``img_path`` in a private
    ``gpu_<id>`` folder under the input directory, runs ``predict.py`` on that
    folder with ``CUDA_VISIBLE_DEVICES`` set to the leased id, and always
    reports ``(image name, success, message)`` on ``result_queue``.

    Args:
        gpu_queue: Queue of free GPU ids; blocks until one is available. The
            leased id is put back when the function finishes.
        img_path (Path): Image to process.
        base_paths (tuple): ``(deepbleed_dir, input_dir, output_dir, weights_dir)``.
        result_queue: Queue that receives the result tuple.
    """
    # Initialised up front so the cleanup below is valid even if leasing the
    # GPU or unpacking base_paths fails.
    gpu_id = None
    gpu_input_dir = None
    success = False
    message = ""

    try:
        # Get GPU ID from queue
        gpu_id = gpu_queue.get()
        deepbleed_dir, input_dir, output_dir, weights_dir = base_paths

        # The prediction runs in a separate interpreter, so the GPU is selected
        # through that interpreter's environment. TensorFlow settings made in
        # this process would not carry over to it.
        child_env = dict(os.environ, CUDA_VISIBLE_DEVICES=str(gpu_id))

        # Create GPU-specific input directory
        gpu_input_dir = input_dir / f"gpu_{gpu_id}"
        if gpu_input_dir.exists():
            shutil.rmtree(str(gpu_input_dir))
        gpu_input_dir.mkdir(parents=True)

        # Copy image to GPU-specific input directory
        shutil.copy2(img_path, gpu_input_dir / img_path.name)

        # Run DeepBleed from its own directory without changing this process's
        # working directory; an argument list avoids shell quoting problems.
        cmd = [
            "python3", "predict.py", "--verbose",
            "--indir", str(gpu_input_dir),
            "--outdir", str(output_dir),
            "--weights", f"{weights_dir}/weights",
        ]
        result = subprocess.run(cmd,
                                cwd=str(deepbleed_dir),
                                env=child_env,
                                text=True,
                                capture_output=True)

        success = result.returncode == 0
        if success:
            message = "Success"
        else:
            # Never report a failure with an empty explanation.
            message = result.stderr or f"predict.py exited with status {result.returncode}"

    except Exception as e:
        success = False
        message = str(e)

    finally:
        # Clean up; ignore_errors so a cleanup problem cannot prevent the GPU
        # from being returned below.
        if gpu_input_dir is not None and gpu_input_dir.exists():
            shutil.rmtree(str(gpu_input_dir), ignore_errors=True)

        # Put GPU back in queue (only if one was actually leased) and send result
        if gpu_id is not None:
            gpu_queue.put(gpu_id)
        result_queue.put((img_path.name, success, message))

def run_deepbleed():
    """Run DeepBleed over every training image, one worker process per GPU.

    Keeps at most ``num_gpus`` ``process_single_image`` workers alive, hands
    GPU ids to them through a shared queue, collects their results from a
    second queue and prints a summary at the end. Returns early when
    TensorFlow reports no GPU.
    """
    # Configure GPU memory growth at start
    configure_gpu_memory()

    # Setup paths
    root_dir = Path('/projectnb/ec523kb/projects/hemorrhage-classification')
    training_dir = root_dir / 'stage_2_train_sorted_nifti_pruned/trainingImages'
    deepbleed_dir = root_dir / 'stage_2_train_sorted_nifti_pruned/deepbleed'
    input_dir = root_dir / 'stage_2_train_sorted_nifti_pruned/deepbleed_input'
    output_dir = root_dir / 'stage_2_train_sorted_nifti_pruned/deepbleed_output'
    weights_dir = deepbleed_dir / 'weights'

    # Get available GPUs
    num_gpus = len(tf.config.experimental.list_physical_devices('GPU'))
    print(f"Found {num_gpus} GPUs")

    if num_gpus == 0:
        print("No GPUs found. Exiting.")
        return

    # Create directories
    input_dir.mkdir(parents=True, exist_ok=True)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Get list of all images (sorted so runs visit them in a stable order)
    training_images = sorted(training_dir.glob('*.nii*'))
    total = len(training_images)
    print(f"Found {total} images to process")

    # Create GPU queue and fill it with available GPUs
    gpu_queue = mp.Queue()
    for i in range(num_gpus):
        gpu_queue.put(i)

    # Create result queue
    result_queue = mp.Queue()

    # Prepare base paths
    base_paths = (deepbleed_dir, input_dir, output_dir, weights_dir)

    launch_delay_s = 5    # Delay between process starts
    poll_interval_s = 1   # Pause between bookkeeping passes

    # Workers are tracked by image name so that a result taken from the queue
    # is matched to the process that produced it, not to whichever process
    # happens to be inspected at that moment.
    running = {}
    next_index = 0
    successful = 0
    failed = 0
    failed_images = []

    with tqdm(total=total, desc="Processing images") as pbar:
        while running or next_index < total:
            # Start new processes while there is spare capacity
            while len(running) < num_gpus and next_index < total:
                img_path = training_images[next_index]
                worker = mp.Process(
                    target=process_single_image,
                    args=(gpu_queue, img_path, base_paths, result_queue)
                )
                worker.start()
                running[img_path.name] = worker
                next_index += 1
                time.sleep(launch_delay_s)

            # Snapshot exited workers BEFORE draining the queue: a worker that
            # has exited but whose result is still queued is then handled by
            # the drain and not counted as a crash.
            exited = [name for name, worker in running.items() if not worker.is_alive()]

            # Drain every result that is currently available
            while True:
                try:
                    result_name, success, message = result_queue.get_nowait()
                except Empty:
                    break

                worker = running.pop(result_name, None)
                if worker is None:
                    # Already accounted for; do not count it twice.
                    continue
                # The worker reports as its last action, so this join is short.
                worker.join()

                if success:
                    successful += 1
                    print(f"\nSuccessfully processed: {result_name}")
                else:
                    failed += 1
                    failed_images.append(result_name)
                    print(f"\nFailed to process {result_name}: {message}")
                pbar.update(1)

            # Workers that exited without ever reporting a result
            for name in exited:
                worker = running.pop(name, None)
                if worker is None:
                    continue  # its result was handled by the drain above
                worker.join()
                failed += 1
                failed_images.append(name)
                print(f"\nFailed to process {name}: worker exited with code "
                      f"{worker.exitcode} without reporting a result")
                pbar.update(1)
                # NOTE(review): if such a worker had already leased a GPU id,
                # that id is not returned to gpu_queue and later workers may
                # block waiting for it - confirm whether this can occur.

            time.sleep(poll_interval_s)

    # Print summary
    print("\nProcessing Complete!")
    print(f"Successfully processed: {successful}")
    print(f"Failed: {failed}")
    if failed_images:
        print("\nFailed images:")
        for img in failed_images:
            print(f"  - {img}")

if __name__ == "__main__":
    # Use spawn method for clean process creation
    # NOTE(review): with 'spawn', child processes have to import the worker
    # function from __main__. When this cell runs inside a notebook kernel that
    # import may fail, in which case every worker would exit without reporting
    # a result - confirm, and consider moving the worker functions into an
    # importable .py module.
    mp.set_start_method('spawn', force=True)
    run_deepbleed()

Processing images:   0%|          | 0/4492 [00:00<?, ?it/s]

Found 4 GPUs
Found 4492 images to process


Processing images:  29%|██▊       | 1289/4492 [1:53:15<4:40:48,  5.26s/it]

In [2]:
# Print the TensorFlow version importable by the python3 found on PATH.
!python3 -c "import tensorflow as tf; print(tf.__version__)"
!nvidia-smi  # To check CUDA version

2.1.0
Wed Dec 11 00:29:34 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 565.57.01              Driver Version: 565.57.01      CUDA Version: 12.7     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla V100-SXM2-16GB           On  |   00000000:18:00.0 Off |                    0 |
| N/A   46C    P0             74W /  300W |     384MiB /  16384MiB |      0%   E. Process |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
|   1  Tesla V100-SXM2-16GB           On  