# ICN3D Benchmark

This notebook benchmarks ICN3D (icn3dpy) for protein structure visualization against FlatProt and other visualization tools.

ICN3D is a web-based 3D protein structure viewer that can be embedded in Jupyter notebooks using the icn3dpy package.

In [None]:
# Install required packages
!pip install icn3dpy polars psutil

In [None]:
import csv
import glob
import os
import sys
import tempfile
import time
from pathlib import Path
from typing import List, Tuple, Optional

import polars as pl
import psutil
import icn3dpy

# Make the parent directory importable (FlatProt benchmark utilities)
working_dir = Path.cwd()
sys.path.append(str(working_dir.parent))

# Fall back to a placeholder when icn3dpy has no __version__ attribute
icn3d_version = getattr(icn3dpy, "__version__", "unknown")
print(f"ICN3D version: {icn3d_version}")
print(f"Current working directory: {working_dir}")

## Benchmark Configuration

In [None]:
# Configuration
SMALL_STRUCTURE_GLOB = "../data/**/*.cif"  # Adjust path as needed
N_ITERATIONS = 5
OUTPUT_FILE = Path("icn3d_benchmark_results.csv")

# Find structure files. glob returns matches in arbitrary order, so sort them:
# the benchmark only uses the first few entries, and that selection has to be
# the same on every run and every machine.
small_structure_files = sorted(
    Path(p) for p in glob.glob(SMALL_STRUCTURE_GLOB, recursive=True)
)
print(f"Found {len(small_structure_files)} structure files:")
for f in small_structure_files[:5]:  # Show first 5
    print(f"  {f}")
if len(small_structure_files) > 5:
    print(f"  ... and {len(small_structure_files) - 5} more")

## Benchmark Utility Functions

In [None]:
def measure_execution_notebook(func, *args, **kwargs) -> Tuple[float, float, int, str]:
    """Execute a function and measure its wall-clock time and memory growth.

    Memory is sampled only before and after the call (resident set size of
    the current process), so the reported figure is the net RSS growth across
    the call rather than a true peak: allocations that are released again
    before ``func`` returns are not observed.

    Parameters
    ----------
    func
        The function to execute
    *args, **kwargs
        Arguments to pass to the function

    Returns
    -------
    A tuple containing:
        - Execution time in seconds (float)
        - RSS growth across the call in MB (float), never negative
        - Exit code (int) - 0 for success, 1 for error
        - Error message (str), empty on success
    """
    bytes_per_mb = 1024 * 1024
    process = psutil.Process()
    initial_memory = process.memory_info().rss / bytes_per_mb

    exit_code, error_msg = 0, ""
    start_time = time.monotonic()
    try:
        func(*args, **kwargs)
    except Exception as e:
        # Failures are reported through the return value so that a benchmark
        # loop calling this helper keeps running.
        exit_code, error_msg = 1, str(e)
    # Stop the clock before sampling memory so the probe is not counted as run time.
    exec_time = time.monotonic() - start_time

    # Sample on both the success and the failure path: a call that raised may
    # still have grown the process before failing.
    final_memory = process.memory_info().rss / bytes_per_mb
    memory_growth = max(final_memory - initial_memory, 0.0)

    return exec_time, memory_growth, exit_code, error_msg


def log_result(
    writer,
    tool: str,
    method: str,
    structure: str,
    iteration: int,
    exec_time: float,
    memory: float,
    exit_code: int,
    error_msg: str,
):
    """Append one benchmark measurement as a single row via ``writer.writerow``.

    Execution time and memory are written as strings with four decimal
    places; the last field of the row is always the literal ``"N/A"``.
    """
    formatted_time = format(exec_time, ".4f")
    formatted_memory = format(memory, ".4f")
    row = [
        tool,
        method,
        structure,
        iteration,
        formatted_time,
        formatted_memory,
        exit_code,
        error_msg,
        "N/A",
    ]
    writer.writerow(row)

# Reached only when the definitions earlier in this cell executed without raising.
print("Utility functions defined successfully")

## ICN3D Benchmark Functions

In [None]:
def icn3d_single_structure(structure_path: Path, temp_dir: Path) -> str:
    """Load and render a single structure using ICN3D.

    Parameters
    ----------
    structure_path
        Path to the structure file
    temp_dir
        Temporary directory for output (not used by ICN3D but kept for consistency)

    Returns
    -------
    str
        Status message

    Raises
    ------
    RuntimeError
        If creating or showing the view fails. The original exception is
        attached as ``__cause__`` so the full traceback stays available.
    """
    try:
        # Load structure using file URL format
        # NOTE(review): the query always declares the "pdb" type, while the
        # notebook collects *.cif files - confirm the viewer accepts this.
        file_url = f"url=pdb|{structure_path.absolute()}"
        view = icn3dpy.view(q=file_url)

        # Trigger rendering when the view object offers show()
        # Note: ICN3D is primarily for interactive viewing, not static output
        if hasattr(view, 'show'):
            view.show()
    except Exception as e:
        # Chain explicitly so the underlying failure is reported as the cause.
        raise RuntimeError(f"Failed to load structure {structure_path.name}: {e}") from e

    return f"Successfully loaded {structure_path.name}"


def icn3d_mmdb_structure(mmdb_id: str, temp_dir: Path) -> str:
    """Load and render a structure from MMDB using ICN3D.

    Parameters
    ----------
    mmdb_id
        MMDB identifier
    temp_dir
        Temporary directory for output (not used but kept for consistency)

    Returns
    -------
    str
        Status message

    Raises
    ------
    RuntimeError
        If creating or showing the view fails. The original exception is
        attached as ``__cause__`` so the full traceback stays available.
    """
    try:
        # Load structure using MMDB ID
        view = icn3dpy.view(q=f'mmdbid={mmdb_id}')

        # Trigger rendering when the view object offers show()
        if hasattr(view, 'show'):
            view.show()
    except Exception as e:
        # Chain explicitly so the underlying failure is reported as the cause.
        raise RuntimeError(f"Failed to load MMDB structure {mmdb_id}: {e}") from e

    return f"Successfully loaded MMDB {mmdb_id}"


def icn3d_family_structures(structure_paths: List[Path], temp_dir: Path) -> str:
    """Load and render multiple structures using ICN3D.

    Note: ICN3D doesn't have built-in family/overlay functionality like FlatProt,
    so this loads structures sequentially, one view per structure.

    Parameters
    ----------
    structure_paths
        List of paths to structure files
    temp_dir
        Temporary directory for output (not used by this function)

    Returns
    -------
    str
        Status message including the number of structures loaded

    Raises
    ------
    RuntimeError
        If any structure fails to load; processing stops at the first
        failure. The original exception is attached as ``__cause__``.
    """
    loaded_count = 0
    try:
        for structure_path in structure_paths:
            # NOTE(review): the query always declares the "pdb" type, while the
            # notebook collects *.cif files - confirm the viewer accepts this.
            file_url = f"url=pdb|{structure_path.absolute()}"
            view = icn3dpy.view(q=file_url)

            if hasattr(view, 'show'):
                view.show()

            loaded_count += 1
    except Exception as e:
        # Chain explicitly so the underlying failure is reported as the cause.
        raise RuntimeError(f"Failed to load family structures: {e}") from e

    return f"Successfully loaded {loaded_count} structures"

# Reached only when the function definitions earlier in this cell executed without raising.
print("ICN3D benchmark functions defined successfully")

## Test ICN3D Functions

In [None]:
# Smoke test 1: load a structure by a known MMDB ID
print("Testing ICN3D with MMDB ID...")
try:
    with tempfile.TemporaryDirectory() as temp_dir:
        mmdb_outcome = measure_execution_notebook(
            icn3d_mmdb_structure, "6hjr", Path(temp_dir)
        )
    exec_time, memory, exit_code, error_msg = mmdb_outcome
    print(f"MMDB test - Time: {exec_time:.2f}s, Memory: {memory:.2f}MB, Exit: {exit_code}")
    if error_msg:
        print(f"Error: {error_msg}")
except Exception as exc:
    print(f"MMDB test failed: {exc}")

# Smoke test 2: load the first local structure file, when one was found
if small_structure_files:
    first_structure = small_structure_files[0]
    print(f"\nTesting ICN3D with local file: {first_structure}")
    try:
        with tempfile.TemporaryDirectory() as temp_dir:
            local_outcome = measure_execution_notebook(
                icn3d_single_structure, first_structure, Path(temp_dir)
            )
        exec_time, memory, exit_code, error_msg = local_outcome
        print(f"Local file test - Time: {exec_time:.2f}s, Memory: {memory:.2f}MB, Exit: {exit_code}")
        if error_msg:
            print(f"Error: {error_msg}")
    except Exception as exc:
        print(f"Local file test failed: {exc}")

## Run ICN3D Benchmark

In [None]:
# Benchmark cases as (tool label, method label, function under test)
benchmark_cases = [
    ("icn3d", "single", icn3d_single_structure),
    ("icn3d", "family", icn3d_family_structures),
]

# Add MMDB test if we want to test online functionality
# (the benchmark loop in this cell does not read this list)
test_mmdb_ids = ["6hjr", "1abc", "2xyz"]  # Example MMDB IDs

print(f"Running ICN3D benchmark with {N_ITERATIONS} iterations...")
print(f"Structure files: {len(small_structure_files)}")
print(f"Output file: {OUTPUT_FILE}")

results = []
benchmark_subset = small_structure_files[:3]  # Limit to first 3 for speed
csv_header = [
    "tool", "method", "structure_file", "iteration",
    "execution_time_seconds", "memory_mb", "exit_code",
    "error_message", "family_available",
]

with tempfile.TemporaryDirectory() as temp_dir_str, open(OUTPUT_FILE, "w", newline="") as csvfile:
    temp_dir = Path(temp_dir_str)
    writer = csv.writer(csvfile)
    writer.writerow(csv_header)

    for tool, method, func in benchmark_cases:
        print(f"\nBenchmarking {tool} ({method})...")

        for iteration in range(1, N_ITERATIONS + 1):
            print(f"  Iteration {iteration}/{N_ITERATIONS}")

            # Each target is (progress message, label stored in the results, argument for func).
            if method == "single":
                # One measurement per structure file
                targets = [
                    (f"    Processing {structure.name}", structure.name, structure)
                    for structure in benchmark_subset
                ]
            elif method == "family":
                # One measurement for the whole group of structures
                targets = [(
                    f"    Processing family with {len(benchmark_subset)} structures",
                    "multiple_structures",
                    benchmark_subset,
                )]
            else:
                targets = []

            for progress_message, label, argument in targets:
                print(progress_message)

                exec_time, memory, exit_code, error_msg = measure_execution_notebook(
                    func, argument, temp_dir
                )

                log_result(
                    writer, tool, method, label, iteration,
                    exec_time, memory, exit_code, error_msg
                )

                results.append({
                    'tool': tool, 'method': method, 'file': label,
                    'iteration': iteration, 'time': exec_time, 'memory': memory,
                    'success': exit_code == 0
                })

print(f"\nBenchmark completed! Results saved to {OUTPUT_FILE}")
print(f"Total test cases: {len(results)}")

## Results Analysis

In [None]:
# Summarize the benchmark CSV: success rates, timing/memory statistics, error counts
if not OUTPUT_FILE.exists():
    print(f"Results file not found: {OUTPUT_FILE}")
else:
    df = pl.read_csv(OUTPUT_FILE)
    print("\n=== ICN3D Benchmark Results ===")
    print(f"Total test cases: {len(df)}")

    # Column expressions shared by the aggregations below
    exit_code_col = pl.col("exit_code")
    time_col = pl.col("execution_time_seconds")
    memory_col = pl.col("memory_mb")

    # Success rates: share of rows with exit code 0, as a percentage
    success_rate = df.group_by("tool", "method").agg(
        (exit_code_col == 0).mean().mul(100).round(2).alias("success_rate")
    )
    print("\nSuccess Rates (%):")
    print(success_rate)

    # Performance metrics, restricted to successful runs
    successful = df.filter(exit_code_col == 0)
    if successful.height > 0:
        performance = successful.group_by("tool", "method").agg([
            time_col.mean().round(3).alias("avg_time_sec"),
            time_col.std().round(3).alias("std_time_sec"),
            memory_col.mean().round(2).alias("avg_memory_mb"),
            memory_col.std().round(2).alias("std_memory_mb"),
        ])
        print("\nPerformance Metrics (successful runs only):")
        print(performance)

    # Error analysis: most frequent error messages first
    errors = df.filter(exit_code_col != 0)
    if errors.height > 0:
        print("\nError Summary:")
        error_counts = errors.group_by("error_message").count()
        error_summary = error_counts.sort("count", descending=True)
        print(error_summary)

## Comparison with FlatProt (Optional)

If FlatProt benchmark results are available (the next cell looks for `../benchmark_results.csv`), you can compare them with the ICN3D results here.

In [None]:
# Compare with FlatProt if benchmark results exist
flatprot_results_file = Path("../benchmark_results.csv")

if not flatprot_results_file.exists():
    print("FlatProt benchmark results not found. Run the main benchmark script first for comparison.")
elif not OUTPUT_FILE.exists():
    # Without this guard, reading the ICN3D results below would raise when
    # the ICN3D benchmark has not been run yet.
    print(f"Results file not found: {OUTPUT_FILE}")
else:
    print("\n=== Comparison with FlatProt ===")

    # Load FlatProt results
    flatprot_df = pl.read_csv(flatprot_results_file)

    # Load ICN3D results
    icn3d_df = pl.read_csv(OUTPUT_FILE)

    # Filter for FlatProt results
    flatprot_single = flatprot_df.filter(
        (pl.col("tool") == "flatprot") & (pl.col("method") == "single")
    )

    # Compare single structure performance
    if not flatprot_single.is_empty():
        flatprot_avg_time = flatprot_single.filter(pl.col("exit_code") == 0)["execution_time_seconds"].mean()
        icn3d_single_df = icn3d_df.filter(
            (pl.col("tool") == "icn3d") & (pl.col("method") == "single")
        )
        icn3d_avg_time = icn3d_single_df.filter(pl.col("exit_code") == 0)["execution_time_seconds"].mean()

        # mean() gives None when a tool has no successful single run; applying
        # ":.3f" to None would raise, so show a placeholder instead.
        flatprot_label = "N/A" if flatprot_avg_time is None else f"{flatprot_avg_time:.3f} seconds"
        icn3d_label = "N/A" if icn3d_avg_time is None else f"{icn3d_avg_time:.3f} seconds"

        print("Average execution time (single structure):")
        print(f"  FlatProt: {flatprot_label}")
        print(f"  ICN3D: {icn3d_label}")

        # The truthiness test skips missing averages and also a zero FlatProt
        # average, which would otherwise divide by zero.
        if flatprot_avg_time and icn3d_avg_time:
            # ratio is ICN3D time divided by FlatProt time (values below 1 mean ICN3D was quicker)
            ratio = icn3d_avg_time / flatprot_avg_time
            print(f"  ICN3D is {ratio:.2f}x {'slower' if ratio > 1 else 'faster'} than FlatProt")

## Conclusions

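This benchmark evaluates ICN3D's performance for protein structure visualization. The measured timing and memory figures are reported in the Results Analysis section above; the points below are general qualitative observations about ICN3D: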

1. **ICN3D Strengths:**
   - Interactive 3D visualization
   - Web-based (no local installation of molecular viewers)
   - Direct integration with NCBI databases
   - Jupyter notebook integration

2. **ICN3D Limitations:**
   - Primarily designed for interactive use, not batch processing
   - No built-in family alignment/overlay functionality
   - Requires internet connection for full functionality
   - Limited static output options

3. **Use Cases:**
   - Best for interactive exploration and analysis
   - Good for educational and research notebooks
   - Less suitable for automated batch processing or publication graphics

**Recommendation:** ICN3D serves a different niche compared to FlatProt. While FlatProt excels at creating publication-ready 2D projections and family comparisons, ICN3D is better suited for interactive 3D exploration and analysis in research workflows.