# Solvation Server Test Notebook

This notebook tests the solvation server functionality including:
1. `run_antechamber_robust` - PDB generation with preserved atom names
2. `merge_structures` - Merging protein and ligand structures
3. `solvate_structure` - Water box solvation
4. `embed_in_membrane` - Lipid bilayer embedding

## Prerequisites
- AmberTools (antechamber, parmchk2, packmol-memgen)
- mcp-md conda environment activated
- Test structures from previous notebooks


In [None]:
# Setup and imports
import sys
import os
from pathlib import Path

# Add parent directory for imports.
# Both entries are derived from the current working directory, so they only
# resolve correctly when the kernel runs from the notebook's own folder
# (one level below the directory that contains the `servers` package).
sys.path.insert(0, str(Path.cwd().parent))
# NOTE(review): presumably lets modules inside `servers/` import their siblings
# as top-level modules - confirm against the server sources.
sys.path.insert(0, str(Path.cwd().parent / 'servers'))

# Import structure server functions
from servers.structure_server import (
    fetch_molecules,
    inspect_molecules,
    split_molecules,
    clean_protein,
    clean_ligand,
    run_antechamber_robust,
    merge_structures
)

# Import solvation server functions
from servers.solvation_server import (
    solvate_structure,
    embed_in_membrane,
    list_available_lipids
)

# Rich output for better display
from rich import print as rprint
from rich.console import Console
console = Console()

print("✓ All imports successful")


In [None]:
# Helper function to display results
def display_result(title: str, result: dict):
    """Print a formatted report for one tool-call result.

    Args:
        title: Heading shown inside the banner.
        result: Result dictionary. ``success`` selects the SUCCESS/FAILED
            line, every key is listed under "Details", and non-empty
            ``errors`` / ``warnings`` entries are repeated in their own
            sections.
    """
    banner = "=" * 60
    print("\n" + banner)
    print(f" {title}")
    print(banner)

    if result.get("success"):
        print("\n✓ SUCCESS\n")
    else:
        print("\n✗ FAILED\n")

    # Keys whose containers are too bulky to print; only a summary is shown.
    bulky_keys = {"charges", "chain_mapping"}
    print("Details:")
    for key, value in result.items():
        if key in bulky_keys and isinstance(value, list):
            print(f"  {key}: [complex data, {len(value)}]")
        elif key in bulky_keys and isinstance(value, dict):
            print(f"  {key}: [complex data, dict]")
        elif isinstance(value, list) and len(value) > 3:
            # Long lists are truncated to their first three items.
            print(f"  {key}: {value[:3]}... ({len(value)} items)")
        else:
            # Also reached by bulky keys holding a scalar or None, which were
            # previously omitted from the output altogether.
            print(f"  {key}: {value}")

    sections = (("Errors", "errors", "✗"), ("Warnings", "warnings", "⚠"))
    for heading, field, mark in sections:
        messages = result.get(field)
        if not messages:
            continue
        if isinstance(messages, str):
            # A bare string would otherwise be printed one character per line.
            messages = [messages]
        print(f"\n{heading}:")
        for message in messages:
            print(f"  {mark} {message}")


---
## Test 0: Prepare Test Data

First, we need to prepare test structures using structure_server tools.


In [None]:
# Test 0.1: Fetch a test structure (using 1L2Y - small protein)
print("Test 0.1: Fetching test structure (1L2Y - Trp-cage miniprotein)...")

# In Jupyter, we can use await directly
# (top-level await is a notebook feature; this cell is not valid as a plain script)
fetch_result = await fetch_molecules("1L2Y", source="pdb")
display_result("Fetch 1L2Y", fetch_result)

# TEST_STRUCTURE is only defined when the fetch succeeded; the split cell
# (Test 0.2) reads it.
if fetch_result["success"]:
    TEST_STRUCTURE = fetch_result["file_path"]
    print(f"\nTest structure: {TEST_STRUCTURE}")


In [None]:
# Test 0.2: Split and clean the structure
print("Test 0.2: Splitting and cleaning structure...")

# TEST_STRUCTURE only exists if the fetch in Test 0.1 succeeded. Guard it the
# same way the later test cells guard their inputs, instead of failing with a
# NameError.
if 'TEST_STRUCTURE' in dir():
    # Split the structure
    split_result = split_molecules(TEST_STRUCTURE, exclude_waters=True)
    display_result("Split Structure", split_result)

    # Only the first protein file is carried forward to the cleaning step.
    if split_result["success"] and split_result["protein_files"]:
        PROTEIN_RAW = split_result["protein_files"][0]
        print(f"\nProtein file: {PROTEIN_RAW}")
else:
    print("⚠ Missing TEST_STRUCTURE - run previous tests first")


In [None]:
# Test 0.3: Clean the protein
print("Test 0.3: Cleaning protein...")

# PROTEIN_RAW only exists if the split in Test 0.2 produced a protein file.
# Guard it the same way the later test cells guard their inputs, instead of
# failing with a NameError.
if 'PROTEIN_RAW' in dir():
    clean_result = clean_protein(
        pdb_file=PROTEIN_RAW,
        ph=7.4,
        add_hydrogens=True
    )
    display_result("Clean Protein", clean_result)

    # PROTEIN_CLEAN is the input for the merge, solvation and membrane tests.
    if clean_result["success"]:
        PROTEIN_CLEAN = clean_result["output_file"]
        print(f"\nCleaned protein: {PROTEIN_CLEAN}")
else:
    print("⚠ Missing PROTEIN_RAW - run previous tests first")


---
## Test 1: run_antechamber_robust - PDB Generation

Test that `run_antechamber_robust` generates a PDB file that preserves the ligand's atom names.


In [None]:
# Test 1: Create a simple ligand and parameterize it
print("Test 1: run_antechamber_robust - PDB generation with preserved atom names")

# Create a simple test molecule (ethanol) as SDF
# Use absolute path to avoid issues with conda run
test_ligand_dir = Path.cwd() / "output/test_ligand"
# Idempotent: safe to re-run the cell when the directory already exists.
test_ligand_dir.mkdir(parents=True, exist_ok=True)

# Create a simple ethanol SDF file for testing.
# V2000 record: the counts line declares 9 atoms and 8 bonds
# (2 C, 1 O and 6 explicit H). Column spacing inside the literal is part of
# the format and must not be reformatted.
ethanol_sdf = test_ligand_dir / "ethanol.sdf"
ethanol_sdf_content = """ethanol
     RDKit          3D

  9  8  0  0  0  0  0  0  0  0999 V2000
   -0.0400    1.0900    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -0.0400   -0.4200    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    1.1800   -1.0500    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
    0.8900    1.5100    0.4700 H   0  0  0  0  0  0  0  0  0  0  0  0
   -0.9200    1.4700    0.5300 H   0  0  0  0  0  0  0  0  0  0  0  0
   -0.1100    1.4700   -1.0200 H   0  0  0  0  0  0  0  0  0  0  0  0
    0.8700   -0.8100   -0.5400 H   0  0  0  0  0  0  0  0  0  0  0  0
   -0.9400   -0.7900   -0.5000 H   0  0  0  0  0  0  0  0  0  0  0  0
    1.1300   -2.0200    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
  1  2  1  0
  2  3  1  0
  1  4  1  0
  1  5  1  0
  1  6  1  0
  2  7  1  0
  2  8  1  0
  3  9  1  0
M  END
$$$$
"""

# Overwrites any ethanol.sdf left behind by a previous run.
with open(ethanol_sdf, 'w') as f:
    f.write(ethanol_sdf_content)

print(f"Created test ligand: {ethanol_sdf}")

# Run antechamber on the ethanol SDF written above (neutral molecule,
# residue name ETH, "bcc" charge method, gaff2 atom types).
antechamber_result = run_antechamber_robust(
    ligand_file=str(ethanol_sdf),
    output_dir=str(test_ligand_dir),
    net_charge=0,
    residue_name="ETH",
    charge_method="bcc",
    atom_type="gaff2"
)

display_result("run_antechamber_robust (with PDB)", antechamber_result)

# Check if PDB was generated
if antechamber_result["success"]:
    if antechamber_result.get("pdb"):
        print(f"\n✓ PDB file generated: {antechamber_result['pdb']}")

        # Verify atom names are preserved
        pdb_path = Path(antechamber_result["pdb"])
        if pdb_path.exists():
            print("\nPDB content (first 15 lines):")
            with open(pdb_path) as f:
                for i, line in enumerate(f):
                    if i >= 15:
                        break
                    print(f"  {line.rstrip()}")

            # Only publish the ligand path once the file is confirmed on disk.
            LIGAND_PDB = str(pdb_path)
        else:
            # Previously silent: LIGAND_PDB stayed undefined and the merge test
            # reported a misleading "run previous tests first".
            print(f"\n⚠ PDB path reported but file not found: {pdb_path}")
    else:
        print("\n⚠ PDB field not in result - check implementation")


---
## Test 2: merge_structures - Protein + Ligand (PDB)

Test merging protein and ligand PDB files.


In [None]:
# Test 2: Merge protein and ligand
print("Test 2: merge_structures - protein + ligand(PDB)")

# Both inputs are produced by earlier cells; stop with a hint if either is absent.
if 'PROTEIN_CLEAN' not in dir() or 'LIGAND_PDB' not in dir():
    print("⚠ Missing PROTEIN_CLEAN or LIGAND_PDB - run previous tests first")
else:
    merge_result = merge_structures(
        pdb_files=[PROTEIN_CLEAN, LIGAND_PDB],
        output_name="merged",
    )
    display_result("merge_structures", merge_result)

    if merge_result["success"]:
        MERGED_PDB = merge_result["output_file"]
        print(f"\nMerged structure: {MERGED_PDB}")

        # One line per input file, keyed by file name only.
        print("\nChain mapping:")
        for file_path, mapping in merge_result["chain_mapping"].items():
            print(f"  {Path(file_path).name}: {mapping}")


---
## Test 3: merge_structures - Multiple Chains

Test merging multiple protein chains.


In [None]:
# Test 3: Merge multiple chains (using same protein twice as example)
print("Test 3: merge_structures - multiple chains")

if 'PROTEIN_CLEAN' not in dir():
    print("⚠ Missing PROTEIN_CLEAN - run previous tests first")
else:
    # Duplicate the cleaned protein next to the original so the merge
    # receives two distinct input files.
    import shutil
    protein2 = Path(PROTEIN_CLEAN).with_name("protein_2.amber.pdb")
    shutil.copy(PROTEIN_CLEAN, protein2)

    merge_multi_result = merge_structures(
        pdb_files=[PROTEIN_CLEAN, str(protein2)],
        output_name="dimer",
    )
    display_result("merge_structures (multiple)", merge_multi_result)

    if merge_multi_result["success"]:
        print(f"\nTotal chains: {merge_multi_result['statistics']['total_chains']}")
        print(f"Total atoms: {merge_multi_result['statistics']['total_atoms']}")


---
## Test 4: solvate_structure - Basic Solvation

Test solvating a structure in a water box.


In [None]:
# Test 4: Basic solvation
print("Test 4: solvate_structure - basic solvation")

if 'PROTEIN_CLEAN' not in dir():
    print("⚠ Missing PROTEIN_CLEAN - run previous tests first")
else:
    # Protein-only input keeps the solvation test fast.
    solvate_result = solvate_structure(
        pdb_file=PROTEIN_CLEAN,
        output_name="solvated",
        dist=10.0,  # small box for a faster test
        cubic=True,
        salt=True,
        salt_c="Na+",
        salt_a="Cl-",
        saltcon=0.15,
    )
    display_result("solvate_structure", solvate_result)

    if solvate_result["success"]:
        SOLVATED_PDB = solvate_result["output_file"]
        print(f"\nSolvated structure: {SOLVATED_PDB}")
        print(f"Total atoms: {solvate_result['statistics'].get('total_atoms', 'N/A')}")


---
## Test 5: solvate_structure - Different Salt Concentration

Test solvation with different salt parameters.


In [None]:
# Test 5: Solvation with different salt concentration
print("Test 5: solvate_structure - different salt concentration (0.3M KCl)")

if 'PROTEIN_CLEAN' not in dir():
    print("⚠ Missing PROTEIN_CLEAN - run previous tests first")
else:
    # Same box settings as Test 4; only the cation and concentration differ.
    solvate_kcl_result = solvate_structure(
        pdb_file=PROTEIN_CLEAN,
        output_name="solvated_kcl",
        dist=10.0,
        cubic=True,
        salt=True,
        salt_c="K+",
        salt_a="Cl-",
        saltcon=0.30,  # higher concentration than the basic test
    )
    display_result("solvate_structure (KCl 0.3M)", solvate_kcl_result)


---
## Test 6: embed_in_membrane - Single Lipid (POPC)

Test embedding in a POPC membrane.


In [None]:
# Test 6: Membrane embedding with POPC
print("Test 6: embed_in_membrane - single lipid (POPC)")
print("Note: This test may take several minutes...")

if 'PROTEIN_CLEAN' not in dir():
    print("⚠ Missing PROTEIN_CLEAN - run previous tests first")
else:
    membrane_result = embed_in_membrane(
        pdb_file=PROTEIN_CLEAN,
        output_name="membrane_popc",
        lipids="POPC",
        ratio="1",
        dist=10.0,  # kept small for a faster test
        dist_wat=15.0,
        preoriented=True,  # the test input is treated as already oriented
        salt=True,
        nloop=20,  # reduced for a faster test
        nloop_all=50,
    )
    display_result("embed_in_membrane (POPC)", membrane_result)

    if membrane_result["success"]:
        MEMBRANE_PDB = membrane_result["output_file"]
        print(f"\nMembrane structure: {MEMBRANE_PDB}")


---
## Test 7: embed_in_membrane - Mixed Lipids (DOPE:DOPG 3:1)

Test embedding in a bacterial-like membrane.


In [None]:
# Test 7: Membrane embedding with mixed lipids
print("Test 7: embed_in_membrane - mixed lipids (DOPE:DOPG 3:1)")
print("Note: This test may take several minutes...")
print("Note: Using salt=False because packmol has issues when ion count becomes 0")

if 'PROTEIN_CLEAN' not in dir():
    print("⚠ Missing PROTEIN_CLEAN - run previous tests first")
else:
    membrane_mixed_result = embed_in_membrane(
        pdb_file=PROTEIN_CLEAN,
        output_name="membrane_bacterial",
        lipids="DOPE:DOPG",
        ratio="3:1",
        dist=10.0,
        dist_wat=15.0,
        preoriented=True,
        salt=False,  # off: packmol limitation with 0-count ions (see note above)
        nloop=20,
        nloop_all=50,
    )
    display_result("embed_in_membrane (DOPE:DOPG)", membrane_mixed_result)


---
## Test 8: OPM Fetch + Membrane Embedding (4HYT - Na+/K+ ATPase)

Test the full workflow for membrane proteins:
1. Fetch pre-oriented structure from OPM
2. Split and clean the protein
3. Embed in POPC membrane using preoriented=True


In [None]:
# Test 8.1: Fetch membrane protein from OPM
print("Test 8.1: Fetching 4HYT (Na+/K+ ATPase) from OPM...")
print("Note: OPM structures are pre-oriented with membrane normal along Z-axis")

# Top-level await is a notebook feature; this cell is not valid as a plain script.
opm_result = await fetch_molecules("4HYT", source="opm")
display_result("Fetch 4HYT from OPM", opm_result)

# OPM_STRUCTURE is only defined on success; Test 8.2 checks for it before use.
if opm_result["success"]:
    OPM_STRUCTURE = opm_result["file_path"]
    print(f"\nOPM structure: {OPM_STRUCTURE}")
    # 'preoriented' is optional in the result, hence the 'N/A' fallback.
    print(f"Pre-oriented: {opm_result.get('preoriented', 'N/A')}")


In [None]:
# Test 8.2: Split and inspect the OPM structure
print("Test 8.2: Splitting OPM structure...")
print("Note: Selecting protein chains A and B only (Na+/K+ ATPase alpha and beta subunits)")

if 'OPM_STRUCTURE' not in dir():
    print("⚠ Missing OPM_STRUCTURE - run previous test first")
else:
    # Inspect first to report what the structure contains.
    inspect_result = inspect_molecules(OPM_STRUCTURE)

    print("\nStructure composition:")
    print(f"  Protein chains: {inspect_result['summary']['num_protein_chains']}")
    print(f"  Ligand chains: {inspect_result['summary']['num_ligand_chains']}")
    print(f"  Water chains: {inspect_result['summary']['num_water_chains']}")
    print(f"  Ion chains: {inspect_result['summary']['num_ion_chains']}")

    # Distinct author chain IDs, printed in sorted order.
    author_chains = {c["author_chain"] for c in inspect_result["chains"]}
    print(f"  Author chain IDs: {sorted(author_chains)}")

    # Select chains A and B by author chain ID (use_author_chains=True matches
    # the PDB auth_asym_id, a single letter such as 'A' or 'B'). The selection
    # includes every entity type in those chains (protein, ligand, ion); only
    # the protein_files output is used afterwards.
    split_opm_result = split_molecules(
        OPM_STRUCTURE,
        select_chains=["A", "B"],  # alpha and beta subunits
        exclude_waters=True,
        use_author_chains=True,  # match by auth_asym_id for PDB files
    )
    display_result("Split OPM Structure", split_opm_result)

    if split_opm_result["success"] and split_opm_result["protein_files"]:
        OPM_PROTEIN_FILES = split_opm_result["protein_files"]
        print(f"\nProtein files to clean: {OPM_PROTEIN_FILES}")


In [None]:
# Test 8.3: Clean each protein chain individually, then merge
print("Test 8.3: Cleaning OPM protein chains individually...")
print("Note: This may take a while for large membrane proteins")
print("Important: Clean each chain separately to handle termini correctly, then merge")

if 'OPM_PROTEIN_FILES' in dir():
    cleaned_files = []
    failed_files = []

    for i, pdb_file in enumerate(OPM_PROTEIN_FILES, start=1):
        print(f"\n--- Cleaning protein {i}/{len(OPM_PROTEIN_FILES)}: {pdb_file} ---")
        # Use a dedicated name: reusing `clean_result` here would overwrite
        # the Test 0.3 result held in the notebook namespace.
        opm_clean_result = clean_protein(
            pdb_file=pdb_file,
            ph=7.4,
            add_hydrogens=True
        )

        if opm_clean_result["success"]:
            cleaned_files.append(opm_clean_result["output_file"])
            print(f"  ✓ Cleaned: {opm_clean_result['output_file']}")
        else:
            failed_files.append(pdb_file)
            print(f"  ✗ Failed: {opm_clean_result.get('errors', [])}")

    # Make a partial failure visible: the steps below still run on whatever
    # was cleaned, so the result would otherwise look like a complete complex.
    if failed_files and cleaned_files:
        print(f"\n⚠ {len(failed_files)} of {len(OPM_PROTEIN_FILES)} chains failed cleaning; "
              f"continuing with an incomplete structure. Failed: {failed_files}")

    print(f"\n--- Merging {len(cleaned_files)} cleaned protein chains ---")

    if len(cleaned_files) > 1:
        # Write the merged file next to the split outputs from Test 8.2.
        merge_opm_result = merge_structures(
            pdb_files=cleaned_files,
            output_dir=split_opm_result["output_dir"],
            output_name="protein_merged"
        )
        display_result("Merge Cleaned Proteins", merge_opm_result)

        if merge_opm_result["success"]:
            OPM_PROTEIN_CLEAN = merge_opm_result["output_file"]
            print(f"\nMerged cleaned protein: {OPM_PROTEIN_CLEAN}")
    elif len(cleaned_files) == 1:
        # A single cleaned chain needs no merge step.
        OPM_PROTEIN_CLEAN = cleaned_files[0]
        print(f"\nCleaned protein: {OPM_PROTEIN_CLEAN}")
    else:
        print("⚠ No proteins were successfully cleaned")
else:
    print("⚠ Missing OPM_PROTEIN_FILES - run previous test first")


In [None]:
# Test 8.4: Embed OPM structure in membrane
print("Test 8.4: Embedding 4HYT in POPC membrane...")
print("Note: Using preoriented=True since OPM structures are already oriented")
print("Note: This test may take 5-10 minutes for large membrane proteins...")

if 'OPM_PROTEIN_CLEAN' not in dir():
    print("⚠ Missing OPM_PROTEIN_CLEAN - run previous tests first")
else:
    membrane_opm_result = embed_in_membrane(
        pdb_file=OPM_PROTEIN_CLEAN,
        output_name="4hyt_membrane",
        lipids="POPC",
        ratio="1",
        dist=15.0,
        dist_wat=17.5,
        preoriented=True,  # OPM input is already oriented
        salt=True,
        keepligs=True,
        nloop=30,
        nloop_all=100,
    )
    display_result("embed_in_membrane (4HYT from OPM)", membrane_opm_result)

    if membrane_opm_result["success"]:
        MEMBRANE_4HYT = membrane_opm_result["output_file"]
        print(f"\nMembrane structure: {MEMBRANE_4HYT}")
        print(f"Total atoms: {membrane_opm_result['statistics'].get('total_atoms', 'N/A')}")


---
## Utility: List Available Lipids


In [None]:
# List available lipids
print("Available Lipids in packmol-memgen")
print("="*50)

# The result is read as a dict with the keys "common_lipids",
# "example_compositions" and "hint".
lipids_info = list_available_lipids()

# One "name: description" line per lipid.
print("\nCommon Lipids:")
for lipid, desc in lipids_info["common_lipids"].items():
    print(f"  {lipid}: {desc}")

# Each example composition provides a 'lipids' and a 'ratio' entry.
print("\nExample Compositions:")
for name, comp in lipids_info["example_compositions"].items():
    print(f"  {name}: lipids={comp['lipids']}, ratio={comp['ratio']}")

print(f"\n{lipids_info['hint']}")


---
## Summary

This notebook tested:
1. `run_antechamber_robust` - PDB generation with preserved atom names
2. `merge_structures` - Protein + ligand merging
3. `merge_structures` - Multiple chain merging
4. `solvate_structure` - Basic water box solvation
5. `solvate_structure` - Different salt concentrations
6. `embed_in_membrane` - Single lipid membrane
7. `embed_in_membrane` - Mixed lipid membrane
8. OPM fetch + `embed_in_membrane` - Pre-oriented membrane protein (4HYT)


In [None]:
# Final summary
print("\n" + "="*60)
print(" Test Summary")
print("="*60)


def _succeeded(result_name: str) -> bool:
    """Return True if the named result exists in this namespace and reports success.

    A result variable that was never created (cell not run, or skipped by its
    guard) counts as not successful.
    """
    result = globals().get(result_name)
    return bool(result and result.get('success'))


# Each entry pairs a label with the result variable set by that test's cell.
# Test 8 is included so the OPM workflow is reported as well.
tests = [
    ("Test 1: antechamber PDB generation", _succeeded('antechamber_result')),
    ("Test 2: merge_structures (protein+ligand)", _succeeded('merge_result')),
    ("Test 3: merge_structures (multiple)", _succeeded('merge_multi_result')),
    ("Test 4: solvate_structure (basic)", _succeeded('solvate_result')),
    ("Test 5: solvate_structure (KCl)", _succeeded('solvate_kcl_result')),
    ("Test 6: embed_in_membrane (POPC)", _succeeded('membrane_result')),
    ("Test 7: embed_in_membrane (DOPE:DOPG)", _succeeded('membrane_mixed_result')),
    ("Test 8: OPM fetch + embed_in_membrane (4HYT)", _succeeded('membrane_opm_result')),
]

passed = 0
for name, success in tests:
    status = "✓ PASS" if success else "✗ FAIL/NOT RUN"
    print(f"  {status}: {name}")
    if success:
        passed += 1

print(f"\nTotal: {passed}/{len(tests)} tests passed")
