# Structure Server - Comprehensive Test Suite

This notebook tests all MCP tools in `structure_server.py`:

1. **fetch_molecules** - Fetch structures from PDB/AlphaFold/PDB-REDO
2. **split_molecules** - Split multi-chain structures into individual chains
3. **clean_protein** - Clean protein structures for MD simulation
4. **create_mutated_structutre** - Create mutated structures with FASPR (imported below, but not yet exercised by any test cell in this notebook)

Each tool is tested for:
- Normal operation
- Edge cases
- Error handling (LLM-friendly error responses)


In [None]:
# Setup
import sys
# Put the parent directory first on the import path. Later cells import
# `servers.structure_server` and `common.base`; presumably both live one
# level above this notebook -- confirm against the repository layout.
sys.path.insert(0, '..')

from pathlib import Path
import json
import importlib
import asyncio

# For running async functions in notebook: nest_asyncio patches asyncio so
# that asyncio.run() can be called from a cell even though the notebook
# kernel already has an event loop running.
import nest_asyncio
nest_asyncio.apply()

print("Setup complete")


In [None]:
# Check dependencies
print("Checking dependencies...\n")

# Importable module name -> short description shown in the report.
deps = {
    "gemmi": "Structure parsing (mmCIF/PDB)",
    "pdbfixer": "Protein structure cleaning",
    "openmm": "Molecular simulation",
    "httpx": "Async HTTP client"
}

# Try each import in turn; a missing package is reported, not raised, so the
# remaining dependencies are still checked.
for module, desc in deps.items():
    try:
        importlib.import_module(module)
    except ImportError:
        print(f"✗ {module}: {desc} (NOT INSTALLED)")
    else:
        print(f"✓ {module}: {desc}")

# Check FASPR
print("\nChecking external tools...")
from common.base import BaseToolWrapper
faspr = BaseToolWrapper("FASPR", conda_env="mcp-md")
# is_available() is project code; presumably it reports whether the FASPR
# executable can be found -- verify against common/base.py.
faspr_mark = '✓' if faspr.is_available() else '✗'
print(f"{faspr_mark} FASPR (side-chain packing)")


In [None]:
# Import and reload the structure server module
# The reload happens before the `from ... import` below, so the tool names
# bound in this notebook come from the freshly re-executed module.
import servers.structure_server as structure_module
importlib.reload(structure_module)

# Import tools directly
# NOTE(review): the spelling `create_mutated_structutre` has to match the
# name exported by servers/structure_server.py -- confirm there before
# renaming it here. It is imported but not called by any cell visible in
# this notebook.
from servers.structure_server import (
    fetch_molecules,
    split_molecules,
    clean_protein,
    create_mutated_structutre
)

print("Structure server tools imported successfully")


In [None]:
# Helper function to display results nicely
def show_result(result: dict, title: str = "Result") -> None:
    """Display result dictionary with formatting.

    Prints, in order: a banner containing *title*; a SUCCESS/FAILED line
    chosen by the truthiness of ``result['success']``; the ``errors`` and
    ``warnings`` lists when non-empty; every other key under "Details"
    (dict/list values whose ``str()`` exceeds 100 characters are
    summarised instead of dumped); and one line per entry of
    ``operations`` when present.

    Args:
        result: Result dictionary returned by a tool call.
        title: Heading printed inside the banner.
    """
    rule = '=' * 60
    print(f"\n{rule}")
    print(f" {title}")
    print(rule)

    # Check success status
    print("\n✓ SUCCESS" if result.get('success') else "\n✗ FAILED")

    # Show errors and warnings if any (same layout for both sections)
    for heading, key in (("Errors", 'errors'), ("Warnings", 'warnings')):
        messages = result.get(key)
        if messages:
            print(f"\n{heading}:")
            for message in messages:
                print(f"  - {message}")

    # Show key fields
    skip_keys = {'success', 'errors', 'warnings', 'operations'}
    print("\nDetails:")
    for k, v in result.items():
        if k in skip_keys:
            continue
        if isinstance(v, (dict, list)) and len(str(v)) > 100:
            # Large containers: show the element count for lists, or just
            # the word 'dict' for dictionaries.
            summary = len(v) if isinstance(v, list) else 'dict'
            print(f"  {k}: [complex data, {summary}]")
        else:
            print(f"  {k}: {v}")

    # Show operations if present
    good_statuses = ('success', 'detected', 'added', 'replaced')
    operations = result.get('operations')
    if operations:
        print("\nOperations:")
        for op in operations:
            status = op.get('status')
            status_icon = "✓" if status in good_statuses else "○"
            # 'details' may be absent, explicitly None, or not a string;
            # normalise to text before truncating so that slicing cannot
            # raise TypeError.
            details = op.get('details')
            details_text = "" if details is None else str(details)
            print(f"  {status_icon} {op.get('step')}: {status} - {details_text[:60]}")

print("Helper function defined")


---
## Test 1: fetch_molecules

Test fetching structures from different sources.


In [None]:
# Test 1.1: Fetch from PDB (small protein: 1CRN - crambin)
print("Test 1.1: Fetch 1CRN from PDB")

# fetch_molecules is driven through asyncio.run, i.e. it is used as a coroutine.
result = asyncio.run(fetch_molecules("1CRN", source="pdb"))
show_result(result, "Fetch 1CRN from PDB")

# Report the size of the downloaded file; 'file_path' is only consulted
# after a successful fetch.
if result['success']:
    if result['file_path']:
        file_size = Path(result['file_path']).stat().st_size
        print(f"\nFile size: {file_size} bytes")


In [None]:
# Test 1.2: Fetch non-existent PDB ID (error handling)
print("Test 1.2: Fetch non-existent PDB ID")

result = asyncio.run(fetch_molecules("XXXX", source="pdb"))
show_result(result, "Fetch Invalid PDB ID")

# Check that error handling is LLM-friendly: the call must come back with
# success falsy and at least one entry in 'errors'.
fetch_succeeded = result['success']
assert not fetch_succeeded, "Should fail for invalid PDB ID"
error_count = len(result['errors'])
assert error_count > 0, "Should have error messages"
print("\n✓ Error handling works correctly")


---
## Test 2: split_molecules

Test splitting multi-chain structures into individual chain files.


In [None]:
# Test 2.1: Split 1AKE (homodimer with ligand)
print("Test 2.1: Split 1AKE structure")

# First fetch 1AKE
fetch_result = asyncio.run(fetch_molecules("1AKE", source="pdb"))
if not fetch_result['success']:
    print("Failed to fetch 1AKE for split test")
else:
    result = split_molecules(fetch_result['file_path'])
    show_result(result, "Split 1AKE")

    # Show chain files, one section per output category
    if result['success']:
        sections = (
            ("Protein files", 'protein_files'),
            ("Non-protein files", 'non_protein_files'),
        )
        for heading, key in sections:
            print(f"\n{heading}:")
            for path in result[key]:
                print(f"  - {path}")


In [None]:
# Test 2.2: Split with chain selection
print("Test 2.2: Split 1AKE - select only chain A")

# `fetch_result` is not assigned in this cell; it is the value left behind
# by the Test 2.1 cell.
if not fetch_result['success']:
    print("Skipped - 1AKE not available")
else:
    wanted_chains = ['A']
    result = split_molecules(fetch_result['file_path'], select_chains=wanted_chains)
    show_result(result, "Split 1AKE (Chain A only)")


---
## Test 3: clean_protein

Test protein structure cleaning with PDBFixer.


In [None]:
# Test 3.1: Clean 1CRN (crambin - has disulfide bonds)
print("Test 3.1: Clean 1CRN (crambin with disulfide bonds)")

# First fetch and split
fetch_result = asyncio.run(fetch_molecules("1CRN", source="pdb"))
if not fetch_result['success']:
    print("Failed to fetch 1CRN")
else:
    split_result = split_molecules(fetch_result['file_path'])
    # An unsuccessful split and a split with no protein files are both
    # reported as a split failure.
    if not split_result['success'] or not split_result['protein_files']:
        print("Failed to split 1CRN")
    else:
        # Clean the first protein file produced by the split.
        protein_pdb = split_result['protein_files'][0]
        result = clean_protein(protein_pdb)
        show_result(result, "Clean 1CRN")

        # Check disulfide bonds
        bonds = result.get('disulfide_bonds')
        if bonds:
            print("\nDisulfide bonds detected:")
            for bond in bonds:
                print(f"  {bond['residue1']} <-> {bond['residue2']}")


In [None]:
# Test 3.2: Clean with custom options
# NOTE(review): this cell passes cap_termini=True, so the banner states that
# capping is enabled. An earlier label read "no termini capping", which
# contradicted the call -- confirm which variant is meant to be exercised.
print("Test 3.2: Clean with custom options (termini capping, pH 7.0)")

# Reuses `fetch_result` / `split_result` left behind by the Test 3.1 cell.
# The protein_files check mirrors Test 3.1 so that an empty list skips the
# test instead of raising IndexError on the [0] lookup.
if (
    fetch_result['success']
    and split_result['success']
    and split_result['protein_files']
):
    protein_pdb = split_result['protein_files'][0]

    result = clean_protein(
        protein_pdb,
        cap_termini=True,
        ph=7.0
    )
    show_result(result, "Clean with Custom Options")
else:
    print("Skipped - previous test failed")


In [None]:
# Test 3.3: Clean non-existent file (error handling)
print("Test 3.3: Clean non-existent file")

missing_pdb = "/nonexistent/protein.pdb"
result = clean_protein(missing_pdb)
show_result(result, "Clean Non-existent File")

# The call is expected to come back with success falsy rather than succeed.
cleaned_ok = result['success']
assert not cleaned_ok, "Should fail for non-existent file"
print("\n✓ File not found error handling works")


---
## Test 4: Integration Test

Test a complete workflow: fetch -> split -> clean


In [None]:
# Test 4.1: Complete workflow for 1CRN
# Runs fetch -> split -> clean in sequence; each step only runs when the
# previous one reported success, and a failing step prints its errors.
print("Test 4.1: Complete workflow for 1CRN")
print("="*60)

# Step 1: Fetch
print("\nStep 1: Fetching 1CRN...")
fetch_result = asyncio.run(fetch_molecules("1CRN", source="pdb"))
if not fetch_result['success']:
    print(f"Failed: {fetch_result['errors']}")
else:
    print(f"✓ Fetched: {fetch_result['file_path']}")
    print(f"  Format: {fetch_result['file_format']}, Atoms: {fetch_result['num_atoms']}")

    # Step 2: Split
    print("\nStep 2: Splitting structure...")
    split_result = split_molecules(fetch_result['file_path'])
    if not split_result['success']:
        print(f"Failed: {split_result['errors']}")
    elif not split_result['protein_files']:
        # Test 3.1 treats an empty protein_files list as a split failure;
        # do the same here so the [0] lookup below cannot raise IndexError.
        print("Failed: split produced no protein files")
    else:
        print(f"✓ Split into {len(split_result['protein_files'])} protein file(s)")
        print(f"  Output dir: {split_result['output_dir']}")

        # Step 3: Clean
        print("\nStep 3: Cleaning protein...")
        protein_pdb = split_result['protein_files'][0]
        clean_result = clean_protein(protein_pdb, ph=7.4)
        if not clean_result['success']:
            print(f"Failed: {clean_result['errors']}")
        else:
            print(f"✓ Cleaned: {clean_result['output_file']}")
            # Tolerate a missing 'statistics' entry, as Test 4.2 does.
            stats = clean_result.get('statistics', {})
            print(f"  Final atoms: {stats.get('final_atoms', 'N/A')}")
            print(f"  Disulfide bonds: {len(clean_result.get('disulfide_bonds', []))}")

print("\n" + "="*60)
print("Workflow complete!")


In [None]:
# Test 4.2: Complete workflow for multi-chain protein (1AKE)
# Runs fetch -> split (chain A only) -> clean; each step only runs when the
# previous one reported success, and a failing step prints its errors.
print("Test 4.2: Complete workflow for 1AKE (homodimer)")
print("="*60)

# Step 1: Fetch
print("\nStep 1: Fetching 1AKE...")
fetch_result = asyncio.run(fetch_molecules("1AKE", source="pdb"))
if not fetch_result['success']:
    print(f"Failed: {fetch_result['errors']}")
else:
    print(f"✓ Fetched: {fetch_result['file_path']}")
    print(f"  Chains: {fetch_result['chains']}")

    # Step 2: Split - select only chain A
    print("\nStep 2: Splitting (chain A only)...")
    split_result = split_molecules(
        fetch_result['file_path'],
        select_chains=['A']
    )
    if not split_result['success']:
        print(f"Failed: {split_result['errors']}")
    elif not split_result['protein_files']:
        # Without this guard the [0] lookup in step 3 would raise
        # IndexError if the selection yielded no protein file.
        print("Failed: split produced no protein files for chain A")
    else:
        print("✓ Extracted chain A")
        print(f"  Protein files: {len(split_result['protein_files'])}")
        print(f"  Non-protein files: {len(split_result['non_protein_files'])}")

        # Step 3: Clean chain A
        print("\nStep 3: Cleaning chain A...")
        protein_pdb = split_result['protein_files'][0]
        clean_result = clean_protein(
            protein_pdb,
            ignore_terminal_missing_residues=True,
            ph=7.4
        )
        if not clean_result['success']:
            print(f"Failed: {clean_result['errors']}")
        else:
            print(f"✓ Cleaned: {clean_result['output_file']}")
            stats = clean_result.get('statistics', {})
            print(f"  Initial residues: {stats.get('initial_residues', 'N/A')}")
            print(f"  Final residues: {stats.get('final_residues', 'N/A')}")
            print(f"  Final atoms: {stats.get('final_atoms', 'N/A')}")

print("\n" + "="*60)
print("Workflow complete!")


---
## Summary

This notebook tested the following tools from `structure_server.py` (`create_mutated_structutre` is imported but has no test cell yet):

| Tool | Tests | Purpose |
|------|-------|---------|
| `fetch_molecules` | 2 | Download structures from PDB |
| `split_molecules` | 2 | Split multi-chain structures |
| `clean_protein` | 3 | Clean and prepare proteins for MD |
| Integration | 2 | End-to-end workflows |

### Key Features Tested:
- **LLM-friendly error handling**: All tools return structured `success`/`errors`/`warnings` fields
- **Disulfide bond detection**: CYS residues are renamed to CYX for Amber compatibility
- **Chain selection**: Split specific chains from multi-chain structures
- **Custom cleaning options**: Control termini capping, protonation, etc.


In [None]:
# Printed when execution reaches the last cell; it does not itself check the
# outcome of any earlier cell.
print("All tests completed!")
