# SplitComplex: Binder/Target PDB Splitter

Use this notebook inside the BindCraft environment to split a bound complex into separate binder and target PDB files. Configure the input file, chain IDs, and output names in the user-configuration cell below, then run all cells in order; the final cell writes a clean component structure for the binder and for the target under the specified output directory.


In [1]:
import os
from pathlib import Path
from typing import Iterable, Dict, List

# Root of the BindCraft checkout. Set the BINDCRAFT_ROOT environment variable
# before starting the kernel to point the notebook at a different location;
# when it is unset or empty the original hard-coded path is used.
BINDCRAFT_ROOT = Path(
    os.environ.get('BINDCRAFT_ROOT') or r'/mnt/e/Code/BindCraft'
).expanduser().resolve()
# Folder the input complex is read from.
INPUT_DIR = (BINDCRAFT_ROOT / 'InputTargets').resolve()
# Parent folder for this notebook's results; created up front (idempotently)
# so later cells can rely on it existing.
OUTPUT_ROOT = (BINDCRAFT_ROOT / 'Results' / 'SplitComplex').resolve()
OUTPUT_ROOT.mkdir(parents=True, exist_ok=True)


In [2]:
# --- User configuration ---
# Bound complex to split, resolved relative to INPUT_DIR.
COMPLEX_PDB = INPUT_DIR.joinpath('1OP9.pdb')

# Folder that receives the two split files.
# NOTE(review): OUTPUT_ROOT already ends in 'SplitComplex', so this resolves to
# .../Results/SplitComplex/SplitComplex -- confirm the nesting is intended.
OUTPUT_SUBDIR = OUTPUT_ROOT.joinpath('SplitComplex')

# Output file stems; '.pdb' is appended when the files are written.
BINDER_NAME = 'HL6_camel_VHH_fragment'
TARGET_NAME = 'HumanLysozyme'

# Chain identifiers that make up each component.
BINDER_CHAINS = ['A']
TARGET_CHAINS = ['B']


In [3]:
def _collect_available_chains(pdb_lines: List[str]) -> List[str]:
    chains = []
    seen = set()
    for line in pdb_lines:
        if line.startswith(('ATOM', 'HETATM')):
            chain_id = line[21] if len(line) > 21 else ' ' 
            chain_id = chain_id if chain_id.strip() else ' ' 
            if chain_id not in seen:
                seen.add(chain_id)
                chains.append(chain_id)
    return chains


def _filter_pdb_lines(pdb_lines: List[str], chains: Iterable[str]) -> List[str]:
    selected = set(chains)
    body = []
    for line in pdb_lines:
        rec = line[:6]
        if rec.startswith(('ATOM', 'HETATM', 'ANISOU', 'TER')):
            chain_id = line[21] if len(line) > 21 else ' ' 
            chain_id = chain_id if chain_id.strip() else ' ' 
            if chain_id in selected:
                body.append(line)
        elif rec.startswith('MODEL') or rec.startswith('ENDMDL'):
            # ignore MODEL records, assume single model output
            continue
    return body


def split_complex(complex_path: Path, binder_chains: Iterable[str], target_chains: Iterable[str], output_dir: Path, binder_name: str, target_name: str) -> Dict[str, Path]:
    """Write the binder and target chains of a complex to two separate PDB files.

    Args:
        complex_path: PDB file containing the bound complex.
        binder_chains: Chain identifiers that make up the binder.
        target_chains: Chain identifiers that make up the target.
        output_dir: Directory for the output files; created if missing.
        binder_name: File stem for the binder output (``.pdb`` is appended).
        target_name: File stem for the target output (``.pdb`` is appended).

    Returns:
        A dict with ``'binder_path'`` and ``'target_path'`` (the files written)
        and ``'available_chains'`` (list of chain identifiers found in the
        input, in first-seen order).

    Raises:
        ValueError: If any requested chain is not present in the input file.
    """
    # Both chain arguments are read twice (validation, then filtering). Pin
    # them down once so a one-shot iterator such as a generator is not already
    # exhausted when the files are written, which would yield empty outputs.
    binder_chains = list(binder_chains)
    target_chains = list(target_chains)

    # Normalise so that every line ends in exactly one '\n'.
    pdb_lines = [line + '\n' for line in complex_path.read_text().splitlines()]

    available = _collect_available_chains(pdb_lines)
    missing_binder = sorted(set(binder_chains) - set(available))
    missing_target = sorted(set(target_chains) - set(available))
    if missing_binder or missing_target:
        raise ValueError(f'Missing chains. Binder missing: {missing_binder}, Target missing: {missing_target}')

    output_dir.mkdir(parents=True, exist_ok=True)

    # Everything before the first coordinate or MODEL record is treated as the
    # header and copied unchanged into both output files.
    header = []
    for line in pdb_lines:
        if line.startswith(('ATOM', 'HETATM', 'MODEL')):
            break
        header.append(line)

    def _write_component(filename: Path, chains: Iterable[str]):
        """Write the shared header, the records for ``chains``, and an END line."""
        body = _filter_pdb_lines(pdb_lines, chains)
        with filename.open('w') as handle:
            handle.writelines(header)
            handle.writelines(body)
            handle.write('END\n')

    binder_path = output_dir / f'{binder_name}.pdb'
    _write_component(binder_path, binder_chains)

    target_path = output_dir / f'{target_name}.pdb'
    _write_component(target_path, target_chains)

    return {
        'binder_path': binder_path,
        'target_path': target_path,
        'available_chains': available,
    }


In [4]:
# Split the configured complex and keep the returned paths for inspection.
result = split_complex(
    complex_path=COMPLEX_PDB,
    binder_chains=BINDER_CHAINS,
    target_chains=TARGET_CHAINS,
    output_dir=OUTPUT_SUBDIR,
    binder_name=BINDER_NAME,
    target_name=TARGET_NAME,
)

# One-line-per-item summary of what was read and where each component went.
print(
    f'Complex: {COMPLEX_PDB}',
    'Available chains: ' + ', '.join(result['available_chains']),
    f"Binder chains ({BINDER_NAME}): {', '.join(BINDER_CHAINS)} -> {result['binder_path']}",
    f"Target chains ({TARGET_NAME}): {', '.join(TARGET_CHAINS)} -> {result['target_path']}",
    sep='\n',
)


Complex: /mnt/e/Code/BindCraft/InputTargets/1OP9.pdb
Available chains: A, B
Binder chains (HL6_camel_VHH_fragment): A -> /mnt/e/Code/BindCraft/Results/SplitComplex/SplitComplex/HL6_camel_VHH_fragment.pdb
Target chains (HumanLysozyme): B -> /mnt/e/Code/BindCraft/Results/SplitComplex/SplitComplex/HumanLysozyme.pdb
