In [1]:
# Environment sanity check via the project's bench CLI: print the package
# version, the resolved repository/artifact directories, and the active
# configuration as JSON.
!python -m vsx.bench.cli version
!python -m vsx.bench.cli paths
!python -m vsx.bench.cli show-config




proteosync 0.1.0
REPO_ROOT=/Users/hike/code/proteosync1
RAW_DIR=/Users/hike/code/proteosync1/artifacts/structures/raw
STD_DIR=/Users/hike/code/proteosync1/artifacts/structures/std
POCKETS_DIR=/Users/hike/code/proteosync1/artifacts/pockets
{
  "project": {
    "name": "proteosync",
    "version": "0.1.0"
  },
  "paths": {
    "artifacts": "artifacts",
    "raw": "artifacts/structures/raw",
    "std": "artifacts/structures/std",
    "pockets": "artifacts/pockets",
    "data": "/Users/hike/code/proteosync1/artifacts/data"
  },
  "run": {
    "log_level": "INFO"
  }
}


In [2]:
#Verify the seed & summarize chains
from pathlib import Path
from Bio.PDB import MMCIFParser, PDBParser, is_aa
from vsx.utils.paths import DATA_DIR

# Pick the seed structure file. A PDB file takes precedence when it exists;
# otherwise the mmCIF file is used (so a CIF-only directory resolves to the CIF).
pdb = Path(DATA_DIR) / "GLP1R" / "seed_structure.pdb"
cif = Path(DATA_DIR) / "GLP1R" / "seed_structure.cif"
p = pdb if pdb.exists() else cif

print("seed:", p, "| exists:", p.exists(), "| size:", p.stat().st_size if p.exists() else 0)
assert p.exists(), "Seed structure not found."

# Parse with the parser that matches the file extension.
if p.suffix.lower() in {".cif", ".mmcif"}:
    s = MMCIFParser(QUIET=True).get_structure("GLP1R", str(p))
else:
    s = PDBParser(QUIET=True).get_structure("GLP1R", str(p))

# Chain lengths (amino-acid residues only, non-standard residues included).
# Only the first model is counted: the pocket-building step works on the first
# model as well, and iterating every model would let later models silently
# overwrite the counts (or add chain ids the first model does not contain).
first_model = next(iter(s), None)
chain_len = {}
if first_model is not None:
    chain_len = {
        ch.id: sum(1 for r in ch if is_aa(r, standard=False))
        for ch in first_model
    }

print("chain lengths:", chain_len)

# Receptor = longest chain. Peptide candidates = every OTHER chain with
# 5–120 residues; the receptor is excluded explicitly so a short receptor
# is never paired with itself as its own peptide.
receptor = max(chain_len, key=chain_len.get) if chain_len else None
pep_candidates = [
    cid for cid, n in chain_len.items()
    if cid != receptor and 5 <= n <= 120
]
print("receptor:", receptor, "| peptide candidates:", pep_candidates)


seed: /Users/hike/code/proteosync1/artifacts/data/GLP1R/seed_structure.cif | exists: True | size: 328980
chain lengths: {'A': 463, 'B': 31}
receptor: A | peptide candidates: ['B']


In [3]:
#Build peptide-interface pocket (safe: skips if no peptide)
from Bio.PDB import Selection
from vsx.utils.paths import POCKETS_DIR

# The receptor chain can itself fall inside the peptide length window; drop it
# here so the interface is never computed between a chain and itself.
peptide_ids = [cid for cid in pep_candidates if cid != receptor]

if not peptide_ids:
    print("No peptide chain detected → skipping autopocket.")
else:
    pep = min(peptide_ids, key=lambda c: chain_len[c])  # shortest peptide
    print("Using receptor:", receptor, "| peptide:", pep)

    def aa_atoms(chain):
        """Yield (residue, atom) for every atom of each amino-acid residue in `chain`.

        Residues rejected by is_aa(..., standard=False) are skipped.
        """
        for res in chain:
            if is_aa(res, standard=False):
                for atom in res.get_atoms():
                    yield res, atom

    # Work on the first model of the parsed structure.
    model = next(iter(s))
    receptor_chain = model[receptor]
    peptide_chain = model[pep]

    # A receptor residue belongs to the pocket when any of its atoms lies
    # within `cutoff` (in the structure's coordinate units) of any peptide atom.
    cutoff = 6.0
    cutoff_sq = cutoff ** 2  # compare squared distances; hoisted out of the loops
    pep_atoms = [a for _, a in aa_atoms(peptide_chain)]
    pep_coords = [tuple(a.coord) for a in pep_atoms]

    pocket = set()
    for res, atom in aa_atoms(receptor_chain):
        # Residue key: (chain id, residue number, insertion code, residue name).
        ident = (receptor, int(res.id[1]), (res.id[2] or "").strip(), res.resname.strip())
        if ident in pocket:
            continue  # residue already recorded; no need to test its other atoms
        ax, ay, az = atom.coord
        if any(
            (ax - bx) ** 2 + (ay - by) ** 2 + (az - bz) ** 2 <= cutoff_sq
            for bx, by, bz in pep_coords
        ):
            pocket.add(ident)

    # Write the pocket as a TSV, ordered by residue number, insertion code and
    # residue name so the file content does not depend on set iteration order.
    out_dir = POCKETS_DIR / "GLP1R"
    out_dir.mkdir(parents=True, exist_ok=True)
    out_tsv = out_dir / "peptide_interface.tsv"
    with out_tsv.open("w", encoding="utf-8") as f:
        f.write("chain\tresid\ticode\tresname\n")
        for ch, resid, icode, rname in sorted(pocket, key=lambda x: (x[1], x[2], x[3])):
            f.write(f"{ch}\t{resid}\t{icode}\t{rname}\n")
    print("wrote:", out_tsv, "| residues:", len(pocket))


Using receptor: A | peptide: B
wrote: /Users/hike/code/proteosync1/artifacts/pockets/GLP1R/peptide_interface.tsv | residues: 68


In [5]:
#peek at the pocket file
from pathlib import Path
from vsx.utils.paths import POCKETS_DIR
# Location of the pocket table under the GLP1R pockets directory.
p = POCKETS_DIR / "GLP1R" / "peptide_interface.tsv"
print("exists:", p.exists(), "| path:", p)

# Show at most the first 15 lines (header included) when the file is present.
if p.exists():
    print(*p.read_text().splitlines()[:15], sep="\n")


exists: True | path: /Users/hike/code/proteosync1/artifacts/pockets/GLP1R/peptide_interface.tsv
chain	resid	icode	resname
A	29		THR
A	30		VAL
A	31		SER
A	32		LEU
A	33		TRP
A	34		GLU
A	35		THR
A	36		VAL
A	39		TRP
A	43		ARG
A	67		ASP
A	68		GLU
A	69		TYR
A	88		TYR
