In [1]:
import numpy as np
import pandas as pd

In [3]:
def _parse_atom_field(field: str):
    """
    HBPLUS atom field (cols 1–13 or 15–27) → chain, resnum, icode, resname, atom

    Layout (1-based):
      1   chain ID
      2–5 residue number
      6   insertion code
      7–9 residue name
      10–13 atom name
    """
    field = field.rstrip("\n")
    # field should be 13 chars, but be defensive
    field = field.ljust(13)

    chain   = field[0].strip() or None
    resnum  = field[1:5].strip()
    icode   = field[5].strip() or None
    resname = field[6:9].strip()
    atom    = field[9:13].strip()

    resnum = int(resnum) if resnum else None
    return chain, resnum, icode, resname, atom


def _to_float(x: str):
    x = x.strip()
    if not x or x in {"-1", "-1.0", "-1.00"}:
        return np.nan
    return float(x)


def _to_int(x: str):
    x = x.strip()
    return int(x) if x else None


def parse_hbplus_hb2(path: str) -> pd.DataFrame:
    """
    Parse an HBPLUS .hb2 (or .hhb) hydrogen-bond file into a pandas DataFrame.

    Each accepted line becomes one row. A line is accepted when it is at
    least 75 characters long (newline included) and both residue-number
    fields (cols 2–5 and 16–19) parse as integers; everything else
    (headers, short lines) is skipped silently.

    Columns, in order:
      don_chain, don_resnum, don_icode, don_resname, don_atom
      acc_chain, acc_resnum, acc_icode, acc_resname, acc_atom
      DA_dist, HA_dist, DHA_angle, HAA_angle, DAA_angle
      cat_d, cat_a, seq_gap, CA_dist, hb_index

    Float fields go through _to_float, so blank / "-1" placeholders become
    NaN. seq_gap and hb_index go through _to_int, so a "-1" gap is kept as
    the integer -1.

    Parameters
    ----------
    path : str
        Location of the HBPLUS output file.

    Returns
    -------
    pd.DataFrame
        One row per bond line. The frame always carries the full column set
        listed above, even when the file contains no bond lines, so column
        lookups on the result do not fail on empty input.

    Raises
    ------
    OSError
        If the file cannot be opened.
    ValueError
        If a numeric field of an accepted line cannot be converted.
    """
    # Fixed schema: guarantees the same columns whether or not any row is found.
    columns = [
        "don_chain", "don_resnum", "don_icode", "don_resname", "don_atom",
        "acc_chain", "acc_resnum", "acc_icode", "acc_resname", "acc_atom",
        "DA_dist", "HA_dist", "DHA_angle", "HAA_angle", "DAA_angle",
        "cat_d", "cat_a", "seq_gap", "CA_dist", "hb_index",
    ]
    records = []

    with open(path, "r") as f:
        for line in f:
            # skip obvious headers / short lines
            if len(line) < 75:
                continue
            # A bond line has integer residue numbers in both atom fields.
            try:
                int(line[1:5])   # donor residue number
                int(line[15:19]) # acceptor residue number
            except ValueError:
                continue  # not a bond line

            # --- fixed-width slices (0-based indices) ---
            donor_field    = line[0:13]   # cols 1–13
            acceptor_field = line[14:27]  # cols 15–27

            DA_dist   = _to_float(line[27:32])  # 28–32
            cat       = line[33:35]            # 34–35
            seq_gap   = _to_int(line[36:39])   # 37–39
            CA_dist   = _to_float(line[40:45]) # 41–45
            DHA_angle = _to_float(line[46:51]) # 47–51
            HA_dist   = _to_float(line[52:57]) # 53–57
            HAA_angle = _to_float(line[58:63]) # 59–63
            DAA_angle = _to_float(line[64:69]) # 65–69
            hb_index  = _to_int(line[70:75])   # 71–75

            don_chain, don_resnum, don_icode, don_resname, don_atom = _parse_atom_field(donor_field)
            acc_chain, acc_resnum, acc_icode, acc_resname, acc_atom = _parse_atom_field(acceptor_field)

            # Two-letter category code: first letter donor, second acceptor.
            cat = cat.strip()
            cat_d = cat[0] if len(cat) > 0 else None  # M/S/H
            cat_a = cat[1] if len(cat) > 1 else None

            records.append({
                "don_chain": don_chain,
                "don_resnum": don_resnum,
                "don_icode": don_icode,
                "don_resname": don_resname,
                "don_atom": don_atom,

                "acc_chain": acc_chain,
                "acc_resnum": acc_resnum,
                "acc_icode": acc_icode,
                "acc_resname": acc_resname,
                "acc_atom": acc_atom,

                "DA_dist": DA_dist,
                "HA_dist": HA_dist,
                "DHA_angle": DHA_angle,
                "HAA_angle": HAA_angle,
                "DAA_angle": DAA_angle,

                "cat_d": cat_d,
                "cat_a": cat_a,
                "seq_gap": seq_gap,
                "CA_dist": CA_dist,
                "hb_index": hb_index,
            })

    return pd.DataFrame(records, columns=columns)


In [51]:
# TCR identifiers, split into the two groups compared in this notebook.
no_tcrs = {"tcr14", "tcr420", "tcr520"}
yes_tcrs = {"tcr363", "tcr374", "tcr462"}

# Parse each TCR's ranked_0.hb2 file and write the table next to it as CSV.
# yes_hb / no_hb map TCR id -> parsed hydrogen-bond DataFrame.
yes_hb = {}
no_hb = {}
for tcr_group, hb_store in ((yes_tcrs, yes_hb), (no_tcrs, no_hb)):
    for tcr in tcr_group:
        hb_store[tcr] = parse_hbplus_hb2(f"/workspaces/tcr_structure_embedding/data/killing_assay/tcrmodel2_out/{tcr}/ranked_0.hb2")
        hb_store[tcr].to_csv(f"/workspaces/tcr_structure_embedding/data/killing_assay/tcrmodel2_out/{tcr}/ranked_0_hb.csv", index=False)

In [64]:
# Hydrogen bonds parsed for tcr363.
hb_df = yes_hb["tcr363"]

# Chain IDs of the two binding partners.
tcr_chains = ["D", "E"]       # TCR α/β
mhcpep_chains = ["A", "C"]    # MHC + peptide

# Keep bonds whose donor and acceptor lie on opposite partners,
# regardless of which side donates.
mask = (
    hb_df["don_chain"].isin(tcr_chains) & hb_df["acc_chain"].isin(mhcpep_chains)
) | (
    hb_df["don_chain"].isin(mhcpep_chains) & hb_df["acc_chain"].isin(tcr_chains)
)

hb_df.loc[mask]

Unnamed: 0,don_chain,don_resnum,don_icode,don_resname,don_atom,acc_chain,acc_resnum,acc_icode,acc_resname,acc_atom,DA_dist,HA_dist,DHA_angle,HAA_angle,DAA_angle,cat_d,cat_a,seq_gap,CA_dist,hb_index
32,A,65,-,GLN,NE2,D,27,-,ASN,O,3.02,2.39,119.6,97.8,90.7,S,M,-1,7.35,33
33,C,4,-,ARG,NH2,D,30,-,SER,O,2.69,1.82,142.2,119.7,112.1,S,M,-1,10.15,34
89,D,84,-,THR,OG1,A,58,-,GLU,OE1,2.84,1.89,171.7,147.1,144.4,S,S,-1,8.19,90
90,D,87,-,LYS,NZ,A,61,-,ASP,OD2,2.73,1.73,170.6,149.3,146.4,S,S,-1,11.22,91
105,A,69,-,THR,OG1,D,111,-,GLY,O,3.11,2.17,167.4,156.3,158.2,S,M,-1,7.21,106
106,D,114,-,THR,OG1,A,155,-,GLN,OE1,2.55,1.61,164.7,124.9,124.1,S,S,-1,6.78,107
107,D,132,-,ASP,N,C,5,-,CYS,SG,3.14,2.39,130.8,105.4,116.6,M,S,-1,5.1,108
108,D,132,-,ASP,N,A,155,-,GLN,OE1,2.85,2.26,116.0,160.8,146.7,M,S,-1,8.06,109
110,D,134,-,TRP,NE1,C,6,-,PHE,O,2.72,1.88,138.5,122.1,134.2,S,M,-1,7.94,111
184,A,80,-,ASN,ND2,E,70,-,ASP,OD2,3.4,2.45,158.1,131.0,136.7,S,S,-1,9.43,185


In [65]:
# Hydrogen bonds parsed for tcr374.
hb_df = yes_hb["tcr374"]

# Chain IDs of the two binding partners.
tcr_chains = ["D", "E"]       # TCR α/β
mhcpep_chains = ["A", "C"]    # MHC + peptide

# Keep bonds whose donor and acceptor lie on opposite partners,
# regardless of which side donates.
mask = (
    hb_df["don_chain"].isin(tcr_chains) & hb_df["acc_chain"].isin(mhcpep_chains)
) | (
    hb_df["don_chain"].isin(mhcpep_chains) & hb_df["acc_chain"].isin(tcr_chains)
)

hb_df.loc[mask]

Unnamed: 0,don_chain,don_resnum,don_icode,don_resname,don_atom,acc_chain,acc_resnum,acc_icode,acc_resname,acc_atom,DA_dist,HA_dist,DHA_angle,HAA_angle,DAA_angle,cat_d,cat_a,seq_gap,CA_dist,hb_index
70,A,151,-,ARG,NH1,D,67,-,ASP,OD1,2.93,2.06,143.0,94.0,105.3,S,S,-1,11.31,71
71,A,151,-,ARG,NH2,D,67,-,ASP,OD1,2.79,1.87,150.2,155.3,149.8,S,S,-1,11.31,72
108,C,5,-,CYS,SG,D,113,-,LEU,O,2.83,1.75,133.1,104.8,108.3,S,M,-1,3.87,109
109,D,114,-,ALA,N,C,4,-,ARG,O,2.46,1.82,117.2,147.8,126.4,M,M,-1,4.8,110
148,E,30,-,ASN,ND2,A,76,-,GLU,OE1,2.86,1.88,163.6,146.1,148.1,S,S,-1,10.72,149
180,A,65,-,GLN,NE2,E,57,-,TYR,OH,2.99,2.08,148.0,109.7,108.6,S,S,-1,11.83,181
188,A,65,-,GLN,NE2,E,70,-,ASP,OD2,2.85,2.03,136.0,114.6,120.9,S,S,-1,8.83,189
197,E,85,-,LYS,NZ,A,76,-,GLU,OE2,2.69,1.69,170.4,110.5,106.9,S,S,-1,10.82,198


In [66]:
# Hydrogen bonds parsed for tcr462.
hb_df = yes_hb["tcr462"]

# Chain IDs of the two binding partners.
tcr_chains = ["D", "E"]       # TCR α/β
mhcpep_chains = ["A", "C"]    # MHC + peptide

# Keep bonds whose donor and acceptor lie on opposite partners,
# regardless of which side donates.
mask = (
    hb_df["don_chain"].isin(tcr_chains) & hb_df["acc_chain"].isin(mhcpep_chains)
) | (
    hb_df["don_chain"].isin(mhcpep_chains) & hb_df["acc_chain"].isin(tcr_chains)
)

hb_df.loc[mask]

Unnamed: 0,don_chain,don_resnum,don_icode,don_resname,don_atom,acc_chain,acc_resnum,acc_icode,acc_resname,acc_atom,DA_dist,HA_dist,DHA_angle,HAA_angle,DAA_angle,cat_d,cat_a,seq_gap,CA_dist,hb_index
65,D,66,-,LYS,NZ,A,161,-,GLU,OE2,2.88,2.04,139.5,141.1,144.1,S,S,-1,8.77,66
99,C,4,-,ARG,NH1,D,110,-,LEU,O,2.65,2.21,104.5,137.4,151.5,S,M,-1,11.0,100
100,A,62,-,ARG,NH1,D,111,-,PHE,O,2.67,1.7,161.9,127.7,124.4,S,M,-1,7.28,101
101,C,4,-,ARG,NH2,D,112,-,THR,O,2.97,1.98,165.9,96.9,101.4,S,M,-1,9.95,102
102,C,4,-,ARG,NE,D,135,-,ASN,OD1,3.24,2.39,141.1,168.0,160.7,S,S,-1,9.0,103
103,C,4,-,ARG,NH2,D,135,-,ASN,OD1,2.88,1.94,154.1,124.3,127.3,S,S,-1,9.0,104
202,E,112,-,GLN,NE2,C,4,-,ARG,O,2.77,1.92,139.1,167.0,153.2,S,M,-1,8.83,203
203,A,155,-,GLN,NE2,E,134,-,ASN,O,2.73,1.96,130.5,95.8,108.3,S,M,-1,8.12,204


In [67]:
# Hydrogen bonds parsed for tcr14.
hb_df = no_hb["tcr14"]

# Chain IDs of the two binding partners.
tcr_chains = ["D", "E"]       # TCR α/β
mhcpep_chains = ["A", "C"]    # MHC + peptide

# Keep bonds whose donor and acceptor lie on opposite partners,
# regardless of which side donates.
mask = (
    hb_df["don_chain"].isin(tcr_chains) & hb_df["acc_chain"].isin(mhcpep_chains)
) | (
    hb_df["don_chain"].isin(mhcpep_chains) & hb_df["acc_chain"].isin(tcr_chains)
)

hb_df.loc[mask]

Unnamed: 0,don_chain,don_resnum,don_icode,don_resname,don_atom,acc_chain,acc_resnum,acc_icode,acc_resname,acc_atom,DA_dist,HA_dist,DHA_angle,HAA_angle,DAA_angle,cat_d,cat_a,seq_gap,CA_dist,hb_index
35,C,4,-,ARG,NH2,D,30,-,ASP,OD1,3.09,2.13,158.9,116.2,113.6,S,S,-1,11.18,36
66,A,151,-,ARG,NH2,D,67,-,ASP,OD1,2.94,1.95,169.5,125.0,125.8,S,S,-1,11.75,67
67,A,151,-,ARG,NH1,D,67,-,ASP,OD2,3.21,2.22,168.7,108.4,110.7,S,S,-1,11.75,68
101,C,4,-,ARG,NH1,D,111,-,ASP,OD2,2.82,1.81,176.1,134.2,132.9,S,S,-1,5.2,102
170,A,69,-,THR,OG1,E,68,-,ALA,O,3.2,2.27,162.5,118.8,123.9,S,M,-1,4.36,171
204,C,8,-,VAL,N,E,111,-,ILE,O,3.34,2.37,159.7,129.4,134.2,M,M,-1,4.9,205
210,A,155,-,GLN,NE2,E,113,-,SER,OG,2.97,2.0,160.9,147.3,152.6,S,S,-1,9.54,211
211,E,134,-,THR,OG1,A,150,-,ALA,O,2.63,1.68,168.1,159.4,155.7,S,M,-1,6.08,212


In [68]:
# Hydrogen bonds parsed for tcr420.
hb_df = no_hb["tcr420"]

# Chain IDs of the two binding partners.
tcr_chains = ["D", "E"]       # TCR α/β
mhcpep_chains = ["A", "C"]    # MHC + peptide

# Keep bonds whose donor and acceptor lie on opposite partners,
# regardless of which side donates.
mask = (
    hb_df["don_chain"].isin(tcr_chains) & hb_df["acc_chain"].isin(mhcpep_chains)
) | (
    hb_df["don_chain"].isin(mhcpep_chains) & hb_df["acc_chain"].isin(tcr_chains)
)

hb_df.loc[mask]

Unnamed: 0,don_chain,don_resnum,don_icode,don_resname,don_atom,acc_chain,acc_resnum,acc_icode,acc_resname,acc_atom,DA_dist,HA_dist,DHA_angle,HAA_angle,DAA_angle,cat_d,cat_a,seq_gap,CA_dist,hb_index
75,A,151,-,ARG,NH1,D,67,-,ASP,OD1,2.63,1.64,165.5,114.9,119.6,S,S,-1,10.82,76
76,A,151,-,ARG,NH2,D,67,-,ASP,OD1,3.2,2.42,133.0,155.4,153.0,S,S,-1,10.82,77
110,C,4,-,ARG,NH1,D,111,-,GLU,O,2.68,1.7,161.7,127.2,120.5,S,M,-1,6.71,111
111,A,62,-,ARG,NH2,D,112,-,ASN,OD1,2.61,1.84,130.3,135.6,123.8,S,S,-1,7.42,112
187,E,68,-,THR,OG1,A,65,-,GLN,OE1,3.36,2.49,151.3,150.0,142.5,S,S,-1,7.75,188
218,E,134,-,GLN,NE2,C,4,-,ARG,O,2.95,1.97,165.4,143.3,138.6,S,M,-1,9.11,219


In [69]:
# Hydrogen bonds parsed for tcr520.
hb_df = no_hb["tcr520"]

# Chain IDs of the two binding partners.
tcr_chains = ["D", "E"]       # TCR α/β
mhcpep_chains = ["A", "C"]    # MHC + peptide

# Keep bonds whose donor and acceptor lie on opposite partners,
# regardless of which side donates.
mask = (
    hb_df["don_chain"].isin(tcr_chains) & hb_df["acc_chain"].isin(mhcpep_chains)
) | (
    hb_df["don_chain"].isin(mhcpep_chains) & hb_df["acc_chain"].isin(tcr_chains)
)

hb_df.loc[mask]

Unnamed: 0,don_chain,don_resnum,don_icode,don_resname,don_atom,acc_chain,acc_resnum,acc_icode,acc_resname,acc_atom,DA_dist,HA_dist,DHA_angle,HAA_angle,DAA_angle,cat_d,cat_a,seq_gap,CA_dist,hb_index
34,A,167,-,TRP,NE1,D,32,-,ASN,OD1,2.79,1.96,137.4,150.4,149.9,S,S,-1,9.22,35
65,D,67,-,TYR,OH,A,166,-,GLU,OE2,2.47,1.51,174.5,122.4,124.3,S,S,-1,10.77,66
66,D,68,-,LYS,NZ,A,166,-,GLU,OE1,2.76,1.81,154.6,142.0,144.6,S,S,-1,10.25,67
91,A,62,-,ARG,NH2,D,110,-,ASP,OD1,2.99,2.01,165.3,107.4,108.9,S,S,-1,9.43,92
92,A,62,-,ARG,NE,D,110,-,ASP,OD2,2.69,1.7,166.6,121.4,124.9,S,S,-1,9.43,93
158,E,60,-,TYR,OH,A,76,-,GLU,OE2,2.54,1.63,158.1,135.1,143.2,S,S,-1,11.49,159
200,E,111,-,ARG,NH1,A,150,-,ALA,O,2.63,1.93,124.3,150.2,143.7,S,M,-1,6.93,201
203,A,151,-,ARG,NH1,E,136,-,GLU,OE1,2.64,1.95,123.2,111.6,93.1,S,S,-1,10.25,204
204,A,151,-,ARG,NH1,E,136,-,GLU,OE2,2.61,1.74,142.3,103.3,94.5,S,S,-1,10.25,205
205,A,151,-,ARG,NH2,E,136,-,GLU,OE2,2.7,1.82,144.6,91.5,97.6,S,S,-1,10.25,206


In [None]:
from Bio.PDB import PDBParser, NeighborSearch, Selection
import numpy as np
import pandas as pd
import math
from biotite.structure.info import vdw_radius_single

def atom_id_tuple(atom):
    """
    Describe an atom as (chain id, residue number, residue name, atom name).

    The residue is the atom's parent and the chain is the residue's parent;
    the residue number is element 1 of the residue's id.
    """
    residue = atom.get_parent()
    chain_id = residue.get_parent().id
    residue_number = residue.get_id()[1]
    return chain_id, residue_number, residue.get_resname(), atom.get_id()

def get_vdw_radius(atom):
    """
    Return the van der Waals radius for an atom's element.

    The element symbol is read from ``atom.element``, stripped and
    upper-cased, then looked up with ``vdw_radius_single``. When the atom
    has no element symbol, or the lookup has no radius for it (it then
    yields None), the carbon radius is used instead, so callers can always
    add the result to another radius.
    """
    elem = (atom.element or "").strip().upper()
    radius = vdw_radius_single(elem) if elem else None
    if radius is None:
        # Unknown / missing element: default to carbon.
        radius = vdw_radius_single("C")
    return radius

def find_vdw_contacts(atoms_A, atoms_B, ns, tol=0.5, max_dist=4.5):
    """
    Find vdW contacts between atoms_A and atoms_B.

    A pair counts as a contact when one atom is in atoms_A, the other in
    atoms_B, and their distance is at most the sum of their vdW radii
    plus ``tol``.

    Parameters
    ----------
    atoms_A, atoms_B : iterable of atoms
        The two partner atom sets; atoms must be hashable.
    ns : object with ``search_all(radius)``
        Neighbour search returning candidate atom pairs.
    tol : float
        Slack added to the summed radii.
    max_dist : float
        Prefilter cutoff passed to ``ns.search_all``; pairs farther apart
        than this are never examined.

    Returns
    -------
    pd.DataFrame
        One row per contact with columns chain1, resnum1, resname1, atom1,
        chain2, resnum2, resname2, atom2, distance, vdw_cutoff. The columns
        are present even when no contact is found.
    """
    # Fixed schema so an empty result still supports column lookups.
    columns = [
        "chain1", "resnum1", "resname1", "atom1",
        "chain2", "resnum2", "resname2", "atom2",
        "distance", "vdw_cutoff",
    ]
    vdw_records = []

    # Sets give constant-time membership tests inside the pair loop.
    atoms_A_set = set(atoms_A)
    atoms_B_set = set(atoms_B)

    # prefilter: all atom pairs within max_dist
    for a1, a2 in ns.search_all(max_dist):
        # We only want pairs where one atom is in A and the other in B
        crosses_partners = (
            (a1 in atoms_A_set and a2 in atoms_B_set)
            or (a2 in atoms_A_set and a1 in atoms_B_set)
        )
        if not crosses_partners:
            continue

        cutoff = get_vdw_radius(a1) + get_vdw_radius(a2) + tol
        dist = np.linalg.norm(a1.coord - a2.coord)

        if dist <= cutoff:
            c1, rnum1, resn1, at1 = atom_id_tuple(a1)
            c2, rnum2, resn2, at2 = atom_id_tuple(a2)
            vdw_records.append({
                "chain1": c1,
                "resnum1": rnum1,
                "resname1": resn1,
                "atom1": at1,
                "chain2": c2,
                "resnum2": rnum2,
                "resname2": resn2,
                "atom2": at2,
                "distance": dist,
                "vdw_cutoff": cutoff,
            })

    return pd.DataFrame(vdw_records, columns=columns)


In [50]:
def get_vdw(pdb_path):
    """
    Compute vdW contacts between chains D/E and chains A/C of a PDB file.

    Only the first model of the structure is used. The neighbour search is
    built over every atom of that model; find_vdw_contacts then keeps the
    pairs that bridge the two chain groups.
    """
    # Define which chains belong to each partner
    chains_A = ["D", "E"]  # TCR α/β
    chains_B = ["A", "C"]  # HLA heavy chain + peptide

    model = PDBParser(QUIET=True).get_structure("complex", pdb_path)[0]

    # Collect each partner's atoms in a single pass over the chains.
    atoms_A, atoms_B = [], []
    for chain in model:
        if chain.id in chains_A:
            atoms_A.extend(chain.get_atoms())
        if chain.id in chains_B:
            atoms_B.extend(chain.get_atoms())

    ns = NeighborSearch(list(model.get_atoms()))
    return find_vdw_contacts(atoms_A, atoms_B, ns)

# Compute vdW contacts for each TCR's ranked_0.pdb and write them next to it as CSV.
# yes_vdw / no_vdw map TCR id -> contact DataFrame.
yes_vdw = {}
no_vdw = {}
for tcr_group, vdw_store in ((yes_tcrs, yes_vdw), (no_tcrs, no_vdw)):
    for tcr in tcr_group:
        vdw_store[tcr] = get_vdw(f"/workspaces/tcr_structure_embedding/data/killing_assay/tcrmodel2_out/{tcr}/ranked_0.pdb")
        vdw_store[tcr].to_csv(f"/workspaces/tcr_structure_embedding/data/killing_assay/tcrmodel2_out/{tcr}/ranked_0_vdw.csv", index=False)



In [73]:
# NOTE: `hb_df` is reused here for the vdW contact table of tcr363,
# not a hydrogen-bond table.
hb_df = yes_vdw["tcr363"]

# Chain IDs of the two binding partners.
tcr_chains = ["D", "E"]       # TCR α/β
mhcpep_chains = ["A", "C"]    # MHC + peptide

# Keep contacts that bridge the two partners, whichever side is listed first.
mask = (
    hb_df["chain1"].isin(tcr_chains) & hb_df["chain2"].isin(mhcpep_chains)
) | (
    hb_df["chain1"].isin(mhcpep_chains) & hb_df["chain2"].isin(tcr_chains)
)

hb_df.loc[mask]

Unnamed: 0,chain1,resnum1,resname1,atom1,chain2,resnum2,resname2,atom2,distance,vdw_cutoff
0,D,84,THR,OG1,A,58,GLU,OE1,2.842864,3.54
1,D,84,THR,HG1,A,58,GLU,CD,3.021770,3.30
2,D,84,THR,HG1,A,58,GLU,OE1,1.889530,3.12
3,D,30,SER,OG,A,62,ARG,HG3,2.865077,3.12
4,D,30,SER,OG,A,62,ARG,HA,2.875092,3.12
...,...,...,...,...,...,...,...,...,...,...
340,D,132,ASP,OD1,A,150,ALA,O,3.447050,3.54
341,D,111,GLY,O,A,69,THR,HG1,2.168724,3.12
342,D,111,GLY,O,A,69,THR,OG1,3.112494,3.54
343,D,110,LEU,O,A,69,THR,HG1,2.896418,3.12


In [74]:
# NOTE: `hb_df` is reused here for the vdW contact table of tcr374,
# not a hydrogen-bond table.
hb_df = yes_vdw["tcr374"]

# Chain IDs of the two binding partners.
tcr_chains = ["D", "E"]       # TCR α/β
mhcpep_chains = ["A", "C"]    # MHC + peptide

# Keep contacts that bridge the two partners, whichever side is listed first.
mask = (
    hb_df["chain1"].isin(tcr_chains) & hb_df["chain2"].isin(mhcpep_chains)
) | (
    hb_df["chain1"].isin(mhcpep_chains) & hb_df["chain2"].isin(tcr_chains)
)

hb_df.loc[mask]

Unnamed: 0,chain1,resnum1,resname1,atom1,chain2,resnum2,resname2,atom2,distance,vdw_cutoff
0,E,30,ASN,HD21,A,76,GLU,OE1,1.882111,3.12
1,E,30,ASN,HD21,A,76,GLU,CD,3.003793,3.30
2,E,32,GLU,OE1,C,8,VAL,HG22,2.826509,3.12
3,E,32,GLU,OE1,C,8,VAL,CG2,3.446995,3.72
4,E,32,GLU,OE1,C,8,VAL,HG21,3.044492,3.12
...,...,...,...,...,...,...,...,...,...,...
330,D,113,LEU,O,A,155,GLN,OE1,3.437631,3.54
331,D,113,LEU,C,C,5,CYS,SG,3.418736,4.00
332,D,113,LEU,C,C,5,CYS,CB,3.494192,3.90
333,D,113,LEU,CB,C,5,CYS,CB,3.697534,3.90


In [75]:
# NOTE: `hb_df` is reused here for the vdW contact table of tcr462,
# not a hydrogen-bond table.
hb_df = yes_vdw["tcr462"]

# Chain IDs of the two binding partners.
tcr_chains = ["D", "E"]       # TCR α/β
mhcpep_chains = ["A", "C"]    # MHC + peptide

# Keep contacts that bridge the two partners, whichever side is listed first.
mask = (
    hb_df["chain1"].isin(tcr_chains) & hb_df["chain2"].isin(mhcpep_chains)
) | (
    hb_df["chain1"].isin(mhcpep_chains) & hb_df["chain2"].isin(tcr_chains)
)

hb_df.loc[mask]

Unnamed: 0,chain1,resnum1,resname1,atom1,chain2,resnum2,resname2,atom2,distance,vdw_cutoff
0,D,66,LYS,HZ2,A,161,GLU,CD,3.120335,3.30
1,D,66,LYS,HZ2,A,161,GLU,OE2,2.035717,3.12
2,D,66,LYS,HZ1,A,161,GLU,HB3,2.445999,2.70
3,D,66,LYS,HZ2,A,161,GLU,HB3,2.372186,2.70
4,D,66,LYS,HZ2,A,161,GLU,CB,2.839009,3.30
...,...,...,...,...,...,...,...,...,...,...
275,D,32,TYR,HE2,A,163,LEU,HD12,2.676822,2.70
276,D,32,TYR,CE2,A,62,ARG,HH21,2.384736,3.30
277,D,32,TYR,HE2,A,62,ARG,HH21,1.940723,2.70
278,E,134,ASN,HD21,C,4,ARG,HD3,2.341162,2.70


In [72]:
# NOTE: `hb_df` is reused here for the vdW contact table of tcr14,
# not a hydrogen-bond table.
hb_df = no_vdw["tcr14"]

# Chain IDs of the two binding partners.
tcr_chains = ["D", "E"]       # TCR α/β
mhcpep_chains = ["A", "C"]    # MHC + peptide

# Keep contacts that bridge the two partners, whichever side is listed first.
mask = (
    hb_df["chain1"].isin(tcr_chains) & hb_df["chain2"].isin(mhcpep_chains)
) | (
    hb_df["chain1"].isin(mhcpep_chains) & hb_df["chain2"].isin(tcr_chains)
)

hb_df.loc[mask]

Unnamed: 0,chain1,resnum1,resname1,atom1,chain2,resnum2,resname2,atom2,distance,vdw_cutoff
0,D,30,ASP,OD1,C,4,ARG,HH12,2.053375,3.12
1,D,30,ASP,OD1,C,4,ARG,CZ,3.527892,3.72
2,D,30,ASP,OD1,C,4,ARG,NH1,2.948179,3.57
3,D,31,THR,OG1,A,163,LEU,HD23,2.873241,3.12
4,D,31,THR,OG1,A,163,LEU,CD2,3.626406,3.72
...,...,...,...,...,...,...,...,...,...,...
291,D,66,PHE,CE1,A,158,ALA,CB,3.706267,3.90
292,D,111,ASP,HB2,C,4,ARG,C,3.189532,3.30
293,D,40,TYR,CZ,A,155,GLN,HG3,3.056461,3.30
294,D,111,ASP,O,C,4,ARG,C,3.621631,3.72


In [70]:
# NOTE: `hb_df` is reused here for the vdW contact table of tcr520,
# not a hydrogen-bond table.
hb_df = no_vdw["tcr520"]

# Chain IDs of the two binding partners.
tcr_chains = ["D", "E"]       # TCR α/β
mhcpep_chains = ["A", "C"]    # MHC + peptide

# Keep contacts that bridge the two partners, whichever side is listed first.
mask = (
    hb_df["chain1"].isin(tcr_chains) & hb_df["chain2"].isin(mhcpep_chains)
) | (
    hb_df["chain1"].isin(mhcpep_chains) & hb_df["chain2"].isin(tcr_chains)
)

hb_df.loc[mask]

Unnamed: 0,chain1,resnum1,resname1,atom1,chain2,resnum2,resname2,atom2,distance,vdw_cutoff
0,E,136,GLU,OE2,A,151,ARG,HH11,1.737484,3.12
1,E,136,GLU,CD,A,151,ARG,HH11,2.364838,3.30
2,E,136,GLU,OE1,A,151,ARG,HH11,2.413516,3.12
3,E,136,GLU,OE2,A,151,ARG,NH1,2.610369,3.57
4,E,136,GLU,CD,A,151,ARG,NH1,2.982246,3.75
...,...,...,...,...,...,...,...,...,...,...
382,D,112,LEU,CD2,C,4,ARG,HG2,3.187596,3.30
383,D,112,LEU,HD22,A,66,ILE,HG12,2.430976,2.70
384,D,112,LEU,HD22,A,66,ILE,CG1,3.239764,3.30
385,E,113,ASP,CB,C,4,ARG,HH22,3.078936,3.30


In [71]:
# NOTE: `hb_df` is reused here for the vdW contact table of tcr420,
# not a hydrogen-bond table.
hb_df = no_vdw["tcr420"]

# Chain IDs of the two binding partners.
tcr_chains = ["D", "E"]       # TCR α/β
mhcpep_chains = ["A", "C"]    # MHC + peptide

# Keep contacts that bridge the two partners, whichever side is listed first.
mask = (
    hb_df["chain1"].isin(tcr_chains) & hb_df["chain2"].isin(mhcpep_chains)
) | (
    hb_df["chain1"].isin(mhcpep_chains) & hb_df["chain2"].isin(tcr_chains)
)

hb_df.loc[mask]

Unnamed: 0,chain1,resnum1,resname1,atom1,chain2,resnum2,resname2,atom2,distance,vdw_cutoff
0,E,30,ASN,HD22,A,76,GLU,OE1,2.754589,3.12
1,E,30,ASN,ND2,A,76,GLU,OE1,3.449637,3.57
2,E,60,SER,HB2,A,72,GLN,CG,3.017424,3.30
3,E,60,SER,HB2,A,72,GLN,HG2,2.217708,2.70
4,E,85,LYS,HZ1,A,76,GLU,OE2,2.699744,3.12
...,...,...,...,...,...,...,...,...,...,...
198,D,135,ASN,ND2,C,4,ARG,HH22,2.989097,3.15
199,D,135,ASN,ND2,C,4,ARG,NH2,3.359159,3.60
200,D,135,ASN,ND2,C,4,ARG,HH21,3.124904,3.15
201,E,134,GLN,HE21,C,4,ARG,O,1.967191,3.12
