# Pinder system

In [1]:
from pathlib import Path

from pinder.core import PinderSystem, get_index


Example usage of the Pinder index API is shown below. For more detailed usage examples, see the `pinder-index` notebook.

In [2]:
# Load the pinder index and keep only test-split systems whose receptor and
# ligand have different UniProt accessions and both have an apo structure.
index = get_index()
apo_pair_filter = '(uniprot_L != uniprot_R) and split == "test" and (apo_R and apo_L)'
# Chain the re-index so the frame is built in one expression (no in-place mutation).
hetero_test_apo = index.query(apo_pair_filter).reset_index(drop=True)
hetero_test_apo


Unnamed: 0,split,id,pdb_id,cluster_id,cluster_id_R,cluster_id_L,pinder_s,pinder_xl,pinder_af2,uniprot_R,...,apo_L,apo_R_quality,apo_L_quality,chain1_neff,chain2_neff,chain_R,chain_L,contains_antibody,contains_antigen,contains_enzyme
0,test,3k1i__D1_O25709--3k1i__A1_O25448,3k1i,cluster_26031_5179,cluster_26031,cluster_5179,True,True,False,O25709,...,True,high,high,12.351562,514.0,D1,A1,False,False,False
1,test,6qta__A1_G0SHE6--6qta__B1_G0SC29,6qta,cluster_11327_11328,cluster_11327,cluster_11328,False,True,False,G0SHE6,...,True,high,high,115.75,689.5,A1,B1,False,False,False
2,test,3vf0__B1_Q8IY67--3vf0__A2_P18206,3vf0,cluster_5612_993,cluster_993,cluster_5612,True,True,False,Q8IY67,...,True,high,high,251.125,35.53125,B1,A2,False,False,False
3,test,4aye__D1_Q9JXV4--4aye__A1_P08603,4aye,cluster_3949_4866,cluster_3949,cluster_4866,True,True,False,Q9JXV4,...,True,high,high,14.546875,310.0,D1,A1,False,False,False
4,test,2w8b__A1_P0A855--2w8b__H1_P0A912,2w8b,cluster_15535_1924,cluster_15535,cluster_1924,True,True,False,P0A855,...,True,high,high,308.25,1150.0,A1,H1,False,False,False
5,test,5y4r__A1_O87131--5y4r__B1_Q9HVI1,5y4r,cluster_8825_8826,cluster_8825,cluster_8826,True,True,False,O87131,...,True,high,high,610.0,144.875,A1,B1,False,False,True
6,test,3egv__A1_Q84BQ9--3egv__B1_Q5SLP6,3egv,cluster_33015_371,cluster_33015,cluster_371,True,True,False,Q84BQ9,...,True,high,high,806.5,637.0,A1,B1,False,False,True
7,test,6wjc__A1_P11229--6wjc__B1_Q8QGR0,6wjc,cluster_1057_1356,cluster_1057,cluster_1356,True,True,False,P11229,...,True,high,high,462.75,518.5,A1,B1,False,False,True
8,test,6tx3__B1_Q9NWY4--6tx3__A1_Q9UGN5,6tx3,cluster_11866_335,cluster_11866,cluster_335,True,True,False,Q9NWY4,...,True,high,high,59.46875,213.0,B1,A1,False,False,True
9,test,2grx__A1_P06971--2grx__C1_P02929,2grx,cluster_12107_8897,cluster_12107,cluster_8897,True,True,False,P06971,...,True,high,high,288.0,562.5,A1,C1,False,False,False


In [3]:
# Use the third row of the filtered index as the running example system.
pinder_id = hetero_test_apo["id"].iloc[2]
pinder_id

'3vf0__B1_Q8IY67--3vf0__A2_P18206'

## PinderSystem API - base class representing the `Structure`s in a pinder entry

In [4]:
# Simplest interface - get a single pinder system
ps = PinderSystem(pinder_id)
ps


PinderSystem(
entry = IndexEntry(
    (
        'split',
        'test',
    ),
    (
        'id',
        '3vf0__B1_Q8IY67--3vf0__A2_P18206',
    ),
    (
        'pdb_id',
        '3vf0',
    ),
    (
        'cluster_id',
        'cluster_5612_993',
    ),
    (
        'cluster_id_R',
        'cluster_993',
    ),
    (
        'cluster_id_L',
        'cluster_5612',
    ),
    (
        'pinder_s',
        True,
    ),
    (
        'pinder_xl',
        True,
    ),
    (
        'pinder_af2',
        False,
    ),
    (
        'uniprot_R',
        'Q8IY67',
    ),
    (
        'uniprot_L',
        'P18206',
    ),
    (
        'holo_R_pdb',
        '3vf0__B1_Q8IY67-R.pdb',
    ),
    (
        'holo_L_pdb',
        '3vf0__A2_P18206-L.pdb',
    ),
    (
        'predicted_R_pdb',
        'af__Q8IY67.pdb',
    ),
    (
        'predicted_L_pdb',
        'af__P18206.pdb',
    ),
    (
        'apo_R_pdb',
        '3smz__A1_Q8IY67.pdb',
    ),
    (
        'apo_L_pdb',
        '

In [5]:
# The system exposes a ligand/receptor pair for each monomer flavor:
# holo, predicted and apo.
holo_L = ps.holo_ligand
holo_R = ps.holo_receptor
pred_L = ps.pred_ligand
pred_R = ps.pred_receptor
apo_L = ps.apo_ligand
apo_R = ps.apo_receptor

# Display one of them to show the Structure repr.
holo_L


Structure(
    filepath=/Users/danielkovtun/.local/share/pinder/2024-02/test_set_pdbs/3vf0__A2_P18206-L.pdb,
    uniprot_map=/Users/danielkovtun/.local/share/pinder/2024-02/mappings/3vf0__A2_P18206-L.parquet,
    pinder_id='3vf0__A2_P18206-L',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (1380,),
)

## Classify system difficulty based on the degree of conformational shift between unbound and bound states

In [6]:
ps.unbound_difficulty("apo")

{'Fnat': 0.5421686746987951,
 'Fnonnat': 0.3076923076923077,
 'common_contacts': 45,
 'differing_contacts': 20,
 'bound_contacts': 83,
 'unbound_contacts': 65,
 'fnonnat_R': 0.2857142857142857,
 'fnonnat_L': 0.0,
 'fnat_R': 0.5882352941176471,
 'fnat_L': 0.7692307692307693,
 'difficulty': 'Rigid-body',
 'I-RMSD': 1.1255295,
 'matched_interface_chains': 2,
 'holo_receptor_interface_res': 34,
 'holo_ligand_interface_res': 26,
 'apo_receptor_interface_res': 28,
 'apo_ligand_interface_res': 20,
 'L-RMSD': 0.94191545,
 'R-RMSD': 1.4988927,
 'unbound_id': '3smz__A1_Q8IY67--5l0h__A1_P18206',
 'unbound_body': 'receptor_ligand',
 'monomer_name': 'apo'}

In [7]:
ps.unbound_difficulty("predicted")

{'Fnat': 0.5662650602409639,
 'Fnonnat': 0.9225700164744646,
 'common_contacts': 47,
 'differing_contacts': 560,
 'bound_contacts': 83,
 'unbound_contacts': 607,
 'fnonnat_R': 0.8435374149659864,
 'fnonnat_L': 0.8670520231213873,
 'fnat_R': 0.6764705882352942,
 'fnat_L': 0.8846153846153846,
 'difficulty': 'Difficult',
 'I-RMSD': 3.424884,
 'matched_interface_chains': 2,
 'holo_receptor_interface_res': 34,
 'holo_ligand_interface_res': 26,
 'apo_receptor_interface_res': 147,
 'apo_ligand_interface_res': 173,
 'L-RMSD': 2.1201644,
 'R-RMSD': 0.6285352,
 'unbound_id': 'af__Q8IY67--af__P18206',
 'unbound_body': 'receptor_ligand',
 'monomer_name': 'predicted'}

## Illustrating utilities available in `Structure` instances

In [8]:
holo_L.filter("atom_name", mask=["CA"])


Structure(
    filepath=/Users/danielkovtun/.local/share/pinder/2024-02/test_set_pdbs/3vf0__A2_P18206-L.pdb,
    uniprot_map=<class 'pandas.core.frame.DataFrame'> with shape (283, 14),
    pinder_id='3vf0__A2_P18206-L',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (178,),
)

In [9]:
apo_L.filter("atom_name", mask=["CA"])


Structure(
    filepath=/Users/danielkovtun/.local/share/pinder/2024-02/pdbs/5l0h__A1_P18206.pdb,
    uniprot_map=<class 'pandas.core.frame.DataFrame'> with shape (176, 14),
    pinder_id='5l0h__A1_P18206',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (173,),
)

## Can also filter "in place" rather than returning a copy (a la pandas)

In [10]:
# copy=False filters `apo_L` itself instead of returning a new Structure,
# so from here on `apo_L` only holds its CA atoms.
# NOTE(review): `apo_L` was obtained from `ps.apo_ligand`, so the structure held by
# `ps` is presumably reduced to CA atoms as well — confirm before reusing `ps` below.
apo_L.filter("atom_name", mask=["CA"], copy=False)

In [11]:
(
    ps.apo_ligand.filter("atom_name", mask=["CA"]),
    ps.holo_ligand.filter("atom_name", mask=["CA"])
)


(Structure(
     filepath=/Users/danielkovtun/.local/share/pinder/2024-02/pdbs/5l0h__A1_P18206.pdb,
     uniprot_map=<class 'pandas.core.frame.DataFrame'> with shape (176, 14),
     pinder_id='5l0h__A1_P18206',
     atom_array=<class 'biotite.structure.AtomArray'> with shape (173,),
 ),
 Structure(
     filepath=/Users/danielkovtun/.local/share/pinder/2024-02/test_set_pdbs/3vf0__A2_P18206-L.pdb,
     uniprot_map=<class 'pandas.core.frame.DataFrame'> with shape (283, 14),
     pinder_id='3vf0__A2_P18206-L',
     atom_array=<class 'biotite.structure.AtomArray'> with shape (178,),
 ))

## Create a masked unbound (apo) complex aligned to the bound complex

In [12]:
apo_complex = ps.create_apo_complex()
apo_complex


Structure(
    filepath=/Users/danielkovtun/.local/share/pinder/2024-02/pdbs/3smz__A1_Q8IY67--5l0h__A1_P18206.pdb,
    uniprot_map=<class 'pandas.core.frame.DataFrame'> with shape (460, 14),
    pinder_id='3smz__A1_Q8IY67--5l0h__A1_P18206',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (2355,),
)

In [13]:
apo_complex.dataframe

Unnamed: 0,chain_id,res_name,res_code,res_id,atom_name,b_factor,ins_code,hetero,element,x,y,z
0,R,HIS,H,1,N,0.0,,False,N,69.377380,105.086411,-51.232784
1,R,HIS,H,1,CA,0.0,,False,C,69.471100,103.738152,-50.680832
2,R,HIS,H,1,C,0.0,,False,C,70.264641,103.716263,-49.368172
3,R,HIS,H,1,O,0.0,,False,O,70.223633,102.714134,-48.653946
4,R,HIS,H,1,CB,0.0,,False,C,70.093376,102.773239,-51.701916
...,...,...,...,...,...,...,...,...,...,...,...,...
2350,L,ARG,R,170,CA,0.0,,False,C,45.438419,86.089317,-54.533417
2351,L,LYS,K,171,CA,0.0,,False,C,44.941048,83.492149,-57.280712
2352,L,THR,T,172,CA,0.0,,False,C,46.387222,85.555809,-60.138443
2353,L,PRO,P,173,CA,0.0,,False,C,49.873405,84.154137,-59.526482


### What's going on under the hood 

In [14]:
# Re-create the system and re-fetch the apo monomers, since `apo_L` was
# filtered in place earlier in the notebook.
ps = PinderSystem(pinder_id)
apo_L, apo_R = ps.apo_ligand, ps.apo_receptor

# Get the "in common" masked structures for each apo monomer and its aligned
# holo counterpart; once masked, the pairs can be superimposed.
apo_R, holo_R = apo_R.align_common_sequence(ps.aligned_holo_R)
apo_L, holo_L = apo_L.align_common_sequence(ps.aligned_holo_L)

# RMSD after superposition (without outlier removal) is the second return value.
# Keep one RMSD per monomer so the receptor value is not overwritten by the ligand's.
R_super, rms_R, _ = apo_R.superimpose(holo_R)
L_super, rms_L, _ = apo_L.superimpose(holo_L)

In [15]:
L_super

Structure(
    filepath=/Users/danielkovtun/.local/share/pinder/2024-02/pdbs/5l0h__A1_P18206.pdb,
    uniprot_map=/Users/danielkovtun/.local/share/pinder/2024-02/mappings/5l0h__A1_P18206.parquet,
    pinder_id='5l0h__A1_P18206',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (1327,),
)

In [16]:
# Now we can create the complexes using Structure.__add__ methods
apo_binary = R_super + L_super
holo_binary = holo_R + holo_L

apo_binary

Structure(
    filepath=/Users/danielkovtun/.local/share/pinder/2024-02/pdbs/3smz__A1_Q8IY67--5l0h__A1_P18206.pdb,
    uniprot_map=<class 'pandas.core.frame.DataFrame'> with shape (460, 14),
    pinder_id='3smz__A1_Q8IY67--5l0h__A1_P18206',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (3510,),
)

In [17]:
# Alternatively, there exist utils for creating the masked apo and predicted complex
pred_complex = ps.create_pred_complex()
pred_complex


Structure(
    filepath=/Users/danielkovtun/.local/share/pinder/2024-02/pdbs/af__Q8IY67--af__P18206.pdb,
    uniprot_map=None,
    pinder_id='af__Q8IY67--af__P18206',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (3566,),
)

In [18]:
# Output directory for the example complexes, created under the current
# working directory if it does not exist yet.
masked_complex_dir = Path.cwd() / "unbound_complexes"
masked_complex_dir.mkdir(parents=True, exist_ok=True)

masked_complex_dir



PosixPath('/Users/danielkovtun/dev/pinder_dataset/examples/unbound_complexes')

In [19]:
# If the output PDB filepath is omitted, the structure is written to Structure.filepath,
# which may overwrite an existing file.
# For these complexes that default would be a new file named after the combined
# monomer filepaths, so an explicit output path is passed instead.
pred_complex.to_pdb(masked_complex_dir / "pred_complex.pdb")
apo_complex.to_pdb(masked_complex_dir / "apo_complex.pdb")


In [20]:
# Clean up: delete the two example PDB files written to the output directory.
for pdb_name in ("pred_complex.pdb", "apo_complex.pdb"):
    (masked_complex_dir / pdb_name).unlink()

## Structures have `resolved_pdb2uniprot` and `resolved_uniprot2pdb` properties

They return dicts of resolved residue numbers mapped from PDB numbering to UniProt numbering, and vice versa.


The full mapping is available in `Structure.uniprot_mapping`.
To get only the _resolved_ mapping, access the `Structure.resolved_mapping` attribute.

In [21]:
apo_L.resolved_pdb2uniprot

{2: 960,
 3: 961,
 4: 962,
 5: 963,
 6: 964,
 7: 965,
 8: 966,
 9: 967,
 10: 968,
 11: 969,
 12: 970,
 13: 971,
 14: 972,
 15: 973,
 16: 974,
 18: 976,
 19: 977,
 20: 978,
 21: 979,
 22: 980,
 23: 981,
 24: 982,
 25: 983,
 26: 984,
 27: 985,
 28: 986,
 29: 987,
 30: 988,
 31: 989,
 32: 990,
 33: 991,
 34: 992,
 35: 993,
 36: 994,
 37: 995,
 38: 996,
 39: 997,
 40: 998,
 41: 999,
 42: 1000,
 43: 1001,
 44: 1002,
 45: 1003,
 46: 1004,
 47: 1005,
 48: 1006,
 49: 1007,
 50: 1008,
 51: 1009,
 52: 1010,
 53: 1011,
 54: 1012,
 55: 1013,
 56: 1014,
 57: 1015,
 58: 1016,
 59: 1017,
 60: 1018,
 61: 1019,
 62: 1020,
 63: 1021,
 64: 1022,
 65: 1023,
 66: 1024,
 67: 1025,
 68: 1026,
 69: 1027,
 70: 1028,
 71: 1029,
 72: 1030,
 73: 1031,
 74: 1032,
 75: 1033,
 76: 1034,
 77: 1035,
 78: 1036,
 79: 1037,
 80: 1038,
 81: 1039,
 82: 1040,
 83: 1041,
 84: 1042,
 85: 1043,
 86: 1044,
 87: 1045,
 88: 1046,
 89: 1047,
 90: 1048,
 91: 1049,
 92: 1050,
 93: 1051,
 94: 1052,
 95: 1053,
 96: 1054,
 97: 1055,
 9

In [22]:
apo_complex.resolved_mapping

Unnamed: 0,entry_id,entity_id,asym_id,pdb_strand_id,resi,resi_pdb,resi_auth,resn,one_letter_code_can,resolved,one_letter_code_uniprot,resi_uniprot,uniprot_acc,chain
0,,1,A,A,3,39,39.0,LEU,L,1,L,39,,A1
1,,1,A,A,4,40,40.0,ASP,D,1,D,40,,A1
2,,1,A,A,5,41,41.0,PRO,P,1,P,41,,A1
3,,1,A,A,6,42,42.0,GLU,E,1,E,42,,A1
4,,1,A,A,7,43,43.0,GLU,E,1,E,43,,A1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
446,,1,A,A,170,1128,1128.0,ARG,R,1,R,1128,,A1
447,,1,A,A,171,1129,1129.0,LYS,K,1,K,1129,,A1
448,,1,A,A,172,1130,1130.0,THR,T,1,T,1130,,A1
449,,1,A,A,173,1131,1131.0,PRO,P,1,P,1131,,A1


## Case with multiple available apo structures

In [23]:
pinder_id = "1ldt__A1_P00761--1ldt__B1_P80424"


In [24]:
ps_canon = PinderSystem(pinder_id)
ps_canon.entry.apo_R_alt



['1s6f__A1_P00761.pdb',
 '1s85__A1_P00761.pdb',
 '1s84__A1_P00761.pdb',
 '2a32__A1_P00761.pdb',
 '1s6h__A1_P00761.pdb',
 '1s5s__A1_P00761.pdb',
 '1fni__A1_P00761.pdb',
 '1s81__A1_P00761.pdb',
 '1fmg__A1_P00761.pdb',
 '1qqu__A1_P00761.pdb',
 '2a31__A1_P00761.pdb',
 '1fn6__A1_P00761.pdb',
 '1s83__A1_P00761.pdb']

In [25]:
ps_canon.entry.apo_L_alt

['2kmq__A1_P80424.pdb', '2kmp__A1_P80424.pdb', '2kmr__A1_P80424.pdb']

### Specify 2kmr as apo ligand and 1fmg as apo receptor
Note: the default `apo_receptor` and `apo_ligand` are determined based on the selected canonical apo monomer.

The canonical monomers were selected based on their sequence overlap and difficulty metrics calculated in `pinder.eval.dockq.unbound`

In [26]:

ps = PinderSystem(pinder_id, apo_ligand_pdb_code="2kmr", apo_receptor_pdb_code="1fmg")
ps

PinderSystem(
entry = IndexEntry(
    (
        'split',
        'train',
    ),
    (
        'id',
        '1ldt__A1_P00761--1ldt__B1_P80424',
    ),
    (
        'pdb_id',
        '1ldt',
    ),
    (
        'cluster_id',
        'cluster_312_5091',
    ),
    (
        'cluster_id_R',
        'cluster_312',
    ),
    (
        'cluster_id_L',
        'cluster_5091',
    ),
    (
        'pinder_s',
        False,
    ),
    (
        'pinder_xl',
        False,
    ),
    (
        'pinder_af2',
        False,
    ),
    (
        'uniprot_R',
        'P00761',
    ),
    (
        'uniprot_L',
        'P80424',
    ),
    (
        'holo_R_pdb',
        '1ldt__A1_P00761-R.pdb',
    ),
    (
        'holo_L_pdb',
        '1ldt__B1_P80424-L.pdb',
    ),
    (
        'predicted_R_pdb',
        'af__P00761.pdb',
    ),
    (
        'predicted_L_pdb',
        'af__P80424.pdb',
    ),
    (
        'apo_R_pdb',
        '1s82__A1_P00761.pdb',
    ),
    (
        'apo_L_pdb',
      

## Classify system difficulty based on the degree of conformational shift between unbound and bound states

In [27]:
ps.unbound_difficulty("apo")

{'Fnat': 0.847457627118644,
 'Fnonnat': 0.5,
 'common_contacts': 50,
 'differing_contacts': 50,
 'bound_contacts': 59,
 'unbound_contacts': 100,
 'fnonnat_R': 0.4318181818181818,
 'fnonnat_L': 0.35,
 'fnat_R': 0.8620689655172413,
 'fnat_L': 1.0,
 'difficulty': 'Difficult',
 'I-RMSD': 2.3008885,
 'matched_interface_chains': 2,
 'holo_receptor_interface_res': 29,
 'holo_ligand_interface_res': 13,
 'apo_receptor_interface_res': 44,
 'apo_ligand_interface_res': 20,
 'L-RMSD': 9.371291,
 'R-RMSD': 0.47199318,
 'unbound_id': '1fmg__A1_P00761--2kmr__A1_P80424',
 'unbound_body': 'receptor_ligand',
 'monomer_name': 'apo'}

In [28]:
ps.unbound_difficulty("predicted")

{'Fnat': 0.847457627118644,
 'Fnonnat': 0.05660377358490566,
 'common_contacts': 50,
 'differing_contacts': 3,
 'bound_contacts': 59,
 'unbound_contacts': 53,
 'fnonnat_R': 0.07692307692307693,
 'fnonnat_L': 0.0,
 'fnat_R': 0.8275862068965517,
 'fnat_L': 0.9230769230769231,
 'difficulty': 'Rigid-body',
 'I-RMSD': 1.2526181,
 'matched_interface_chains': 2,
 'holo_receptor_interface_res': 29,
 'holo_ligand_interface_res': 13,
 'apo_receptor_interface_res': 26,
 'apo_ligand_interface_res': 12,
 'L-RMSD': 3.9255776,
 'R-RMSD': 0.40761378,
 'unbound_id': 'af__P00761--af__P80424',
 'unbound_body': 'receptor_ligand',
 'monomer_name': 'predicted'}

## A bunch of other features of the `Structure` class are illustrated below

In [29]:
apo_L.atom_array[0:10]

array([
	Atom(np.array([36.778, 16.379, 60.821], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="N", element="N"),
	Atom(np.array([37.219, 15.836, 59.542], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="CA", element="C"),
	Atom(np.array([36.18 , 16.079, 58.452], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="C", element="C"),
	Atom(np.array([36.532, 16.338, 57.297], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="O", element="O"),
	Atom(np.array([37.509, 14.339, 59.666], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="CB", element="C"),
	Atom(np.array([38.703, 14.01 , 60.548], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="CG", element="C"),
	Atom(np.array([38.916, 12.517, 60.706], dtype=float32), chain_id="L", 

In [30]:
apo_L.coords[0:10]

array([[36.778, 16.379, 60.821],
       [37.219, 15.836, 59.542],
       [36.18 , 16.079, 58.452],
       [36.532, 16.338, 57.297],
       [37.509, 14.339, 59.666],
       [38.703, 14.01 , 60.548],
       [38.916, 12.517, 60.706],
       [37.981, 11.727, 60.572],
       [40.151, 12.123, 60.994],
       [34.852, 15.989, 58.853]], dtype=float32)

In [31]:
apo_L.residue_names

['ALA',
 'ARG',
 'ASN',
 'ASP',
 'CYS',
 'GLN',
 'GLU',
 'GLY',
 'HIS',
 'ILE',
 'LEU',
 'LYS',
 'MET',
 'PHE',
 'PRO',
 'SER',
 'THR',
 'TRP',
 'VAL']

In [32]:
apo_L.sequence

'QPVNQPILAAAQSLHEATKWSSKGNDIIAAAKRMALLMAEMSRLVRGGSGTKRALIQCAKDIAKASDEVTRLAKEVAKQCTDKRIRTNLLQVCERIPTISTQLKILSTVKATMLGRTNISDEESEQATEMLVHNAQNLMQSVKETVREAEAASIKIRTDAGFTLRWVRKTPW'

In [33]:
apo_L.atom_array.res_name


array(['GLN', 'GLN', 'GLN', ..., 'TRP', 'TRP', 'TRP'], dtype='<U5')

In [34]:
apo_L.atom_array[apo_L.backbone_mask][0:10]


array([
	Atom(np.array([36.778, 16.379, 60.821], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="N", element="N"),
	Atom(np.array([37.219, 15.836, 59.542], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="CA", element="C"),
	Atom(np.array([36.18 , 16.079, 58.452], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="C", element="C"),
	Atom(np.array([34.852, 15.989, 58.853], dtype=float32), chain_id="L", res_id=3, ins_code="", res_name="PRO", hetero=False, atom_name="N", element="N"),
	Atom(np.array([33.88 , 16.232, 57.771], dtype=float32), chain_id="L", res_id=3, ins_code="", res_name="PRO", hetero=False, atom_name="CA", element="C"),
	Atom(np.array([33.954, 17.664, 57.252], dtype=float32), chain_id="L", res_id=3, ins_code="", res_name="PRO", hetero=False, atom_name="C", element="C"),
	Atom(np.array([33.798, 17.833, 55.943], dtype=float32), chain_id="L", r

In [35]:
apo_L.atom_array[apo_L.calpha_mask][0:10]


array([
	Atom(np.array([37.219, 15.836, 59.542], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="CA", element="C"),
	Atom(np.array([33.88 , 16.232, 57.771], dtype=float32), chain_id="L", res_id=3, ins_code="", res_name="PRO", hetero=False, atom_name="CA", element="C"),
	Atom(np.array([33.848, 19.155, 55.33 ], dtype=float32), chain_id="L", res_id=4, ins_code="", res_name="VAL", hetero=False, atom_name="CA", element="C"),
	Atom(np.array([30.979, 21.004, 53.668], dtype=float32), chain_id="L", res_id=5, ins_code="", res_name="ASN", hetero=False, atom_name="CA", element="C"),
	Atom(np.array([31.94 , 20.151, 50.049], dtype=float32), chain_id="L", res_id=6, ins_code="", res_name="GLN", hetero=False, atom_name="CA", element="C"),
	Atom(np.array([30.278, 23.116, 48.249], dtype=float32), chain_id="L", res_id=7, ins_code="", res_name="PRO", hetero=False, atom_name="CA", element="C"),
	Atom(np.array([32.112, 25.664, 50.491], dtype=float32), chain_id="L

In [36]:
apo_L.chain_sequence


{'L': ['Q',
  'P',
  'V',
  'N',
  'Q',
  'P',
  'I',
  'L',
  'A',
  'A',
  'A',
  'Q',
  'S',
  'L',
  'H',
  'E',
  'A',
  'T',
  'K',
  'W',
  'S',
  'S',
  'K',
  'G',
  'N',
  'D',
  'I',
  'I',
  'A',
  'A',
  'A',
  'K',
  'R',
  'M',
  'A',
  'L',
  'L',
  'M',
  'A',
  'E',
  'M',
  'S',
  'R',
  'L',
  'V',
  'R',
  'G',
  'G',
  'S',
  'G',
  'T',
  'K',
  'R',
  'A',
  'L',
  'I',
  'Q',
  'C',
  'A',
  'K',
  'D',
  'I',
  'A',
  'K',
  'A',
  'S',
  'D',
  'E',
  'V',
  'T',
  'R',
  'L',
  'A',
  'K',
  'E',
  'V',
  'A',
  'K',
  'Q',
  'C',
  'T',
  'D',
  'K',
  'R',
  'I',
  'R',
  'T',
  'N',
  'L',
  'L',
  'Q',
  'V',
  'C',
  'E',
  'R',
  'I',
  'P',
  'T',
  'I',
  'S',
  'T',
  'Q',
  'L',
  'K',
  'I',
  'L',
  'S',
  'T',
  'V',
  'K',
  'A',
  'T',
  'M',
  'L',
  'G',
  'R',
  'T',
  'N',
  'I',
  'S',
  'D',
  'E',
  'E',
  'S',
  'E',
  'Q',
  'A',
  'T',
  'E',
  'M',
  'L',
  'V',
  'H',
  'N',
  'A',
  'Q',
  'N',
  'L',
  'M',
  'Q',
  'S',
  'V',
 

In [37]:
apo_complex.chain_sequence

{'L': ['Q',
  'P',
  'V',
  'N',
  'Q',
  'P',
  'I',
  'L',
  'A',
  'A',
  'A',
  'Q',
  'S',
  'L',
  'H',
  'E',
  'A',
  'T',
  'K',
  'W',
  'S',
  'S',
  'K',
  'G',
  'N',
  'D',
  'I',
  'I',
  'A',
  'A',
  'A',
  'K',
  'R',
  'M',
  'A',
  'L',
  'L',
  'M',
  'A',
  'E',
  'M',
  'S',
  'R',
  'L',
  'V',
  'R',
  'G',
  'G',
  'S',
  'G',
  'T',
  'K',
  'R',
  'A',
  'L',
  'I',
  'Q',
  'C',
  'A',
  'K',
  'D',
  'I',
  'A',
  'K',
  'A',
  'S',
  'D',
  'E',
  'V',
  'T',
  'R',
  'L',
  'A',
  'K',
  'E',
  'V',
  'A',
  'K',
  'Q',
  'C',
  'T',
  'D',
  'K',
  'R',
  'I',
  'R',
  'T',
  'N',
  'L',
  'L',
  'Q',
  'V',
  'C',
  'E',
  'R',
  'I',
  'P',
  'T',
  'I',
  'S',
  'T',
  'Q',
  'L',
  'K',
  'I',
  'L',
  'S',
  'T',
  'V',
  'K',
  'A',
  'T',
  'M',
  'L',
  'G',
  'R',
  'T',
  'N',
  'I',
  'S',
  'D',
  'E',
  'E',
  'S',
  'E',
  'Q',
  'A',
  'T',
  'E',
  'M',
  'L',
  'V',
  'H',
  'N',
  'A',
  'Q',
  'N',
  'L',
  'M',
  'Q',
  'S',
  'V',
 

In [38]:
apo_L.sequence

'QPVNQPILAAAQSLHEATKWSSKGNDIIAAAKRMALLMAEMSRLVRGGSGTKRALIQCAKDIAKASDEVTRLAKEVAKQCTDKRIRTNLLQVCERIPTISTQLKILSTVKATMLGRTNISDEESEQATEMLVHNAQNLMQSVKETVREAEAASIKIRTDAGFTLRWVRKTPW'