In [1]:
import numpy as np
import torch

In [2]:
# Reference vocabulary of atom names. Later in the notebook it is used to
# validate the atom names listed in the side-chain site table.
# NOTE(review): presumably mirrors AlphaFold's `residue_constants.atom_types`
# (backbone + side-chain heavy atoms + terminal 'OXT') -- confirm.
atom_types = [
    'N', 'CA', 'C', 'CB', 'O', 'CG', 'CG1', 'CG2', 'OG', 'OG1', 'SG', 'CD',
    'CD1', 'CD2', 'ND1', 'ND2', 'OD1', 'OD2', 'SD', 'CE', 'CE1', 'CE2', 'CE3',
    'NE', 'NE1', 'NE2', 'OE1', 'OE2', 'CH2', 'NH1', 'NH2', 'OH', 'CZ', 'CZ2',
    'CZ3', 'NZ', 'OXT'
]

In [3]:
# One-letter residue codes; every entry is a key of `restype_1to3` below.
restypes = [
    'A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P',
    'S', 'T', 'W', 'Y', 'V'
]

In [4]:
# One-letter -> three-letter residue code.
restype_1to3 = {
    'A': 'ALA',
    'R': 'ARG',
    'N': 'ASN',
    'D': 'ASP',
    'C': 'CYS',
    'Q': 'GLN',
    'E': 'GLU',
    'G': 'GLY',
    'H': 'HIS',
    'I': 'ILE',
    'L': 'LEU',
    'K': 'LYS',
    'M': 'MET',
    'F': 'PHE',
    'P': 'PRO',
    'S': 'SER',
    'T': 'THR',
    'W': 'TRP',
    'Y': 'TYR',
    'V': 'VAL',
}
# Inverse mapping (three-letter -> one-letter); used to build the sequence
# string from `res.resname`.
restype_3to1 = {v: k for k, v in restype_1to3.items()}

In [5]:
# Side-chain torsion (chi) definitions, grouped the way AlphaFold groups them:
# for each residue name, a list of up to four atom quadruples. Each quadruple
# names the four atoms whose dihedral angle is one chi angle (chi1, chi2, ...).
# Residues with an empty list have no chi angle defined here.
chi_angles_atoms = {
    'ALA': [],
    # Chi5 in arginine is always 0 +- 5 degrees, so ignore it.
    'ARG': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD'],
            ['CB', 'CG', 'CD', 'NE'], ['CG', 'CD', 'NE', 'CZ']],
    'ASN': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'OD1']],
    'ASP': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'OD1']],
    'CYS': [['N', 'CA', 'CB', 'SG']],
    'GLN': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD'],
            ['CB', 'CG', 'CD', 'OE1']],
    'GLU': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD'],
            ['CB', 'CG', 'CD', 'OE1']],
    'GLY': [],
    'HIS': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'ND1']],
    'ILE': [['N', 'CA', 'CB', 'CG1'], ['CA', 'CB', 'CG1', 'CD1']],
    'LEU': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD1']],
    'LYS': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD'],
            ['CB', 'CG', 'CD', 'CE'], ['CG', 'CD', 'CE', 'NZ']],
    'MET': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'SD'],
            ['CB', 'CG', 'SD', 'CE']],
    'PHE': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD1']],
    'PRO': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD']],
    'SER': [['N', 'CA', 'CB', 'OG']],
    'THR': [['N', 'CA', 'CB', 'OG1']],
    'TRP': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD1']],
    'TYR': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD1']],
    'VAL': [['N', 'CA', 'CB', 'CG1']],
}


In [6]:
tankbind_src_folder_path = "../tankbind/"
import sys
sys.path.insert(0, tankbind_src_folder_path)
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
import time
import gvp
import gvp.data
from feature_utils import get_protein_feature
# --- Run configuration -------------------------------------------------------
# Input files are read from ./inputs/{pdb_name}.pdb and ./inputs/{csv_name}.
pdb_name = "6scm"
cp_name = "SOS1"
# csv_name with .csv
csv_name = "SOS1-ID-SMILES.csv"
right_pocket = "pocket_4"
# When True, the output folder name gets a month/day/hour/minute suffix so
# that repeated runs do not overwrite each other.
distinguish_by_timestamp = True
# NOTE: base_pre already ends with "/", so `pre` below contains "//".
base_pre = f"./NCIVS/"
timetag = time.strftime("%m%d%H%M")

if distinguish_by_timestamp:
    pre = f"{base_pre}/{cp_name}-{pdb_name}-{timetag}"
else:
    pre = f"{base_pre}/{cp_name}-{pdb_name}"

# --- Working directory -------------------------------------------------------
# WARNING: the sdfs/, PDBs/ and p2rank/ sub-folders are deleted (rm -rf) and
# recreated on every run of this cell. Exit codes of os.system are not checked,
# so a failed copy goes unnoticed here.
os.system(f"mkdir -p {pre}")
os.system(f"rm -rf {pre}/sdfs")
os.system(f"mkdir -p {pre}/sdfs")
os.system(f"rm -rf {pre}/PDBs")
os.system(f"mkdir -p {pre}/PDBs")
os.system(f"rm -rf {pre}/p2rank")
os.system(f"mkdir -p {pre}/p2rank")
os.system(f"cp ./inputs/{csv_name} {pre}")
os.system(f"cp ./inputs/{pdb_name}.pdb {pre}")

# Path of the copied PDB file inside the working directory.
proteinName = pdb_name
proteinFile = f"{pre}/{proteinName}.pdb"

In [7]:
from Bio.PDB import PDBParser
from feature_utils import get_clean_res_list
# Parse the copied PDB file and collect its residues.
parser = PDBParser(QUIET=True)
protein_dict = {}
# Same values as assigned at the end of the setup cell; repeated here.
proteinName = pdb_name
proteinFile = f"{pre}/{proteinName}.pdb"
s = parser.get_structure("example", proteinFile)
res_list = list(s.get_residues())
clean_res_list = get_clean_res_list(res_list, ensure_ca_exist=True)  ## ensure every kept residue contains a 'CA' atom

In [8]:
# Keep only residues that have all four backbone atoms N, CA, C and O.
# NOTE: this rebinds `res_list` (previously the unfiltered residue list).
res_list = [res for res in clean_res_list if (('N' in res) and ('CA' in res) and ('C' in res) and ('O' in res))]

In [9]:
## define x1,x2,x3,x4
def dihedral_angle(a, b, c, d):
    """Return the signed dihedral angle, in radians, between planes abc and bcd.

    Args:
        a, b, c, d: 3-D points (array-likes accepted by ``np.cross``); the
            angle is measured around the b-c axis.

    Returns:
        The angle as computed by ``np.arctan2``, i.e. in the range [-pi, pi].
    """
    ba = a - b
    cb = b - c
    cd = d - c

    # Normals of the two planes, both taken against the shared b-c axis.
    normal_abc = np.cross(ba, cb)
    normal_bcd = np.cross(cd, cb)

    # arctan2(sin-like term, cos-like term): the cross product of the normals
    # projected on the axis gives the sign; the axis length rescales the
    # cosine term so both arguments have the same magnitude factor.
    sin_term = np.dot(np.cross(normal_bcd, normal_abc), cb)
    cos_term = np.linalg.norm(cb) * np.dot(normal_abc, normal_bcd)
    return np.arctan2(sin_term, cos_term)

def sidechain_angle(res_list, chi_angles_atoms):
    """Compute side-chain chi angles for every residue with a complete side chain.

    A residue is "broken" when at least one atom named in its chi-angle
    definitions is missing. Broken residues are identified by their residue
    number only (``res.get_full_id()[3][1]``), so chain id and insertion code
    are ignored. Every residue whose number appears in the returned
    ``broken_res_id`` is excluded from the angle list, even if it precedes the
    broken residue in ``res_list``. This is the same rule ``clean_broken_res``
    applies, so the rows returned here line up one-to-one with its doubled
    residue list.

    Args:
        res_list: iterable of residue objects exposing ``resname``,
            ``get_full_id()``, ``in`` and ``[]`` lookup by atom name, with
            atoms carrying a ``coord`` attribute.
        chi_angles_atoms: dict mapping residue name to a list of 4-atom-name
            groups, one group per chi angle.

    Returns:
        (dihedral_angle_list_t, broken_res_id):
        1. one row of four values per kept residue (chi1..chi4 in radians,
           padded with 0 when fewer than four angles are defined); residues in
           ``double_res_list`` contribute two identical, independent rows;
        2. the sorted, de-duplicated residue numbers of broken residues.

    Raises:
        KeyError: if a residue name is missing from ``chi_angles_atoms``.
    """
    double_res_list = ['HIS', 'ASN', 'GLN', 'TRP', 'TYR']
    n_chi = 4
    residues = list(res_list)

    # Pass 1: collect the residue numbers of all incomplete residues first, so
    # the exclusion below does not depend on iteration order.
    broken_ids = set()
    for res in residues:
        for group in chi_angles_atoms[res.resname]:
            if any(atom_name not in res for atom_name in group):
                broken_ids.add(res.get_full_id()[3][1])
                break

    # Pass 2: compute the angles of every residue that is not excluded.
    dihedral_angle_list_t = []
    for res in residues:
        if res.get_full_id()[3][1] in broken_ids:
            continue
        angles = [
            dihedral_angle(res[group[0]].coord, res[group[1]].coord,
                           res[group[2]].coord, res[group[3]].coord)
            for group in chi_angles_atoms[res.resname]
        ]
        angles.extend([0] * (n_chi - len(angles)))
        # Two-site residues get two rows; copy so the rows do not alias.
        n_rows = 2 if res.resname in double_res_list else 1
        for _ in range(n_rows):
            dihedral_angle_list_t.append(list(angles))

    return dihedral_angle_list_t, sorted(broken_ids)

In [10]:
# Chi-angle rows for the complete residues, plus the residue numbers of the
# residues that were dropped because side-chain atoms are missing.
dihedral_angle_list_t, broken_res_id = sidechain_angle(res_list, chi_angles_atoms)

In [11]:
len(dihedral_angle_list_t)

535

In [12]:
res_list[0].get_full_id()

('example', 0, 'A', (' ', 564, ' '))

In [15]:
def clean_broken_res(res_list, broken_res_id):
    """Drop broken residues and build the site-expanded residue list.

    Args:
        res_list: residues exposing ``resname`` and ``get_full_id()``.
        broken_res_id: container of residue numbers to discard; a residue is
            discarded when ``get_full_id()[3][1]`` is in this container.

    Returns:
        (kept, kept_double): the surviving residues in input order, and the
        same residues where every HIS/ASN/GLN/TRP/TYR appears twice in a row
        (one entry per side-chain site).
    """
    double_res_list = ['HIS', 'ASN', 'GLN', 'TRP', 'TYR']
    kept = [
        res for res in res_list
        if res.get_full_id()[3][1] not in broken_res_id
    ]
    kept_double = []
    for res in kept:
        repeats = 2 if res.resname in double_res_list else 1
        kept_double.extend([res] * repeats)
    return kept, kept_double
# Residues kept after removing the broken ones, and the site-expanded version
# in which two-site residues appear twice.
clean_broken_res_l, clean_broken_res_double = clean_broken_res(res_list, broken_res_id)

In [16]:
# Side-chain site definition per residue: which atom(s) mark the position of
# the side-chain site(s).
#   * flat list with one name  -> the site is that atom;
#   * flat list with six names -> the site is the mean of those atoms
#     (see `return_coord`);
#   * list of two lists        -> the residue has two sites, each described as
#     above (HIS, ASN, GLN, TRP, TYR -- the same residues that are doubled in
#     `double_res_list`).
# ALA, GLY and PRO use the backbone 'N' as their site.
sidec_dict = {
    'ALA': ['N'], 
    'CYS': ['SG'], 
    'ASP': ['OD2'], 
    'GLU': ['OE2'], 
    'PHE': ['CE1','CE2','CD1','CD2','CG','CZ'], 
    'GLY': ['N'], 
    'HIS': [['NE2'],['ND1']],   
    'ILE': ['CD1'], 
    'LYS': ['NZ'], 
    'LEU': ['CG'], 
    'MET': ['SD'], 
    'ASN': [['OD1'],['ND2']], 
    'PRO': ['N'], 
    'GLN': [['OE1'],['NE2']], 
    'ARG': ['CZ'], 
    'SER': ['OG'], 
    'THR': ['OG1'], 
    'VAL': ['CB'], 
    'TRP': [['NE1'], ['CE2','CD2','CE3','CZ3','CH2','CZ2']], 
    'TYR': [['OH'], ['CE1','CE2','CD1','CD2','CG','CZ']]
}

In [55]:
res_list[10]

<Residue VAL het=  resseq=574 icode= >

In [18]:
# Build the `structure` dict that is later handed to
# gvp.data.ProteinGraphDataset, which requires name, seq, and a list of
# shape N * 4 * 3 (N entries x [N, CA, C, O] x xyz).
structure = {}
structure['name'] = "placeholder"
# One letter per entry of the site-expanded residue list, so two-site residues
# show up as doubled letters in the printed sequence.
structure['seq'] = "".join([restype_3to1.get(res.resname) for res in clean_broken_res_double])
print(structure['seq'])
coords = []
for res in clean_broken_res_double:
    res_coords = []
    # Backbone atoms in the fixed order N, CA, C, O.
    for atom in [res['N'], res['CA'], res['C'], res['O']]:
        res_coords.append(list(atom.coord))
    coords.append(res_coords)
    # print(coords)
structure['coords'] = coords
# Extra (non-GVP) field: chi-angle rows from sidechain_angle.
structure['dihedral_angle'] = dihedral_angle_list_t

EQQMRLPSADVYYRFAEPDSEENNIIFEENNMIPIIKAGTVIKLIERLTYYHHMYYADPNNFVRTFLTTYYRSFCKPQQELLSLIIERFEIPEPEPTEADRIAIENNGDQQPLSAELKRFRKEYYIQQPVQQLRVLNNVCRHHWWVEHHHHFYYDFERDAYYLLQQRMEEFIGTVRGKAMKKWWVESITKIIQQRKKIAFQQSSPPTVEWWHHISRPGHHIETFDLLTLHHPIEIARQQLTLLESDLYYRAVQQPSELVGSVWWTKEDKEINNSPNNLLKMIRHHTTNNLTLWWFEKCIVETENNLEERVAVVSRIIEILQQVFQQELNNNNFNNGVLEVVSAMNNSSPVYYRLDHHTFEQQIPSRQQKKILEEAHHELSEDHHYYKKYYLAKLRSINNPPCVPFFGIYYLTNNILKTEEGNNPEVLKRHHGKELINNFSKRRKVAEITGEIQQYYQQNNQQPYYCLRVESDIKRFFENNLNNPMGNNSMEKEFTDYYLFNNKSLEIEPRPKPLPRFPKKYYSYYPLKSPGVRPS


In [19]:
len(structure['seq']), len(coords)

(535, 535)

In [20]:
def return_coord(res, res_sidec_list):
    """Return the coordinate of one side-chain site.

    Args:
        res: residue-like mapping; ``res[atom_name].coord`` must give the
            atom's xyz coordinate.
        res_sidec_list: flat list of atom names describing the site.

    Returns:
        * one name: that atom's ``coord`` object itself (not a copy);
        * several names (e.g. the six atoms of an aromatic ring): the mean of
          the atoms' coordinates as an array of shape (3,).

    Raises:
        ValueError: if ``res_sidec_list`` is empty. (Lists that were neither
            one nor six atoms long used to yield a silent ``None``.)
    """
    n_atoms = len(res_sidec_list)
    if n_atoms == 0:
        raise ValueError("res_sidec_list must contain at least one atom name")
    if n_atoms == 1:
        return res[res_sidec_list[0]].coord
    atom_coords = np.stack(
        [np.asarray(res[atom_name].coord) for atom_name in res_sidec_list]
    )
    return np.mean(atom_coords, axis=0)

def sidec_coord(res_list, sidec_dict):
    """Collect the side-chain site coordinates of every residue, in order.

    Args:
        res_list: residues exposing ``resname`` and atom lookup by name.
        sidec_dict: maps residue name to either a flat list of atom names (one
            site) or a list of such lists (one entry per site).

    Returns:
        A flat list with one coordinate per site, so a residue with two sites
        contributes two consecutive entries.

    Raises:
        KeyError: if a residue name is missing from ``sidec_dict``.
    """
    sidec_coords = []
    for res in res_list:
        res_sidec_list = sidec_dict[res.resname]
        # Detect multi-site entries by their nesting rather than by
        # `len(...) == 2`, which would misread a flat two-atom site as two
        # sites and pass bare strings on as atom lists.
        is_multi_site = bool(res_sidec_list) and isinstance(
            res_sidec_list[0], (list, tuple))
        if is_multi_site:
            for site_atoms in res_sidec_list:
                sidec_coords.append(return_coord(res, site_atoms))
        else:
            sidec_coords.append(return_coord(res, res_sidec_list))
    return sidec_coords
# Uses the non-expanded residue list: sidec_coord itself emits two entries for
# two-site residues, so the result lines up with clean_broken_res_double.
structure['sidec_coord'] = sidec_coord(clean_broken_res_l, sidec_dict)

In [34]:
np.array(protein['sidec_coord']).shape

(535, 3)

In [22]:
device = 'cpu'
# `protein` is another name for the same dict object as `structure`.
protein = structure
# NOTE(review): this rebinds the name `dihedral_angle`, which until now was the
# dihedral_angle() function. Any later call to dihedral_angle(...) only works
# if the cell defining the function is re-run first.
dihedral_angle = torch.as_tensor(protein['dihedral_angle'], device=device, dtype=torch.float32)

In [23]:
dihedral_angle

tensor([[-3.0039,  2.5888, -0.6272,  0.0000],
        [-1.4306, -3.0735, -1.9133,  0.0000],
        [-1.4306, -3.0735, -1.9133,  0.0000],
        ...,
        [-1.0326, -3.0840, -0.4544,  1.6855],
        [-0.4541,  0.6580,  0.0000,  0.0000],
        [-0.8932,  0.0000,  0.0000,  0.0000]])

In [24]:
import json
import numpy as np
import tqdm, random
import torch, math
import torch.utils.data as data
import torch.nn.functional as F
import torch_geometric
import torch_cluster
def _normalize(tensor, dim=-1):
    '''
    Normalizes a `torch.Tensor` along dimension `dim` without `nan`s.
    '''
    return torch.nan_to_num(
        torch.div(tensor, torch.norm(tensor, dim=dim, keepdim=True)))
def _dihedrals(X, eps=1e-7):
    """Backbone dihedral features, as (cos, sin) pairs, for every residue.

    Args:
        X: tensor of per-residue atom coordinates, indexed as
            [residue, atom, xyz]; only the first three atoms of each residue
            are used (N, CA, C in this notebook).
        eps: margin used to clamp the cosine away from +-1 before `acos`.

    Returns:
        Tensor with one row per residue and six columns: the cosines of the
        three dihedral angles followed by their sines. The angles that cannot
        be computed at the chain ends are left at 0 by the padding.
    """
    # From https://github.com/jingraham/neurips19-graph-protein-design

    # Flatten the three atoms of all residues into one chain of points.
    X = torch.reshape(X[:, :3], [3*X.shape[0], 3])
    dX = X[1:] - X[:-1]  # difference between consecutive points (bond vectors)
    U = _normalize(dX, dim=-1)
    u_2 = U[:-2]
    u_1 = U[1:-1]
    u_0 = U[2:]

    # Backbone normals.
    # `dim=-1` is passed explicitly: without it torch.cross falls back to the
    # first dimension of size 3, which is the row axis (not xyz) whenever
    # exactly three bond triples exist, i.e. for a two-residue input.
    n_2 = _normalize(torch.cross(u_2, u_1, dim=-1), dim=-1)
    n_1 = _normalize(torch.cross(u_1, u_0, dim=-1), dim=-1)

    # Angle between normals
    cosD = torch.sum(n_2 * n_1, -1)
    cosD = torch.clamp(cosD, -1 + eps, 1 - eps)
    D = torch.sign(torch.sum(u_2 * n_1, -1)) * torch.acos(cosD)

    # This scheme will remove phi[0], psi[-1], omega[-1]
    D = F.pad(D, [1, 2])
    D = torch.reshape(D, [-1, 3])
    # Lift angle representations to the circle
    D_features = torch.cat([torch.cos(D), torch.sin(D)], 1)
    return D_features
# NOTE: `coords` is rebound here from a nested Python list to a float32 tensor.
coords = torch.as_tensor(protein['coords'], 
                         device=device, dtype=torch.float32)
# Entries whose coordinates are not all finite are overwritten with inf.
mask = torch.isfinite(coords.sum(dim=(1,2)))
coords[~mask] = np.inf
# Backbone dihedral features: cos and sin of three angles per entry.
dihedrals = _dihedrals(coords)

In [25]:
# Side-chain chi angles lifted to the circle: 4 cosines followed by 4 sines.
side_dihedrals = torch.cat([torch.cos(dihedral_angle), torch.sin(dihedral_angle)], 1)
# Backbone dihedral features and side-chain features side by side (the
# recorded shape below is [535, 14]).
# NOTE(review): chi angles padded with 0 contribute cos=1, sin=0, which is
# indistinguishable from a genuine angle of 0 -- confirm this is intended.
dihedrals_t = torch.cat([dihedrals, side_dihedrals], 1)

In [26]:
dihedrals_t.shape

torch.Size([535, 14])

In [32]:
# Side-chain site coordinates as a float32 tensor, one row per site.
# NOTE(review): this rebinds the name `sidec_coord`, which until now was the
# sidec_coord() function; calling the function afterwards requires re-running
# the cell that defines it.
sidec_coord = torch.as_tensor(np.array(protein['sidec_coord']), device=device, dtype=torch.float32)

In [33]:
sidec_coord

tensor([[ -0.1440,  22.4280, -16.0070],
        [  4.6090,  26.3240, -18.1710],
        [  6.5540,  25.8790, -17.2000],
        ...,
        [ -9.9740, -48.9300, -23.1340],
        [ -6.4270, -44.9140, -20.8420],
        [ -3.2900, -49.2070, -19.0070]])

In [95]:
def _sidechains(X):
    """Return one direction vector per residue built from the backbone frame.

    Args:
        X: tensor indexed as [residue, atom, xyz]; atoms 0, 1 and 2 are used
            (N, CA and C in this notebook).

    Returns:
        Tensor of shape [residue, 3]: a fixed combination of the negated
        bisector of the CA->C and CA->N unit vectors and the negated normal of
        the plane they span.
        NOTE(review): presumably the idealised CA->CB direction as in the GVP
        reference implementation -- confirm.
    """
    n, origin, c = X[:, 0], X[:, 1], X[:, 2]
    c, n = _normalize(c - origin), _normalize(n - origin)
    bisector = _normalize(c + n)
    # `dim=-1` is explicit: without it torch.cross uses the first dimension of
    # size 3, which is the residue axis when exactly three residues are given.
    perp = _normalize(torch.cross(c, n, dim=-1))
    vec = -bisector * math.sqrt(1 / 3) - perp * math.sqrt(2 / 3)
    return vec

def _sidechains_qsar(X, sidec_coord):
    """Unit direction from each entry's centre atom to its side-chain site.

    Args:
        X: tensor indexed as [entry, atom, xyz]; atom index 1 is taken as the
            centre (CA in this notebook).
        sidec_coord: tensor of side-chain site coordinates, one row per entry.

    Returns:
        The normalised difference ``sidec_coord - X[:, 1]``.
    """
    ca_position = X[:, 1]
    offset = sidec_coord - ca_position
    return _normalize(offset)
def _orientations(X):
    """Forward and backward unit vectors between consecutive points.

    Args:
        X: tensor of shape [n, 3] (one point per entry, e.g. CA positions).

    Returns:
        Tensor of shape [n, 2, 3]. Slot 0 holds the unit vector towards the
        next point (zeros for the last entry); slot 1 holds the unit vector
        towards the previous point (zeros for the first entry).
    """
    to_next = _normalize(X[1:] - X[:-1])
    to_prev = _normalize(X[:-1] - X[1:])
    # Pad one zero row at the end / at the start so both have n rows.
    to_next = F.pad(to_next, [0, 0, 0, 1])
    to_prev = F.pad(to_prev, [0, 0, 1, 0])
    return torch.stack([to_next, to_prev], dim=-2)

In [102]:
# Node vector features: forward/backward orientation plus the CA -> side-chain
# site direction, stacked to [n, 3, 3].
# NOTE(review): `X_ca` is only defined in a cell further down the notebook
# (the kNN-graph cell), so this cell fails on Restart & Run All.
orientations = _orientations(X_ca)
sidechains_qsar = _sidechains_qsar(coords, sidec_coord)
torch.cat([orientations, sidechains_qsar.unsqueeze(-2)], dim=-2).shape

torch.Size([535, 3, 3])

In [63]:
_sidechains_qsar(coords, sidec_coord)[10]

tensor([-0.1046,  0.6060,  0.7885])

In [64]:
_sidechains(coords)[10]

tensor([-0.2033,  0.6627,  0.7207])

In [65]:
def _rbf(D, D_min=0., D_max=20., D_count=16, device='cpu'):
    '''
    From https://github.com/jingraham/neurips19-graph-protein-design
    
    Returns an RBF embedding of `torch.Tensor` `D` along a new axis=-1.
    That is, if `D` has shape [...dims], then the returned tensor will have
    shape [...dims, D_count].
    '''
    D_mu = torch.linspace(D_min, D_max, D_count, device=device)
    D_mu = D_mu.view([1, -1])
    D_sigma = (D_max - D_min) / D_count
    D_expand = torch.unsqueeze(D, -1)

    RBF = torch.exp(-((D_expand - D_mu) / D_sigma) ** 2)
    return RBF

In [66]:
import torch_cluster
# CA coordinates: atom index 1 in the [N, CA, C, O] layout of `coords`.
X_ca = coords[:, 1]
# k-nearest-neighbour graph over the CA positions, 30 neighbours per node.
edge_index = torch_cluster.knn_graph(X_ca, k=30)
# Per-edge CA-to-CA difference vectors.
E_vectors = X_ca[edge_index[0]] - X_ca[edge_index[1]]

In [70]:
# Radial-basis expansion of the CA-CA edge lengths (16 values per edge).
rbf = _rbf(E_vectors.norm(dim=-1), D_count=16, device=device)
rbf.shape

torch.Size([16050, 16])

In [71]:
# CA -> side-chain-site vectors of the two endpoint nodes of every edge.
# These depend only on the node, not on the other end of the edge.
E_vectors_side_1 = sidec_coord[edge_index[0]] - X_ca[edge_index[0]]
E_vectors_side_2 = sidec_coord[edge_index[1]] - X_ca[edge_index[1]]

In [74]:
# Radial-basis expansion of the CA -> side-chain-site distances of both
# edge endpoints.
rbf_side_1 = _rbf(E_vectors_side_1.norm(dim=-1), D_count=16, device=device)
rbf_side_2 = _rbf(E_vectors_side_2.norm(dim=-1), D_count=16, device=device)

In [78]:
def _positional_embeddings(edge_index, 
                               num_embeddings=None,
                               period_range=[2, 1000]):
    # From https://github.com/jingraham/neurips19-graph-protein-design
    num_embeddings = 16
    d = edge_index[0] - edge_index[1]

    frequency = torch.exp(
        torch.arange(0, num_embeddings, 2, dtype=torch.float32, device=device)
        * -(np.log(10000.0) / num_embeddings)
    )
    angles = d.unsqueeze(-1) * frequency
    E = torch.cat((torch.cos(angles), torch.sin(angles)), -1)
    return E
# Positional embedding of the node-index offset of every edge.
pos_embeddings = _positional_embeddings(edge_index)

In [79]:
torch.cat([rbf, pos_embeddings, rbf_side_1, rbf_side_2], dim=-1).shape

torch.Size([16050, 64])

In [94]:
# Concatenate the three per-edge vectors into one 9-vector per edge.
E_vectors_t = torch.cat([E_vectors, E_vectors_side_1, E_vectors_side_2], dim=-1)
# NOTE(review): _normalize works along the last axis, so here the 9-vector is
# scaled as a whole; the three 3-vectors obtained after .view(-1,3,3) are NOT
# individually unit length. The next cells normalise each vector separately
# before concatenating -- confirm which variant is intended.
_normalize(E_vectors_t).view(-1,3,3).shape

torch.Size([16050, 3, 3])

In [92]:
torch.cat([_normalize(E_vectors), _normalize(E_vectors_side_1), _normalize(E_vectors_side_2)], dim=-1).unsqueeze(-2).shape

torch.Size([16050, 1, 9])

In [88]:
torch.cat([_normalize(E_vectors), _normalize(E_vectors_side_1), _normalize(E_vectors_side_2)], dim=-1).shape

torch.Size([16050, 9])

In [None]:
# Store the current `coords` object back into the structure dict.
# NOTE: if the backbone-dihedral cell has been run, `coords` is a tensor (with
# non-finite entries set to inf) rather than the original nested list.
structure['coords'] = coords
torch.set_num_threads(1)        # this reduce the overhead, and speed up the process for me.
# Featurise the single structure with the GVP dataset class.
dataset = gvp.data.ProteinGraphDataset([structure])
# NOTE: rebinds `protein` from the plain dict to the dataset item.
protein = dataset[0]
x = (protein.x, protein.seq, protein.node_s, protein.node_v, protein.edge_index, protein.edge_s, protein.edge_v)

In [91]:
def dihedral_angle_vec(a, b, c, d):
    """Torch version of `dihedral_angle`: signed dihedral between planes abc and bcd.

    Args:
        a, b, c, d: tensors whose last dimension holds xyz. Leading dimensions
            are treated as batch dimensions, so both a single point of shape
            (3,) and batches of shape (..., 3) are accepted.

    Returns:
        Tensor of angles in radians with the leading (batch) shape of the
        inputs; a 0-dim tensor for single points.
    """
    v1 = a - b
    v2 = b - c
    v3 = d - c

    # Explicit dim=-1 so the xyz axis is used even if a batch axis has size 3.
    c1 = torch.cross(v1, v2, dim=-1)
    c2 = torch.cross(v3, v2, dim=-1)
    c3 = torch.cross(c2, c1, dim=-1)

    v2_mag = v2.norm(dim=-1)
    sin_term = (c3 * v2).sum(dim=-1)
    cos_term = v2_mag * (c1 * c2).sum(dim=-1)
    # torch.atan2 instead of np.arctan2: the result is always a tensor and the
    # computation stays inside torch.
    return torch.atan2(sin_term, cos_term)

In [94]:
# Sanity check of dihedral_angle with NumPy points; the result is converted
# from radians to degrees (recorded value: 45.0).
# NOTE(review): the recorded output also shows two extra "1.0" lines that no
# statement in the visible dihedral_angle prints -- presumably stale output
# from an earlier version with debug prints.
a = np.array([1.,0,0])
b = np.array([1.,1.,0])
c = np.array([0,1.,0])
d = np.array([0,0,1.])
dihedral_angle(a, b, c, d) * 180 / np.pi

1.0
1.0


45.0

In [33]:
# Same sanity check with torch tensors as inputs.
# NOTE(review): this calls dihedral_angle (NumPy routines), not
# dihedral_angle_vec; presumably it relies on implicit tensor/array
# conversion -- verify it still runs with the installed versions.
a = torch.tensor([1,0,0], dtype=torch.float)
b = torch.tensor([1,1,0], dtype=torch.float)
c = torch.tensor([0,1,0], dtype=torch.float)
d = torch.tensor([0,0,1], dtype=torch.float)
dihedral_angle(a, b, c, d) * 180 / np.pi

tensor(1.)
tensor(1.)


tensor(45.)

In [13]:
# Side-chain site table keyed by three-letter residue name (same content as
# `sidec_dict`). A flat list names the atom(s) of a single site; a list of two
# lists describes the two sites of TRP, TYR, HIS, ASN and GLN.
sidec = dict()
sidec['ALA'] = ['N']
sidec['GLY'] = ['N']
sidec['ILE'] = ['CD1']  # key was misspelled 'LLE', leaving isoleucine undefined
sidec['LEU'] = ['CG']
sidec['PRO'] = ['N']
sidec['VAL'] = ['CB']
sidec['PHE'] = ['CE1','CE2','CD1','CD2','CG','CZ']
sidec['TRP'] = [['NE1'], ['CE2','CD2','CE3','CZ3','CH2','CZ2']]
sidec['TYR'] = [['OH'], ['CE1','CE2','CD1','CD2','CG','CZ']]
sidec['ASP'] = ['OD2']
sidec['GLU'] = ['OE2']
sidec['ARG'] = ['CZ']
sidec['HIS'] = [['NE2'],['ND1']]
sidec['LYS'] = ['NZ']
sidec['SER'] = ['OG']
sidec['THR'] = ['OG1']
sidec['CYS'] = ['SG']
sidec['MET'] = ['SD']
sidec['ASN'] = [['OD1'],['ND2']]
sidec['GLN'] = [['OE1'],['NE2']]

In [14]:
## Check sidec: print every (residue, atom name) pair whose atom name is not
## part of `atom_types`. No output means every atom name is known.
plus_sidec = ['TRP', 'TYR', 'HIS', 'ASN', 'GLN']
for k, v_list in sidec.items():
    # Two-site residues hold a list of atom-name lists; wrap the flat lists of
    # the other residues so both cases share one loop.
    site_groups = v_list if k in plus_sidec else [v_list]
    for site_atoms in site_groups:
        for atom_name in site_atoms:
            if atom_name not in atom_types:
                print(k, atom_name)
        

In [1]:
def _make_rigid_transformation_4x4(ex, ey, translation):
    """Create a rigid 4x4 transformation matrix from two axes and transl."""
    # Normalize ex.
    ex_normalized = ex / np.linalg.norm(ex)

    # make ey perpendicular to ex
    ey_normalized = ey - np.dot(ey, ex_normalized) * ex_normalized
    ey_normalized /= np.linalg.norm(ey_normalized)

    # compute ez as cross product
    eznorm = np.cross(ex_normalized, ey_normalized)
    m = np.stack([ex_normalized, ey_normalized, eznorm, translation]).transpose()
    m = np.concatenate([m, [[0., 0., 0., 1.]]], axis=0)
    return m

In [5]:
rigid_group_atom_positions = {
    'ALA': [
        ['N', 0, (-0.525, 1.363, 0.000)],
        ['CA', 0, (0.000, 0.000, 0.000)],
        ['C', 0, (1.526, -0.000, -0.000)],
        ['CB', 0, (-0.529, -0.774, -1.205)],
        ['O', 3, (0.627, 1.062, 0.000)],
    ],
    'ARG': [
        ['N', 0, (-0.524, 1.362, -0.000)],
        ['CA', 0, (0.000, 0.000, 0.000)],
        ['C', 0, (1.525, -0.000, -0.000)],
        ['CB', 0, (-0.524, -0.778, -1.209)],
        ['O', 3, (0.626, 1.062, 0.000)],
        ['CG', 4, (0.616, 1.390, -0.000)],
        ['CD', 5, (0.564, 1.414, 0.000)],
        ['NE', 6, (0.539, 1.357, -0.000)],
        ['NH1', 7, (0.206, 2.301, 0.000)],
        ['NH2', 7, (2.078, 0.978, -0.000)],
        ['CZ', 7, (0.758, 1.093, -0.000)],
    ],
    'ASN': [
        ['N', 0, (-0.536, 1.357, 0.000)],
        ['CA', 0, (0.000, 0.000, 0.000)],
        ['C', 0, (1.526, -0.000, -0.000)],
        ['CB', 0, (-0.531, -0.787, -1.200)],
        ['O', 3, (0.625, 1.062, 0.000)],
        ['CG', 4, (0.584, 1.399, 0.000)],
        ['ND2', 5, (0.593, -1.188, 0.001)],
        ['OD1', 5, (0.633, 1.059, 0.000)],
    ],
    'ASP': [
        ['N', 0, (-0.525, 1.362, -0.000)],
        ['CA', 0, (0.000, 0.000, 0.000)],
        ['C', 0, (1.527, 0.000, -0.000)],
        ['CB', 0, (-0.526, -0.778, -1.208)],
        ['O', 3, (0.626, 1.062, -0.000)],
        ['CG', 4, (0.593, 1.398, -0.000)],
        ['OD1', 5, (0.610, 1.091, 0.000)],
        ['OD2', 5, (0.592, -1.101, -0.003)],
    ],
    'CYS': [
        ['N', 0, (-0.522, 1.362, -0.000)],
        ['CA', 0, (0.000, 0.000, 0.000)],
        ['C', 0, (1.524, 0.000, 0.000)],
        ['CB', 0, (-0.519, -0.773, -1.212)],
        ['O', 3, (0.625, 1.062, -0.000)],
        ['SG', 4, (0.728, 1.653, 0.000)],
    ],
    'GLN': [
        ['N', 0, (-0.526, 1.361, -0.000)],
        ['CA', 0, (0.000, 0.000, 0.000)],
        ['C', 0, (1.526, 0.000, 0.000)],
        ['CB', 0, (-0.525, -0.779, -1.207)],
        ['O', 3, (0.626, 1.062, -0.000)],
        ['CG', 4, (0.615, 1.393, 0.000)],
        ['CD', 5, (0.587, 1.399, -0.000)],
        ['NE2', 6, (0.593, -1.189, -0.001)],
        ['OE1', 6, (0.634, 1.060, 0.000)],
    ],
    'GLU': [
        ['N', 0, (-0.528, 1.361, 0.000)],
        ['CA', 0, (0.000, 0.000, 0.000)],
        ['C', 0, (1.526, -0.000, -0.000)],
        ['CB', 0, (-0.526, -0.781, -1.207)],
        ['O', 3, (0.626, 1.062, 0.000)],
        ['CG', 4, (0.615, 1.392, 0.000)],
        ['CD', 5, (0.600, 1.397, 0.000)],
        ['OE1', 6, (0.607, 1.095, -0.000)],
        ['OE2', 6, (0.589, -1.104, -0.001)],
    ],
    'GLY': [
        ['N', 0, (-0.572, 1.337, 0.000)],
        ['CA', 0, (0.000, 0.000, 0.000)],
        ['C', 0, (1.517, -0.000, -0.000)],
        ['O', 3, (0.626, 1.062, -0.000)],
    ],
    'HIS': [
        ['N', 0, (-0.527, 1.360, 0.000)],
        ['CA', 0, (0.000, 0.000, 0.000)],
        ['C', 0, (1.525, 0.000, 0.000)],
        ['CB', 0, (-0.525, -0.778, -1.208)],
        ['O', 3, (0.625, 1.063, 0.000)],
        ['CG', 4, (0.600, 1.370, -0.000)],
        ['CD2', 5, (0.889, -1.021, 0.003)],
        ['ND1', 5, (0.744, 1.160, -0.000)],
        ['CE1', 5, (2.030, 0.851, 0.002)],
        ['NE2', 5, (2.145, -0.466, 0.004)],
    ],
    'ILE': [
        ['N', 0, (-0.493, 1.373, -0.000)],
        ['CA', 0, (0.000, 0.000, 0.000)],
        ['C', 0, (1.527, -0.000, -0.000)],
        ['CB', 0, (-0.536, -0.793, -1.213)],
        ['O', 3, (0.627, 1.062, -0.000)],
        ['CG1', 4, (0.534, 1.437, -0.000)],
        ['CG2', 4, (0.540, -0.785, -1.199)],
        ['CD1', 5, (0.619, 1.391, 0.000)],
    ],
    'LEU': [
        ['N', 0, (-0.520, 1.363, 0.000)],
        ['CA', 0, (0.000, 0.000, 0.000)],
        ['C', 0, (1.525, -0.000, -0.000)],
        ['CB', 0, (-0.522, -0.773, -1.214)],
        ['O', 3, (0.625, 1.063, -0.000)],
        ['CG', 4, (0.678, 1.371, 0.000)],
        ['CD1', 5, (0.530, 1.430, -0.000)],
        ['CD2', 5, (0.535, -0.774, 1.200)],
    ],
    'LYS': [
        ['N', 0, (-0.526, 1.362, -0.000)],
        ['CA', 0, (0.000, 0.000, 0.000)],
        ['C', 0, (1.526, 0.000, 0.000)],
        ['CB', 0, (-0.524, -0.778, -1.208)],
        ['O', 3, (0.626, 1.062, -0.000)],
        ['CG', 4, (0.619, 1.390, 0.000)],
        ['CD', 5, (0.559, 1.417, 0.000)],
        ['CE', 6, (0.560, 1.416, 0.000)],
        ['NZ', 7, (0.554, 1.387, 0.000)],
    ],
    'MET': [
        ['N', 0, (-0.521, 1.364, -0.000)],
        ['CA', 0, (0.000, 0.000, 0.000)],
        ['C', 0, (1.525, 0.000, 0.000)],
        ['CB', 0, (-0.523, -0.776, -1.210)],
        ['O', 3, (0.625, 1.062, -0.000)],
        ['CG', 4, (0.613, 1.391, -0.000)],
        ['SD', 5, (0.703, 1.695, 0.000)],
        ['CE', 6, (0.320, 1.786, -0.000)],
    ],
    'PHE': [
        ['N', 0, (-0.518, 1.363, 0.000)],
        ['CA', 0, (0.000, 0.000, 0.000)],
        ['C', 0, (1.524, 0.000, -0.000)],
        ['CB', 0, (-0.525, -0.776, -1.212)],
        ['O', 3, (0.626, 1.062, -0.000)],
        ['CG', 4, (0.607, 1.377, 0.000)],
        ['CD1', 5, (0.709, 1.195, -0.000)],
        ['CD2', 5, (0.706, -1.196, 0.000)],
        ['CE1', 5, (2.102, 1.198, -0.000)],
        ['CE2', 5, (2.098, -1.201, -0.000)],
        ['CZ', 5, (2.794, -0.003, -0.001)],
    ],
    'PRO': [
        ['N', 0, (-0.566, 1.351, -0.000)],
        ['CA', 0, (0.000, 0.000, 0.000)],
        ['C', 0, (1.527, -0.000, 0.000)],
        ['CB', 0, (-0.546, -0.611, -1.293)],
        ['O', 3, (0.621, 1.066, 0.000)],
        ['CG', 4, (0.382, 1.445, 0.0)],
        # ['CD', 5, (0.427, 1.440, 0.0)],
        ['CD', 5, (0.477, 1.424, 0.0)],  # manually made angle 2 degrees larger
    ],
    'SER': [
        ['N', 0, (-0.529, 1.360, -0.000)],
        ['CA', 0, (0.000, 0.000, 0.000)],
        ['C', 0, (1.525, -0.000, -0.000)],
        ['CB', 0, (-0.518, -0.777, -1.211)],
        ['O', 3, (0.626, 1.062, -0.000)],
        ['OG', 4, (0.503, 1.325, 0.000)],
    ],
    'THR': [
        ['N', 0, (-0.517, 1.364, 0.000)],
        ['CA', 0, (0.000, 0.000, 0.000)],
        ['C', 0, (1.526, 0.000, -0.000)],
        ['CB', 0, (-0.516, -0.793, -1.215)],
        ['O', 3, (0.626, 1.062, 0.000)],
        ['CG2', 4, (0.550, -0.718, -1.228)],
        ['OG1', 4, (0.472, 1.353, 0.000)],
    ],
    'TRP': [
        ['N', 0, (-0.521, 1.363, 0.000)],
        ['CA', 0, (0.000, 0.000, 0.000)],
        ['C', 0, (1.525, -0.000, 0.000)],
        ['CB', 0, (-0.523, -0.776, -1.212)],
        ['O', 3, (0.627, 1.062, 0.000)],
        ['CG', 4, (0.609, 1.370, -0.000)],
        ['CD1', 5, (0.824, 1.091, 0.000)],
        ['CD2', 5, (0.854, -1.148, -0.005)],
        ['CE2', 5, (2.186, -0.678, -0.007)],
        ['CE3', 5, (0.622, -2.530, -0.007)],
        ['NE1', 5, (2.140, 0.690, -0.004)],
        ['CH2', 5, (3.028, -2.890, -0.013)],
        ['CZ2', 5, (3.283, -1.543, -0.011)],
        ['CZ3', 5, (1.715, -3.389, -0.011)],
    ],
    'TYR': [
        ['N', 0, (-0.522, 1.362, 0.000)],
        ['CA', 0, (0.000, 0.000, 0.000)],
        ['C', 0, (1.524, -0.000, -0.000)],
        ['CB', 0, (-0.522, -0.776, -1.213)],
        ['O', 3, (0.627, 1.062, -0.000)],
        ['CG', 4, (0.607, 1.382, -0.000)],
        ['CD1', 5, (0.716, 1.195, -0.000)],
        ['CD2', 5, (0.713, -1.194, -0.001)],
        ['CE1', 5, (2.107, 1.200, -0.002)],
        ['CE2', 5, (2.104, -1.201, -0.003)],
        ['OH', 5, (4.168, -0.002, -0.005)],
        ['CZ', 5, (2.791, -0.001, -0.003)],
    ],
    'VAL': [
        ['N', 0, (-0.494, 1.373, -0.000)],
        ['CA', 0, (0.000, 0.000, 0.000)],
        ['C', 0, (1.527, -0.000, -0.000)],
        ['CB', 0, (-0.533, -0.795, -1.213)],
        ['O', 3, (0.627, 1.062, -0.000)],
        ['CG1', 4, (0.540, 1.429, -0.000)],
        ['CG2', 4, (0.533, -0.776, 1.203)],
    ],
}

In [None]:
# Inspection cell: re-parse the structure and list the atoms whose residue
# number is 564 (the number shown for res_list[0] earlier in the notebook).
# NOTE: relies on `parser` and `proteinFile` from earlier cells, and rebinds
# `s` and `a`.
s = parser.get_structure("example", proteinFile)
atom_list = list(s.get_atoms())
a = [[atom] for atom in atom_list if atom.get_full_id()[3][1] == 564]
a