In [5]:
import numpy as np
import os
import matplotlib.pyplot as plt
from Bio.PDB.PDBParser import PDBParser
import pickle

import py3Dmol

import urllib

In [6]:
# Path of the pickle file, given relative to the notebook's working directory.
af_pkl_path = 'T1123.pkl'

# Open the pkl file and deserialize its contents.
# NOTE(review): pickle.load can execute arbitrary code while loading; only use
# files that come from a trusted source.
with open(af_pkl_path, 'rb') as file:
    af_pkl = pickle.load(file)
    
# af_pkl is used as a mapping here; list its keys to see which entries exist.
print(f'Keys of af_pkl : \n {list(af_pkl.keys())}')

Keys of af_pkl : 
 ['aatype', 'between_segment_residues', 'domain_name', 'residue_index', 'seq_length', 'sequence', 'deletion_matrix_int', 'msa', 'num_alignments', 'msa_uniprot_accession_identifiers', 'msa_species_identifiers', 'template_aatype', 'template_all_atom_positions', 'template_domain_names', 'template_sequence', 'template_sum_probs', 'template_all_atom_mask']


In [7]:
# Expected atom slots for each three-letter residue name. The numbers are the
# indices used by the notebook's `atom_types` table
# (N=0, CA=1, C=2, CB=3, O=4, CG=5, ...).
_RESTYPE_REFER_ATOMS = {
    'ALA': (0, 1, 2, 3, 4),
    'ARG': (0, 1, 2, 3, 4, 5, 11, 23, 29, 30, 32),
    'ASN': (0, 1, 2, 3, 4, 5, 15, 16),
    'ASP': (0, 1, 2, 3, 4, 5, 16, 17),
    'CYS': (0, 1, 2, 3, 4, 10),
    'GLN': (0, 1, 2, 3, 4, 5, 11, 25, 26),
    'GLU': (0, 1, 2, 3, 4, 5, 11, 26, 27),
    # Glycine has no CB (slot 3) but does have the backbone O (slot 4).
    'GLY': (0, 1, 2, 4),
    'HIS': (0, 1, 2, 3, 4, 5, 13, 14, 20, 25),
    'ILE': (0, 1, 2, 3, 4, 6, 7, 12),
    'LEU': (0, 1, 2, 3, 4, 5, 12, 13),
    'LYS': (0, 1, 2, 3, 4, 5, 11, 19, 35),
    'MET': (0, 1, 2, 3, 4, 5, 18, 19),
    'PHE': (0, 1, 2, 3, 4, 5, 12, 13, 20, 21, 32),
    'PRO': (0, 1, 2, 3, 4, 5, 11),
    'SER': (0, 1, 2, 3, 4, 8),
    'THR': (0, 1, 2, 3, 4, 7, 9),
    'TRP': (0, 1, 2, 3, 4, 5, 12, 13, 21, 22, 24, 28, 33, 34),
    'TYR': (0, 1, 2, 3, 4, 5, 12, 13, 20, 21, 31, 32),
    'VAL': (0, 1, 2, 3, 4, 6, 7),
}


def restype_refer_atoms(restype):
    """Return the atom-slot indices expected for a residue type.

    Parameters
    ----------
    restype : str
        Upper-case three-letter residue name, e.g. ``'ALA'``. The name is
        matched exactly (no stripping or case folding).

    Returns
    -------
    list of int
        A new list of atom-slot indices for the residue. The list is empty
        for any name that is not one of the 20 residues in the table
        (including non-string input).

    Notes
    -----
    The terminal ``OXT`` slot (36) is not listed for any residue.
    The ``GLY`` entry is ``[0, 1, 2, 4]`` (N, CA, C, O); it previously listed
    slot 3 (CB) instead of slot 4 (O), which made every glycine carbonyl
    oxygen look like an unexpected atom.
    """
    # Non-string input can never equal a residue name; mirror the old
    # if/elif chain, which simply fell through to an empty list.
    if not isinstance(restype, str):
        return []
    # Hand back a fresh list so callers may mutate the result freely.
    return list(_RESTYPE_REFER_ATOMS.get(restype, ()))

In [8]:
# Atom name -> slot index. Each name's slot is its position in the ordered
# tuple below, giving 37 slots numbered 0..36.
# NOTE(review): the ordering looks like AlphaFold's atom37 `atom_types` list;
# confirm before relying on that equivalence.
atom_types = {
    atom_name: slot
    for slot, atom_name in enumerate((
        "N", "CA", "C", "CB", "O",
        "CG", "CG1", "CG2", "OG", "OG1", "SG",
        "CD", "CD1", "CD2", "ND1", "ND2", "OD1", "OD2", "SD",
        "CE", "CE1", "CE2", "CE3", "NE", "NE1", "NE2", "OE1", "OE2",
        "CH2", "NH1", "NH2", "OH",
        "CZ", "CZ2", "CZ3", "NZ",
        "OXT",
    ))
}

In [9]:
def readPDB(pdb_path):
    """Parse a PDB file and summarize the residues it contains.

    Every model and every chain of the parsed structure is walked. Residue
    names are gathered into a single dictionary keyed by residue sequence
    number, so when the same number is seen more than once (several chains or
    models, hetero residues, insertion codes) the entry visited last wins.

    Parameters
    ----------
    pdb_path : str
        Path of the PDB file handed to ``PDBParser.get_structure``.

    Returns
    -------
    residues : dict
        Maps the residue sequence number (``residue.id[1]``) to the
        whitespace-stripped residue name.
    chain : chain object
        The chain visited last, i.e. the last chain of the last model that
        has any chain.

    Raises
    ------
    ValueError
        If the parsed structure contains no chain. Previously this case
        failed with an ``UnboundLocalError`` on the return statement.
    """
    parser = PDBParser(PERMISSIVE=1)
    structure = parser.get_structure('pdb', pdb_path)

    residues = {}
    chain = None  # stays None only when the structure has no chain at all
    for model in structure:
        # Iterating a model yields the chain objects themselves, so there is
        # no need to look each one up again by its id.
        for chain in model:
            for residue in chain:
                residues[residue.id[1]] = residue.resname.strip()

    if chain is None:
        raise ValueError(f'No chain found in PDB file: {pdb_path!r}')
    return residues, chain

def get_coordinates(final_residue, residues, chain):
    """Collect the atom coordinates of a chain into fixed-size arrays.

    A residue with sequence number ``k`` (``residue.get_id()[1]``) is written
    to row ``k - 1``. Residues numbered below 1 or above ``final_residue``
    are skipped. An atom is stored only if its name is a key of the global
    ``atom_types`` table and its slot is listed by ``restype_refer_atoms`` for
    the residue's name; every other atom is recorded in ``unexpected_atoms``.
    An atom named ``SE`` inside a ``MET`` residue is treated as ``SD``.

    Parameters
    ----------
    final_residue : int
        Number of rows to allocate, and the highest residue number kept.
    residues : dict
        Mapping keyed by residue number. Only its first key is used: the
        numbers ``1 .. first_key - 1`` are pre-registered with an empty entry
        in ``unexpected_atoms``. An empty mapping registers nothing.
    chain : iterable
        Yields residue objects that provide ``get_id()``, ``resname`` and
        iteration over atoms; atoms provide ``get_id()`` and ``get_vector()``.

    Returns
    -------
    coord : numpy.ndarray
        Shape ``(final_residue, 37, 3)``; zeros wherever nothing was stored.
    coord_mask : numpy.ndarray
        Shape ``(final_residue, 37, 1)``; ``1.0`` for every stored atom.
    unexpected_atoms : dict
        ``{residue number: {atom name: {}}}`` for atoms that were not stored.
        Records of residues sharing a sequence number are merged.

    Notes
    -----
    Residues that share a sequence number also share a row, so a later
    residue can overwrite coordinates written by an earlier one.
    """
    coord = np.zeros([final_residue, 37, 3])
    coord_mask = np.zeros([final_residue, 37, 1])
    unexpected_atoms = {}

    # next(..., 1) keeps an empty `residues` mapping from raising IndexError.
    first_residue = next(iter(residues), 1)
    for missing_number in range(1, first_residue):
        unexpected_atoms[missing_number] = {}

    for residue in chain:
        res_number = residue.get_id()[1]
        if res_number < 1 or res_number > final_residue:
            continue
        row = res_number - 1

        # Strip padding so the name is compared the same way readPDB stores it.
        res_name = residue.resname.strip()
        # The allowed slots depend only on the residue, so look them up once.
        refer_atoms = restype_refer_atoms(res_name)
        # setdefault keeps records from an earlier residue with the same number.
        flagged = unexpected_atoms.setdefault(res_number, {})

        for atom in residue:
            atom_id = atom.get_id()
            if res_name == 'MET' and atom_id == 'SE':
                atom_id = 'SD'

            slot = atom_types.get(atom_id)
            if slot is None or slot not in refer_atoms:
                flagged[atom_id] = {}
                continue

            vec = atom.get_vector()
            coord[row, slot] = (vec[0], vec[1], vec[2])
            coord_mask[row, slot] = 1.0

    return coord, coord_mask, unexpected_atoms

In [12]:
# PDB file to read, given relative to the notebook's working directory.
pdb_path = '2zbf.pdb'
# readPDB returns a {residue number: residue name} dict and a chain object.
residues, chain = readPDB(pdb_path)

# Print both return values for a quick visual check.
print(residues, chain)

{0: 'ACE', 1: 'MET', 2: 'GLU', 3: 'ALA', 4: 'ALA', 5: 'HIS', 6: 'SER', 7: 'LYS', 8: 'SER', 9: 'THR', 10: 'GLU', 11: 'GLU', 12: 'CYS', 13: 'LEU', 14: 'ALA', 15: 'TYR', 16: 'PHE', 17: 'GLY', 18: 'VAL', 19: 'SER', 20: 'GLU', 21: 'THR', 22: 'THR', 23: 'GLY', 24: 'LEU', 25: 'THR', 26: 'PRO', 27: 'ASP', 28: 'GLN', 29: 'VAL', 30: 'LYS', 31: 'ARG', 32: 'HIS', 33: 'LEU', 34: 'GLU', 35: 'LYS', 36: 'TYR', 37: 'GLY', 38: 'HIS', 39: 'ASN', 40: 'GLU', 41: 'LEU', 42: 'PRO', 43: 'ALA', 44: 'GLU', 45: 'GLU', 46: 'GLY', 47: 'LYS', 48: 'SER', 49: 'LEU', 50: 'TRP', 51: 'GLU', 52: 'LEU', 53: 'VAL', 54: 'ILE', 55: 'GLU', 56: 'GLN', 57: 'PHE', 58: 'GLU', 59: 'ASP', 60: 'LEU', 61: 'LEU', 62: 'VAL', 63: 'ARG', 64: 'ILE', 65: 'LEU', 66: 'LEU', 67: 'LEU', 68: 'ALA', 69: 'ALA', 70: 'CYS', 71: 'ILE', 72: 'SER', 73: 'PHE', 74: 'VAL', 75: 'LEU', 76: 'ALA', 77: 'TRP', 78: 'PHE', 79: 'GLU', 80: 'GLU', 81: 'GLY', 82: 'GLU', 83: 'GLU', 84: 'THR', 85: 'ILE', 86: 'THR', 87: 'ALA', 88: 'PHE', 89: 'VAL', 90: 'GLU', 91: 'PRO

In [15]:
# Build the coordinate and mask arrays; the third return value (the record of
# unexpected atoms) is discarded here.
# NOTE(review): 2312 is passed as final_residue, i.e. the number of rows that
# get allocated. It is hard-coded; confirm where this value comes from.
coord, coord_mask, _ = get_coordinates(2312,residues,chain)
print(coord, coord_mask)
# Both shapes start with final_residue, followed by the 37 atom slots.
print(coord.shape, coord_mask.shape)

[[[64.75800323 54.92699814 44.1590004 ]
  [64.06900024 54.85200119 45.43399811]
  [63.40499878 56.16899872 45.80500031]
  ...
  [ 0.          0.          0.        ]
  [ 0.          0.          0.        ]
  [ 0.          0.          0.        ]]

 [[62.26100159 56.4640007  45.19400024]
  [61.55500031 57.68700027 45.54700089]
  [61.0870018  57.4620018  46.97800064]
  ...
  [ 0.          0.          0.        ]
  [ 0.          0.          0.        ]
  [ 0.          0.          0.        ]]

 [[60.83000183 58.54100037 47.70199966]
  [60.40499878 58.43999863 49.09500122]
  [61.60300064 58.16899872 50.        ]
  ...
  [ 0.          0.          0.        ]
  [ 0.          0.          0.        ]
  [ 0.          0.          0.        ]]

 ...

 [[ 0.          0.          0.        ]
  [ 0.          0.          0.        ]
  [ 0.          0.          0.        ]
  ...
  [ 0.          0.          0.        ]
  [ 0.          0.          0.        ]
  [ 0.          0.          0.        ]]

 [