# Explore mmCIF biological assemblies

In [84]:
# Enable autoreload so that edits to imported modules are picked up live.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [85]:
import os
import os.path as op
import tempfile
import urllib.request

import numpy as np

import kmbio

In [139]:
# mmCIF item names inspected in this notebook, grouped by category.
# `_atom_site.auth_asym_id` and `_atom_site.label_asym_id` are currently
# not part of the list.
relevant_keys = (
    [
        '_struct_asym.' + field
        for field in (
            'id',  # corresponds to 'label_asym_id', i.e. mmCIF chain naming
            'entity_id',
        )
    ]
    + [
        '_pdbx_struct_assembly.' + field
        for field in ('id', 'details', 'method_details')
    ]
    + [
        '_pdbx_struct_assembly_gen.' + field
        for field in (
            'assembly_id',  # biological assembly id (1 to multiple)
            'oper_expression',  # specifies operation from PDBX_STRUCT_OPER_LIST
            'asym_id_list',  # pointer to _struct_asym.id
        )
    ]
    + [
        '_pdbx_struct_assembly_prop.' + field
        for field in ('biol_id', 'type', 'value', 'details')
    ]
    + [
        '_pdbx_struct_oper_list.' + field
        for field in ('id', 'type', 'name', 'symmetry_operation')
    ]
    # For each row: the three matrix entries followed by the vector entry.
    + [
        '_pdbx_struct_oper_list.' + template.format(row)
        for row in (1, 2, 3)
        for template in ('matrix[{}][1]', 'matrix[{}][2]', 'matrix[{}][3]', 'vector[{}]')
    ]
)

In [140]:
# List every item of `sdict` whose name starts with '_struct_asym'.
# NOTE(review): `sdict` is only assigned in a later cell, so on a fresh
# kernel this cell raises NameError.
[
    item_name
    for item_name in sdict
    if item_name.startswith('_struct_asym')
]

['_struct_asym.id',
 '_struct_asym.pdbx_blank_PDB_chainid_flag',
 '_struct_asym.pdbx_modified',
 '_struct_asym.entity_id',
 '_struct_asym.details']

In [141]:
pdb_id = '2ou1'

# Fetch the mmCIF file over HTTPS (rather than plain HTTP) and store it in the
# working directory as '<pdb_id>.cif'.
urllib.request.urlretrieve(
    'https://files.rcsb.org/download/{}.cif'.format(pdb_id),
    '{}.cif'.format(pdb_id),
)

('2ou1.cif', <http.client.HTTPMessage at 0x7f3d4c66d518>)

In [142]:
# Read the data items of '<pdb_id>.cif' into a key -> value mapping.
sdict = kmbio.PDB.MMCIF2Dict('{name}.cif'.format(name=pdb_id))

In [143]:
# Print each item of interest; 'ERROR!!!' marks items absent from `sdict`.
for key in relevant_keys:
    print(key, sdict[key] if key in sdict else 'ERROR!!!')

_struct_asym.id ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X']
_struct_asym.entity_id ['1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '2', '2', '2', '2', '2', '2', '2', '2', '2', '2', '2', '2']
_pdbx_struct_assembly.id ['1', '2', '3', '4', '5', '6', '7', '8', '9']
_pdbx_struct_assembly.details ['author_and_software_defined_assembly', 'author_and_software_defined_assembly', 'author_and_software_defined_assembly', 'author_and_software_defined_assembly', 'author_and_software_defined_assembly', 'author_and_software_defined_assembly', 'software_defined_assembly', 'software_defined_assembly', 'software_defined_assembly']
_pdbx_struct_assembly.method_details ['PISA', 'PISA', 'PISA', 'PISA', 'PISA', 'PISA', 'PISA', 'PISA', 'PISA']
_pdbx_struct_assembly_gen.assembly_id ['1', '2', '3', '4', '5', '6', '7', '8', '9']
_pdbx_struct_assembly_gen.oper_expression ['1', '1', '1', '1', '1', '1', '1', '1', '1']
_pdbx_st

In [90]:
def sdict_to_bioassembly(sdict):
    """Collect the operations listed under ``_pdbx_struct_oper_list``.

    Args:
        sdict: Mapping from mmCIF item names to values. Every
            ``_pdbx_struct_oper_list.*`` item may be a sequence with one
            entry per operation, or a bare string when only one operation
            is present.

    Returns:
        dict: One entry per operation. The key is the operation id converted
        to ``int`` when it is numeric, otherwise the id string unchanged.
        Each value is a dict with the keys ``'type'``, ``'name'``,
        ``'symmetry_operation'``, ``'rotation'`` (3x3 float array) and
        ``'translation'`` (length-3 float array).

    Raises:
        KeyError: If a required ``_pdbx_struct_oper_list.*`` item is missing.
        ValueError: If a matrix or vector entry cannot be parsed as a float.
    """
    prefix = '_pdbx_struct_oper_list.'

    def column(field):
        # Return the item as a list with one entry per operation.
        values = sdict[prefix + field]
        # A bare string would otherwise be indexed character by character
        # below, so treat it as a one-row column.
        return [values] if isinstance(values, str) else list(values)

    ids = column('id')
    types = column('type')
    names = column('name')
    symmetry_operations = column('symmetry_operation')
    matrix_columns = [
        [column('matrix[{}][{}]'.format(row, col)) for col in (1, 2, 3)]
        for row in (1, 2, 3)
    ]
    vector_columns = [column('vector[{}]'.format(row)) for row in (1, 2, 3)]

    bioassembly_data = dict()
    for i, oper_id in enumerate(ids):
        # Keep integer keys for numeric ids; ids that are not integers
        # are kept as strings instead of raising.
        try:
            key = int(oper_id)
        except ValueError:
            key = oper_id
        bioassembly_data[key] = {
            'type': types[i],
            'name': names[i],
            'symmetry_operation': symmetry_operations[i],
            'rotation': np.array([
                [float(entries[i]) for entries in matrix_row]
                for matrix_row in matrix_columns
            ]),
            'translation': np.array([float(entries[i]) for entries in vector_columns]),
        }
    return bioassembly_data

In [91]:
# Display every operation parsed from `sdict`, keyed by operation id.
sdict_to_bioassembly(sdict)

{1: {'name': '1_555', 'rotation': array([[ 1.,  0.,  0.],
         [ 0.,  1.,  0.],
         [ 0.,  0.,  1.]]), 'symmetry_operation': 'x,y,z', 'translation': array([ 0.,  0.,  0.]), 'type': 'identity operation'},
 2: {'name': '2_455', 'rotation': array([[-1.,  0.,  0.],
         [ 0.,  1.,  0.],
         [ 0.,  0., -1.]]), 'symmetry_operation': '-x-1,y,-z', 'translation': array([-70.882,   0.   ,   0.   ]), 'type': 'crystal symmetry operation'}}

In [92]:
# Display the rotation matrix of the operation stored under key 1.
sdict_to_bioassembly(sdict)[1]['rotation']

array([[ 1.,  0.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  0.,  1.]])

In [93]:
# Parse the same '<pdb_id>.cif' file that `sdict` is read from, so that the
# operations derived from `sdict` belong to these coordinates (the previously
# hard-coded '4DKL.cif' is never downloaded by this notebook).
structure = kmbio.PDB.MMCIFParser().get_structure(pdb_id, '{}.cif'.format(pdb_id))

TypeError: Can't instantiate abstract class MMCIFParser with abstract methods get_biological_assembly

In [None]:
# Look the operation up once instead of re-processing `sdict` per argument.
operation = sdict_to_bioassembly(sdict)[1]
structure.transform(operation['rotation'], operation['translation'])