## Is there a relation between nucleic acid conformation and stacking with the protein?
- Which features of the nucleic acid conformation?
- How do we define stacking?

In [1]:
from seamless.highlevel import Context, Cell, Transformer, Module
ctx = Context()

In [None]:
# HACK: X3DNA computation is non-reproducible, so a result that was computed once
# must never be dropped from the cache (a recomputation may give another value).
# NOTE(review): presumably these two attributes are the lifetimes of temporary
# buffers in the Seamless buffer cache; the unit is not visible here — confirm.
from seamless.core.cache.buffer_cache import buffer_cache
buffer_cache.LIFETIME_TEMP = 999999
buffer_cache.LIFETIME_TEMP_SMALL = 999999

**Select PDB**

In [2]:
# PDB entry to analyze (used for the RCSB download and by the NGL viewer)
ctx.pdb_code = "1B7F"
# Chain identifiers of the nucleic acid and of the protein
ctx.na_chain = "P"
ctx.protein_chain = "A"
# Residue numbers of the pre-selected nucleotide / amino acid pair
ctx.na_resid = 5
ctx.protein_resid = 256

**Visualize PDB**

In [3]:
import nglview
widget = nglview.NGLWidget()

# Latest known value of each selection parameter, keyed by parameter name.
ngl_args = {}
def show_ngl(*args, **kwargs):
    """Observer callback: remember the changed parameter and redraw the viewer.

    When called with a change dict as first positional argument, the new value
    (``change["new"]``) is stored under the last path element of the owner
    (``change["owner"].path[-1]``). Nothing is drawn until five parameters are
    known. After that, the widget loads the PDB entry from RCSB and highlights
    the selected nucleotide and amino acid as blue ball+stick.
    """
    if args:
        change = args[0]
        key = change["owner"].path[-1]
        ngl_args[key] = change["new"]

    # Wait until every selection parameter has reported a value at least once
    if len(ngl_args) < 5:
        return None

    pdb_code, na_chain, protein_chain, na_resid, protein_resid = (
        ngl_args[key]
        for key in ("pdb_code", "na_chain", "protein_chain", "na_resid", "protein_resid")
    )

    widget.clear()
    widget.add_component("rcsb://" + pdb_code)
    # NGL selection: "<resid> and :<chain>" for the nucleotide and for the amino acid
    highlighted = '({0} and :{1}) or ({2} and :{3})'.format(
        na_resid, na_chain, protein_resid, protein_chain
    )
    widget.add_representation('ball+stick', selection=highlighted, color='blue')
    widget.center(highlighted)

display(widget)

# Redraw the viewer whenever one of the five selection parameters changes.
for _param in ("pdb_code", "na_chain", "na_resid", "protein_chain", "protein_resid"):
    getattr(ctx, _param).traitlet().observe(show_ngl)
await ctx.computation()



NGLWidget()

In [4]:
ctx.na_resid = 6

In [5]:
ctx.na_resid = 5

**Download PDB**

In [6]:
def download_pdb(pdb_code):
    """Download a PDB entry from RCSB and return its content as text.

    Parameters:
        pdb_code: PDB identifier, inserted into the RCSB download URL.

    Returns:
        The content of the ``.pdb`` file, decoded with the default codec (UTF-8).

    Raises:
        urllib.error.URLError (including HTTPError) if the download fails.
    """
    # "import urllib" alone does not load the "request" submodule; import it
    # explicitly so that this function does not depend on earlier imports.
    import urllib.request
    url = "https://files.rcsb.org/download/{}.pdb".format(pdb_code)
    # The context manager closes the HTTP response once it has been read
    with urllib.request.urlopen(url) as response:
        return response.read().decode()

# Register download_pdb on the context, feed it the selected PDB code and
# expose its result as ctx.pdb_data.
# NOTE(review): assigning a function to the context presumably wraps it in a
# Seamless transformer — confirm against the Seamless high-level API.
ctx.download_pdb = download_pdb
ctx.download_pdb.pdb_code = ctx.pdb_code
ctx.pdb_data = ctx.download_pdb

**Execute X3DNA-DSSR**

In [7]:
# Run X3DNA-DSSR on the downloaded PDB inside the "x3dna" Docker image.
ctx.execute_x3dna = Transformer()
ctx.execute_x3dna.language = "docker"
ctx.execute_x3dna.docker_image = "x3dna"
# NOTE(review): "pdb_data" and "RESULT" are presumably the names under which
# Seamless provides the input pin and collects the output — confirm.
# DSSR logs a notice that the JSON output name should end with ".json".
ctx.execute_x3dna.code = "x3dna-dssr -i=pdb_data --json -o=RESULT"
ctx.execute_x3dna.pdb_data = ctx.pdb_data
ctx.x3dna_analysis = ctx.execute_x3dna

# Wait for the computation to finish, then display the analysis
await ctx.computation()
ctx.x3dna_analysis.output()

Waiting for: Seamless transformer: .execute_x3dna.tf
Waiting for: Seamless transformer: .execute_x3dna.tf
Waiting for: Seamless transformer: .execute_x3dna.tf
Waiting for: Seamless transformer: .execute_x3dna.tf


Output(outputs=({'output_type': 'display_data', 'data': {'text/plain': '{\n  "atom2bases": [\n    {\n      "at…

In [8]:
print(ctx.execute_x3dna.logs)

[i] JSON output should end with the .json extension

Processing file 'pdb_data'
    total number of nucleotides: 24
    total number of amino acids: 334
    total number of base pairs: 2
    total number of atom-base capping interactions: 2
    total number of splayed-apart dinucleotides: 12
                        consolidated into units: 8
    total number of non-loop single-stranded segments: 2

Time used: 00:00:00:00




**Select chain and show X3DNA results**

In [9]:
def get_x3dna_nucleotides(x3dna_analysis, na_chain):
    """Return the nucleotide records of the X3DNA analysis that belong to one chain.

    The records are read from ``x3dna_analysis["nts"]`` and kept, in their
    original order, when their "chain_name" equals ``na_chain``.
    """
    selected = []
    for record in x3dna_analysis["nts"]:
        if record["chain_name"] == na_chain:
            selected.append(record)
    return selected
# Connect the chain filter to the X3DNA analysis and to the selected chain
ctx.get_x3dna_nucleotides = get_x3dna_nucleotides
ctx.get_x3dna_nucleotides.x3dna_analysis = ctx.x3dna_analysis
ctx.get_x3dna_nucleotides.na_chain = ctx.na_chain

# Expose the filtered list as a "plain" cell and show its first record
ctx.x3dna_nucleotides = ctx.get_x3dna_nucleotides
ctx.x3dna_nucleotides.celltype = "plain"
await ctx.computation()
ctx.x3dna_nucleotides.value[0]

{'C5prime_xyz': [-5.973, 4.829, 0.272],
 'Dp': 2.834,
 'P_xyz': [-6.118, 3.551, 2.579],
 'alpha': None,
 'amplitude': 41.84,
 'bb_type': '--',
 'beta': -92.219,
 'bin': 'inc',
 'chain_name': 'P',
 'chi': 37.927,
 'cluster': '__',
 'dbn': '.',
 'delta': 127.686,
 'epsilon': -167.738,
 'epsilon_zeta': 122.17,
 'eta': None,
 'eta_base': None,
 'eta_prime': None,
 'filter_rmsd': 0.014,
 'form': '.',
 'frame': {'origin': [40.637, 51.477, 117.66],
  'quaternion': [0.735, -0.405, 0.09, -0.536],
  'rmsd': 0.014,
  'x_axis': [0.41, -0.861, 0.301],
  'y_axis': [0.715, 0.097, -0.693],
  'z_axis': [0.567, 0.499, 0.655]},
 'gamma': -161.658,
 'glyco_bond': 'syn',
 'index': 1,
 'index_chain': 1,
 'nt_code': 'G',
 'nt_id': 'P.G1',
 'nt_name': 'G',
 'nt_resnum': 1,
 'nt_type': 'RNA',
 'phase_angle': 130.258,
 'puckering': "C1'-exo",
 'splay_angle': 104.33,
 'splay_distance': 15.063,
 'splay_ratio': 0.79,
 'ssZp': -2.512,
 'sugar_class': "~C2'-endo",
 'suiteness': 0.0,
 'summary': "syn,~C2'-endo,non-st

In [10]:
def get_df_x3dna(x3dna_nucleotides):
    """Render the nucleotide records as the HTML of a pandas DataFrame."""
    from pandas import DataFrame
    return DataFrame(x3dna_nucleotides).to_html()

# Feed the nucleotide list (as a "plain" input pin) into the HTML table renderer
ctx.get_df_x3dna = get_df_x3dna
ctx.get_df_x3dna.x3dna_nucleotides = ctx.x3dna_nucleotides
ctx.get_df_x3dna.pins.x3dna_nucleotides.celltype = "plain"

ctx.df_x3dna = ctx.get_df_x3dna
# NOTE(review): the mimetype is set only after a first translation; presumably
# the result cell has to exist before it accepts a mimetype — confirm.
await ctx.translation()
ctx.df_x3dna.mimetype = "text/html"
await ctx.translation()
ctx.df_x3dna.output()

Output()

**Parse PDB into structured Numpy array using parse_pdb.py**

In [11]:
# Make the local file parse_pdb.py available to the workflow as a Seamless module
ctx.parse_pdb = Module()
ctx.parse_pdb.mount("parse_pdb.py")

def get_parsed_pdb(pdb_data):
    """Parse the PDB text with ``parse_pdb.parse_pdb`` and return its result."""
    # parse_pdb is not imported in this function: the name is expected to be
    # provided from outside (the notebook connects ctx.parse_pdb to it).
    return parse_pdb.parse_pdb(pdb_data)

# Connect the parser: the mounted module, the PDB text, and a "binary" result cell
ctx.get_parsed_pdb = get_parsed_pdb
ctx.get_parsed_pdb.parse_pdb = ctx.parse_pdb
ctx.get_parsed_pdb.pdb_data = ctx.pdb_data
ctx.parsed_pdb = ctx.get_parsed_pdb
ctx.parsed_pdb.celltype = "binary"

# Wait for the computation to finish
await ctx.computation()

In [12]:
# Static snapshot of the first two atoms: unlike the .output() widgets, this
# cell output does not update in response to changes in ctx.parsed_pdb
ctx.parsed_pdb.value[:2]

array([(1, b' ', b'OP3', b' ', b'  G', b'P', 1, b' ', 1, 43.063, 59.607, 115.478, 1., 68.76, b'    ', b'O'),
       (1, b' ', b'P', b' ', b'  G', b'P', 2, b' ', 1, 42.131, 58.379, 115.046, 1., 68.5 , b'    ', b'P')],
      dtype={'names':['model','hetero','name','altloc','resname','chain','index','icode','resid','x','y','z','occupancy','bfactor','segid','element'], 'formats':['<u2','S1','S4','S1','S3','S1','<u4','S1','<u2','<f4','<f4','<f4','<f4','<f4','S4','S2'], 'offsets':[0,2,3,7,8,11,12,16,18,20,24,28,32,36,40,44], 'itemsize':48, 'aligned':True})

In [13]:
# Static snapshot: does not update in response to changes in ctx.parsed_pdb.
# parse_pdb is imported into the notebook kernel here only for its print_atom
# helper; the workflow itself uses the module mounted as ctx.parse_pdb.
import parse_pdb
parse_pdb.print_atom(ctx.parsed_pdb.value[:2])

[{'model': 1,
  'hetero': b' ',
  'name': b'OP3',
  'altloc': b' ',
  'resname': b'  G',
  'chain': b'P',
  'index': 1,
  'icode': b' ',
  'resid': 1,
  'x': 43.063,
  'y': 59.607,
  'z': 115.478,
  'occupancy': 1.0,
  'bfactor': 68.76,
  'segid': b'    ',
  'element': b'O'},
 {'model': 1,
  'hetero': b' ',
  'name': b'P',
  'altloc': b' ',
  'resname': b'  G',
  'chain': b'P',
  'index': 2,
  'icode': b' ',
  'resid': 1,
  'x': 42.131,
  'y': 58.379,
  'z': 115.046,
  'occupancy': 1.0,
  'bfactor': 68.5,
  'segid': b'    ',
  'element': b'P'}]

In [14]:
def get_df_pdb(parsed_pdb):
    """Render the parsed PDB atoms as the HTML of a pandas DataFrame.

    Parameters:
        parsed_pdb: structured Numpy array of atoms (one field per column).

    Returns:
        HTML table (str). Byte-string values are decoded as UTF-8 so that they
        are displayed as text instead of as ``b'...'`` literals.
    """
    import pandas as pd
    df_pdb = pd.DataFrame(parsed_pdb)
    for col, dtype in df_pdb.dtypes.items():
        # Byte-string fields arrive as object columns. Compare with the builtin
        # "object": the "np.object" alias was removed in NumPy 1.24.
        if dtype == object:
            # Decode only bytes; leave any other object value untouched
            df_pdb[col] = df_pdb[col].apply(
                lambda value: value.decode("utf-8") if isinstance(value, bytes) else value
            )
    return df_pdb.to_html()

# Feed the parsed atoms (as a "binary" input pin) into the HTML table renderer
ctx.get_df_pdb = get_df_pdb
ctx.get_df_pdb.parsed_pdb = ctx.parsed_pdb
ctx.get_df_pdb.pins.parsed_pdb.celltype = "binary"
ctx.df_pdb = ctx.get_df_pdb

# NOTE(review): the mimetype is set only after a first translation; presumably
# the result cell has to exist before it accepts a mimetype — confirm.
await ctx.translation()
ctx.df_pdb.mimetype = "text/html"

await ctx.translation()
ctx.df_pdb.output()

Output()

**Select protein and nucleic acid chain from parsed PDB**

In [15]:
def select_chains(parsed_pdb, protein_chain, na_chain):
    """Split the parsed atoms into the protein chain and the nucleic acid chain.

    The "chain" field of ``parsed_pdb`` holds bytes, so both chain identifiers
    (str) are encoded before the comparison. The number of atoms in each
    selection is printed.

    Returns a dict with the keys "protein_atoms" and "na_atoms".
    """
    chain_ids = parsed_pdb["chain"]
    protein_atoms = parsed_pdb[chain_ids == protein_chain.encode()]
    na_atoms = parsed_pdb[chain_ids == na_chain.encode()]
    print(len(protein_atoms), len(na_atoms))
    return dict(protein_atoms=protein_atoms, na_atoms=na_atoms)
# Connect the chain selection to the parsed atoms and the two chain identifiers
ctx.select_chains = select_chains
ctx.select_chains.parsed_pdb = ctx.parsed_pdb
ctx.select_chains.pins.parsed_pdb.celltype = "binary"
ctx.select_chains.protein_chain = ctx.protein_chain
ctx.select_chains.na_chain = ctx.na_chain
# Expose the returned dict, and each of its two entries as a cell of its own
ctx.selected_chains = ctx.select_chains
ctx.protein_atoms = ctx.selected_chains.protein_atoms
ctx.na_atoms = ctx.selected_chains.na_atoms
await ctx.translation()

In [16]:
# Wait for the computation to finish; the atom counts printed inside
# select_chains show up in the logs of its transformer.
await ctx.computation()
print(ctx.select_chains.logs)

Waiting for: Seamless mixed cell: .CONNECTION_9
1365 254



**Define code to calculate stacking properties**

In [17]:
def calculate_stacking_properties(protein_atoms, protein_resid, na_atoms, na_resid):
    """Measure distances between one amino acid and one nucleotide.

    Both atom arrays are structured arrays that provide the fields "resid",
    "resname", "name", "x", "y" and "z"; "resname" and "name" hold bytes.

    Returns a dict with:
    - "closest_distance": smallest distance over all atom pairs
    - "mean_stacking_dist", "std_stacking_dist": mean and standard deviation of
      the distances between the tabulated side chain atoms and base atoms

    Raises AssertionError if either residue number selects no atoms, and
    KeyError if the amino acid is not PHE or the nucleotide is not U (the only
    entries in the atom tables below).
    """
    import numpy as np
    from scipy.spatial.distance import cdist

    # Atom names that define the stacking surface, per residue type
    sidechain_atoms = {
        "PHE": ['CG', 'CD1', 'CD2', 'CE1', 'CE2', 'CZ']
    }
    base_atoms = {
        "U": ['C2', 'C4', 'C5', 'C6', 'N1', 'N3']
    }

    def coordinates(atoms):
        # (n_atoms, 3) array of x, y, z
        return np.stack((atoms["x"], atoms["y"], atoms["z"]), axis=1)

    def name_mask(atoms, atom_names):
        # True for every atom whose name is listed in atom_names
        return np.isin(atoms["name"], [name.encode() for name in atom_names])

    amino_acid = protein_atoms[protein_atoms["resid"] == protein_resid]
    assert len(amino_acid)
    aa_code = amino_acid[0]["resname"].decode().strip()

    nucleotide = na_atoms[na_atoms["resid"] == na_resid]
    assert len(nucleotide)
    nuc_code = nucleotide[0]["resname"].decode().strip()[-1]  # one-letter

    # Rows: amino acid atoms; columns: nucleotide atoms
    dist = cdist(coordinates(amino_acid), coordinates(nucleotide))

    in_sidechain = name_mask(amino_acid, sidechain_atoms[aa_code])
    in_base = name_mask(nucleotide, base_atoms[nuc_code])
    stacking_dist = dist[in_sidechain][:, in_base]

    return {
        "closest_distance": dist.min(),
        "mean_stacking_dist": stacking_dist.mean(),
        "std_stacking_dist": stacking_dist.std(),
    }

def calculate_all_properties(protein_atoms, protein_resid, na_atoms, na_resid, x3dna_nucleotides):
    """Combine X3DNA conformation angles with the stacking properties of one pair.

    The stacking properties come from calculate_stacking_properties. The angles
    "gamma", "delta" and "chi" are copied from the X3DNA record whose
    "nt_resnum" equals ``na_resid``.

    Returns a dict: the three angles first, then the stacking properties.
    Raises AssertionError unless exactly one X3DNA record matches, in addition
    to whatever calculate_stacking_properties raises.
    """
    stacking = calculate_stacking_properties(protein_atoms, protein_resid, na_atoms, na_resid)
    matches = [record for record in x3dna_nucleotides if record["nt_resnum"] == na_resid]
    assert len(matches) == 1
    (nucleotide,) = matches
    result = {prop: nucleotide[prop] for prop in ("gamma", "delta", "chi")}
    result.update(stacking)
    return result

# Bundle the two functions defined in this cell into one Seamless module:
# the module code is the source text of both functions, joined by a newline.
ctx.calc_properties = Module()
import inspect
src = inspect.getsource(calculate_stacking_properties) \
+ "\n" \
+ inspect.getsource(calculate_all_properties)
ctx.calc_properties.code = src
await ctx.translation()

**Define code to integrate all properties**
- Stacking properties are computed using the code above
- A list of other properties is extracted from the X3DNA analysis

In [18]:
def get_all_properties(protein_atoms, protein_resid, na_atoms, na_resid, x3dna_nucleotides):
    """Compute all properties of the pre-selected residue-nucleotide pair.

    Delegates to ``calc_properties.calculate_all_properties``; the name
    ``calc_properties`` is not imported here and is expected to be provided
    from outside (the notebook connects ctx.calc_properties to it).
    """
    compute = calc_properties.calculate_all_properties
    return compute(protein_atoms, protein_resid, na_atoms, na_resid, x3dna_nucleotides)
# Connect the property calculation for the pre-selected pair
ctx.get_all_properties = get_all_properties
# Module with the calculation code
ctx.get_all_properties.calc_properties = ctx.calc_properties
# Atom arrays are passed as "binary" pins, the X3DNA records as a "plain" pin
ctx.get_all_properties.protein_atoms = ctx.protein_atoms
ctx.get_all_properties.pins.protein_atoms.celltype = "binary"
ctx.get_all_properties.protein_resid = ctx.protein_resid
ctx.get_all_properties.na_atoms = ctx.na_atoms
ctx.get_all_properties.pins.na_atoms.celltype = "binary"
ctx.get_all_properties.na_resid = ctx.na_resid
ctx.get_all_properties.x3dna_nucleotides = ctx.x3dna_nucleotides
ctx.get_all_properties.pins.x3dna_nucleotides.celltype = "plain"
# Result dict, stored as a "plain" cell and displayed
ctx.all_properties = ctx.get_all_properties
ctx.all_properties.celltype = "plain"
await ctx.translation()
ctx.all_properties.output()

Output()

**Calculate properties for all residue-nucleotide pairs**

Instead of using only the pre-selected residue and nucleotide, iterate over all residue-nucleotide pairs.

In [19]:
def get_stackings(protein_atoms, na_atoms, x3dna_nucleotides):
    """Calculate the properties of every residue-nucleotide pair that supports it.

    Parameters:
        protein_atoms, na_atoms: structured atom arrays with a "resid" field.
        x3dna_nucleotides: X3DNA nucleotide records of the nucleic acid chain.

    Returns:
        A list of property dicts (see calculate_all_properties), each extended
        with the "na_resid" and "protein_resid" of the pair, as plain ints.
        Pairs for which the calculation raises KeyError or AssertionError are
        skipped.
    """
    import numpy as np
    # NOTE(review): relative import of the "calc_properties" module; presumably
    # resolved by Seamless from the module connected to this transformer — confirm.
    from .calc_properties import calculate_all_properties
    all_protein_resids = np.unique(protein_atoms["resid"])
    all_na_resids = np.unique(na_atoms["resid"])
    stackings = []
    for curr_na_resid in all_na_resids:
        for curr_protein_resid in all_protein_resids:
            try:
                properties = calculate_all_properties(
                    protein_atoms, curr_protein_resid,
                    na_atoms, curr_na_resid,
                    x3dna_nucleotides
                )
            except (KeyError, AssertionError):
                # Best effort: this pair is not supported by the calculation
                continue
            # Convert the Numpy integers to plain ints
            properties["na_resid"] = int(curr_na_resid)
            properties["protein_resid"] = int(curr_protein_resid)
            stackings.append(properties)
    return stackings

# Connect the all-pairs calculation: code module, both atom arrays ("binary"
# pins) and the X3DNA records ("plain" pin)
ctx.get_stackings = get_stackings
ctx.get_stackings.calc_properties = ctx.calc_properties
ctx.get_stackings.protein_atoms = ctx.protein_atoms
ctx.get_stackings.pins.protein_atoms.celltype = "binary"
ctx.get_stackings.na_atoms = ctx.na_atoms
ctx.get_stackings.pins.na_atoms.celltype = "binary"
ctx.get_stackings.x3dna_nucleotides = ctx.x3dna_nucleotides
ctx.get_stackings.pins.x3dna_nucleotides.celltype = "plain"
# Resulting list of property dicts, stored as a "plain" cell
ctx.stackings = ctx.get_stackings
ctx.stackings.celltype = "plain"

def get_df_stackings(stackings):
    """Render the list of per-pair property dicts as the HTML of a pandas DataFrame."""
    from pandas import DataFrame
    return DataFrame(stackings).to_html()

# Feed the stackings list (as a "plain" input pin) into the HTML table renderer
ctx.get_df_stackings = get_df_stackings
ctx.get_df_stackings.stackings = ctx.stackings
ctx.get_df_stackings.pins.stackings.celltype = "plain"
ctx.df_stackings = ctx.get_df_stackings
# NOTE(review): the mimetype is set only after a first translation; presumably
# the result cell has to exist before it accepts a mimetype — confirm.
await ctx.translation()
ctx.df_stackings.mimetype = "text/html"
await ctx.translation()
ctx.df_stackings.output()

Output()

**Plot a nucleotide conformation property versus a stacking property**

In [20]:
def get_plot(stackings):
    """Scatter-plot chi versus the closest distance of every pair, as HTML.

    Parameters:
        stackings: list of dicts that provide "chi" and "closest_distance".

    Returns:
        The figure converted to HTML by ``mpld3.fig_to_html``.
    """
    from matplotlib import pyplot as plt
    import mpld3
    fig, ax = plt.subplots()
    try:
        ax.scatter(
            [stacking["chi"] for stacking in stackings],
            [stacking["closest_distance"] for stacking in stackings],
        )
        ax.set_xlabel('Chi')
        ax.set_ylabel('Closest distance')
        return mpld3.fig_to_html(fig)
    finally:
        # pyplot keeps every figure it created until it is closed explicitly;
        # without this, each re-execution would leave one more figure in memory.
        plt.close(fig)

# Feed the stackings list (as a "plain" input pin) into the plot renderer
ctx.get_plot = get_plot
ctx.get_plot.stackings = ctx.stackings
ctx.get_plot.pins.stackings.celltype = "plain"
ctx.plot = ctx.get_plot
# NOTE(review): the mimetype is set only after a first translation; presumably
# the result cell has to exist before it accepts a mimetype — confirm.
await ctx.translation()
ctx.plot.mimetype = "text/html"
await ctx.translation()
ctx.plot.output()

Output()

**Wait for the computation to finish**

**Save Seamless graph in two files**
- topology (.seamless, JSON format)
- values (.zip)

In [21]:
# Wait for the computation to finish before saving
await ctx.computation()
# Topology of the graph (.seamless, JSON format)
ctx.save_graph("initial-port.seamless")
# Values (.zip)
ctx.save_zip("initial-port.zip")