In [77]:
import os
# The thread count is exported before the scientific libraries below are imported.
# NOTE(review): presumably this caps the OpenMP threads used by mdtraj/numpy and must
# therefore precede their import — confirm before moving this line.
os.environ["OMP_NUM_THREADS"] = "20"
print(os.environ['OMP_NUM_THREADS'])

from __future__ import print_function
%matplotlib inline
import mdtraj as md
import numpy as np
import matplotlib.pyplot as plt
import scipy.cluster.hierarchy
from scipy.spatial.distance import squareform
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots 
import nglview as nv


20


# Functions

In [78]:
#def make_frames_comparison(traj, clusters_dict, markov_nframes):
#    contact_bycluster_dict = {}
#
#
#    for cluster, frames in clusters_dict.items():
#        markov_frames = frames[frames <= markov_nframes]
#        me_frames = frames[frames > markov_nframes]
#
#        markov_contact_map = md.compute_contacts(traj[markov_frames], scheme='closest')
#
#        markov_pairs_count = pd.DataFrame(columns=['markov_ai', 'markov_aj', 'markov_average_distance', 'markov_probability'])
#        markov_pairs_count['markov_ai'] = markov_contact_map[1][:,0]
#        markov_pairs_count['markov_aj'] = markov_contact_map[1][:,1]
#        markov_pairs_count['markov_average_distance'] = (markov_contact_map[0] < 0.55).mean(axis=0)
#        markov_pairs_count['markov_probability'] = ((markov_contact_map[0] < 0.55).sum(axis=0)/len(markov_frames)) # axis=0 works column-wise (gives 780 elements = the number of pairs)
#        markov_pairs_count = markov_pairs_count[markov_pairs_count.markov_average_distance != 0]
#
#        me_contact_map = md.compute_contacts(traj[me_frames], scheme='closest')
#        me_pairs_count = pd.DataFrame(columns=['me_ai', 'me_aj', 'me_average_distance', 'me_probability'])
#        me_pairs_count['me_ai'] = me_contact_map[1][:,0]
#        me_pairs_count['me_aj'] = me_contact_map[1][:,1]
#        me_pairs_count['me_average_distance'] = (me_contact_map[0] < 0.55).mean(axis=0)
#        me_pairs_count['me_probability'] = ((me_contact_map[0] < 0.55).sum(axis=0)/len(me_frames)) # axis=0 works column-wise (gives 780 elements = the number of pairs)
#        me_pairs_count = me_pairs_count[me_pairs_count.me_average_distance != 0]
#
#        pairs_count = pd.concat([markov_pairs_count, me_pairs_count], axis=1)
#        
#        if (pairs_count['markov_ai'] == pairs_count['me_ai']) and (pairs_count['markov_aj'] == pairs_count['me_aj']):
#            pairs_count = pairs_count.rename(columns={'markov_ai':'ai', 'markov_aj':'aj'})
#            pairs_count.drop(columns=['me_ai', 'me_aj'], inplace=True)
#
#        contact_bycluster_dict[cluster] = pairs_count
#
#
#    return contact_bycluster_dict

In [79]:
#rmsd_matrix = np.empty((traj.n_frames, traj.n_frames))
#for i in range(traj.n_frames):
#    rmsd_matrix[i] = md.rmsd(traj, traj, i, atom_indices=backbone)
#print('Max pairwise rmsd: %f nm' % np.max(rmsd_matrix))

In [80]:
def clustering(rmsd_matrix, rmsd_cutoff:float, frame_list):
    """Greedily partition frames into clusters from a pairwise RMSD matrix.

    At every iteration the still-unassigned frame with the largest number of
    still-unassigned frames closer than ``rmsd_cutoff`` is taken as the cluster
    centre (ties are resolved in favour of the lowest index). The centre and
    its neighbours form the next cluster and are removed from the pool; the
    procedure repeats until no frame is left.

    Parameters
    ----------
    rmsd_matrix : array-like, shape (n, n)
        Pairwise distances between the ``n`` frames.
    rmsd_cutoff : float
        Two frames are neighbours when their distance is strictly below it.
    frame_list : array-like, shape (n,)
        Label of each row/column of ``rmsd_matrix`` (e.g. the frame index).

    Returns
    -------
    dict
        Maps the cluster number (1, 2, ... in extraction order) to a numpy
        array with the labels of the frames assigned to that cluster.

    Raises
    ------
    ValueError
        If ``rmsd_matrix`` is not square or ``frame_list`` does not have one
        label per matrix row.
    """
    rmsd_matrix = np.asarray(rmsd_matrix)
    frame_list = np.asarray(frame_list)

    # Nothing to cluster.
    if rmsd_matrix.size == 0:
        return {}

    if rmsd_matrix.ndim != 2 or rmsd_matrix.shape[0] != rmsd_matrix.shape[1]:
        raise ValueError(f'rmsd_matrix must be square, got shape {rmsd_matrix.shape}')
    if frame_list.ndim != 1 or frame_list.shape[0] != rmsd_matrix.shape[0]:
        raise ValueError('frame_list must contain exactly one label per row of rmsd_matrix')

    # The threshold test never changes, so evaluate it once on the full matrix
    # and only shrink the set of indices that are still unassigned.
    within_cutoff = rmsd_matrix < rmsd_cutoff
    remaining = np.arange(rmsd_matrix.shape[0])

    frame_clust_dict = {}
    clust_counter = 1

    while remaining.size != 0:
        # Neighbour relations restricted to the frames not yet clustered.
        sub_matrix = within_cutoff[np.ix_(remaining, remaining)]

        # argmax returns the first index holding the maximum neighbour count.
        centre = int(np.argmax(sub_matrix.sum(axis=0)))

        members = sub_matrix[centre].copy()
        # Always assign the centre to its own cluster: this guarantees that
        # every iteration removes at least one frame, so the loop terminates
        # even when no distance is below the cutoff (cutoff <= 0, NaN rows).
        members[centre] = True

        frame_clust_dict[clust_counter] = frame_list[remaining[members]]
        remaining = remaining[~members]

        clust_counter += 1

    return frame_clust_dict


In [81]:
def make_frames_comparison(traj, clusters_dict, markov_nframes, me_traj_nframes, full_traj_nframes):
    """Compare, cluster by cluster, how many frames come from each trajectory.

    Frame indices are 0-based positions in the merged trajectory, where the
    Markov frames come first: indices ``0 .. markov_nframes - 1`` are Markov
    frames and every index ``>= markov_nframes`` is a Multi-eGO frame.

    Parameters
    ----------
    traj
        Unused; kept so existing callers do not have to change.
    clusters_dict : dict
        Cluster number -> array-like of frame indices in that cluster.
    markov_nframes : int
        Number of frames of the Markov trajectory.
    me_traj_nframes : int
        Number of frames of the Multi-eGO trajectory.
    full_traj_nframes : int
        Number of frames of the merged trajectory.

    Returns
    -------
    dict
        Cluster number -> ``(markov_value, me_value)``, where each value is the
        fraction of that trajectory falling in the cluster multiplied by
        ``full_traj_nframes``.
    """
    frames_comparison_dict = {}

    for cluster, frames in clusters_dict.items():
        frames = np.asarray(frames)

        # Strict '<': index == markov_nframes is already the first Multi-eGO
        # frame because indices start at 0.
        n_markov = np.count_nonzero(frames < markov_nframes)
        n_me = np.count_nonzero(frames >= markov_nframes)

        frames_comparison_dict[cluster] = (
            (n_markov / markov_nframes) * full_traj_nframes,
            (n_me / me_traj_nframes) * full_traj_nframes,
        )

    return frames_comparison_dict

In [82]:
def clustering_wrapper(multiego_structure:str, multiego_trajectory:str, cutoff:float,
                       markov_trajectory:str='/home/emanuele/ABeta/markov/mdtraj_red_noh.xtc',
                       markov_structure:str='/home/emanuele/ABeta/markov/mdtraj_reduced-noh.gro',
                       output_dir:str='clusters_trajectories'):
    """Cluster a Multi-eGO trajectory together with the reference Markov trajectory.

    The two trajectories are concatenated (Markov frames first), a pairwise
    RMSD matrix is computed on the atoms selected by ``'backbone'``, the frames
    are clustered with ``clustering`` and every cluster is written to
    ``<output_dir>/clust<N>.xtc``. A summary is printed via ``frame_percentage``.

    Parameters
    ----------
    multiego_structure : str
        Topology file of the Multi-eGO trajectory.
    multiego_trajectory : str
        Multi-eGO trajectory file.
    cutoff : float
        RMSD cutoff forwarded to ``clustering``.
    markov_trajectory, markov_structure : str, optional
        Trajectory and topology of the reference Markov simulation. The
        defaults are the paths that used to be hard-coded here.
    output_dir : str, optional
        Folder receiving the per-cluster trajectories; created when missing.
        A relative path is resolved against the current working directory.

    Returns
    -------
    tuple
        ``(frames_comparison_dict, clust_size_dict, clust_trajectories)``: the
        output of ``make_frames_comparison``, the number of frames per cluster
        and the trajectory slice of every cluster, all keyed by cluster number.
    """
    markov_traj = md.load(markov_trajectory, top=markov_structure)
    me_traj = md.load(multiego_trajectory, top=multiego_structure)

    # Markov frames first: make_frames_comparison relies on this ordering to
    # tell the two sources apart by frame index.
    full_traj = md.join([markov_traj, me_traj], check_topology=True, discard_overlapping_frames=False)

    # The RMSD is evaluated on the backbone selection only; the saved clusters
    # keep all atoms of the merged trajectory.
    full_rmsd_traj = full_traj.atom_slice(full_traj.topology.select('backbone'))

    markov_nframes = markov_traj.n_frames
    me_traj_nframes = me_traj.n_frames
    full_traj_nframes = full_traj.n_frames

    frame_list = np.arange(0, full_traj_nframes)

    print('Making the RMSD matrix')

    # Row i holds the RMSD of every frame with respect to frame i.
    rmsd_matrix = np.empty((full_traj_nframes, full_traj_nframes))
    for i in range(full_traj_nframes):
        rmsd_matrix[i] = md.rmsd(full_rmsd_traj, full_rmsd_traj, i)

    print('Performing clustering')
    clusters_dict = clustering(rmsd_matrix, cutoff, frame_list)

    print('Saving the trajectories')
    # Do not depend on a shell-side mkdir having been run beforehand.
    os.makedirs(output_dir, exist_ok=True)

    clust_size_dict, clust_trajectories = {}, {}
    for clust, frames in clusters_dict.items():
        # Slice once and reuse the result for both the return value and the file.
        cluster_traj = full_traj[frames]
        clust_size_dict[clust] = len(frames)
        clust_trajectories[clust] = cluster_traj
        cluster_traj.save(os.path.join(output_dir, f'clust{clust}.xtc'))

    frame_percentage(markov_nframes, me_traj_nframes, full_traj_nframes, clust_size_dict)

    frames_comparison_dict = make_frames_comparison(full_traj, clusters_dict, markov_nframes, me_traj_nframes, full_traj_nframes)
    return frames_comparison_dict, clust_size_dict, clust_trajectories


# Plots

In [83]:
def frame_percentage(markov_nframes, me_traj_nframes, full_traj_nframes, clust_size_dict):
    """Print a summary of the frame counts and of the largest clusters.

    Parameters
    ----------
    markov_nframes : int
        Number of frames of the Markov trajectory.
    me_traj_nframes : int
        Number of frames of the Multi-eGO trajectory.
    full_traj_nframes : int
        Number of frames of the merged trajectory.
    clust_size_dict : dict
        Cluster number -> number of frames in that cluster.

    Returns
    -------
    None
        The report is written to standard output.
    """
    print(f'Markov frames = {markov_nframes}')
    print(f'Multi-eGO frames = {me_traj_nframes}')
    print(f'Merged frames = {full_traj_nframes}')

    for percent in (60, 70, 80, 90):
        print(f'The {percent}% of frames are {(percent*full_traj_nframes)/100}')

    # Generator expressions, not set comprehensions: a set would collapse
    # clusters that happen to have the same size and under-count the total.
    first_10frames = sum(size for key, size in clust_size_dict.items() if key < 11)
    first_15frames = sum(size for key, size in clust_size_dict.items() if key < 16)

    print(f'The first 10 clusters includes {first_10frames} as the {np.around((first_10frames*100)/full_traj_nframes, decimals=2)}% of the total')
    print(f'The first 15 clusters includes {first_15frames} as the {np.around((first_15frames*100)/full_traj_nframes, decimals=2)}% of the total')


In [84]:
greta_dir = '/home/emanuele/ABeta/greta_co5.5_ex2/epsilon_310/monomer_test/native_278K'
greta_310_structure = f'{greta_dir}/box_abeta_greta.gro'
greta_310_trajectory =f'{greta_dir}/md_traj_red.xtc'

%cd {greta_dir}
%mkdir {'clusters_trajectories'}

greta310_probability_dict, greta310_clustsize, clust_trajectories_dict = clustering_wrapper(greta_310_structure, greta_310_trajectory, 0.78)


/home/emanuele/ABeta/greta_co5.5_ex2/epsilon_310/monomer_test/native_278K
mkdir: cannot create directory ‘clusters_trajectories’: File exists
Making the RMSD matrix
Performing clustering
Saving the trajectories
Markov frames = 3136
Multi-eGO frames = 3382
Merged frames = 6518
The 60% of frames are 3910.8
The 70% of frames are 4562.6
The 80% of frames are 5214.4
The 90% of frames are 5866.2
The first 10 clusters includes 5139 as the 78.84% of the total
The first 15 clusters includes 5434 as the 83.37% of the total


In [104]:
fig = go.Figure()

# Plot the first 10 clusters, or fewer when the clustering produced fewer:
# cluster numbers run from 1 to len(dict), so cap the exclusive upper bound.
nclust = min(11, len(greta310_probability_dict) + 1)
nclust_list = list(range(1, nclust))

# Each dictionary entry is (markov_value, multi_ego_value) for that cluster.
markov_nclust_norm = [greta310_probability_dict[n][0] for n in nclust_list]
me_nclust_norm = [greta310_probability_dict[n][1] for n in nclust_list]

fig.add_trace(go.Bar(x=nclust_list, y=markov_nclust_norm, name='Markov Frames'))
fig.add_trace(go.Bar(x=nclust_list, y=me_nclust_norm, name='Multi-eGO Frames'))
fig.update_layout(
    height=800,
    width=800,
    plot_bgcolor='white',
    xaxis_title='Cluster',
    yaxis_title='Frames in cluster (rescaled to the merged trajectory length)',
)
fig

In [86]:
# Build an NGL viewer for the frames of cluster 1 after superposing them.
# NOTE(review): mdtraj's superpose appears to modify the trajectory in place, so
# clust_trajectories_dict[1] may be altered by this cell — confirm.
w = nv.show_mdtraj(clust_trajectories_dict[1].superpose(clust_trajectories_dict[1]))

In [87]:
# Display the viewer widget created in the previous cell (cluster 1).
w

NGLWidget(max_frame=2224)

In [105]:
# Build an NGL viewer for the frames of cluster 10 after superposing them; this
# rebinds `w`, replacing the cluster 1 viewer.
# NOTE(review): mdtraj's superpose appears to modify the trajectory in place, so
# clust_trajectories_dict[10] may be altered by this cell — confirm.
w = nv.show_mdtraj(clust_trajectories_dict[10].superpose(clust_trajectories_dict[10]))

In [106]:
# Display the viewer widget created in the previous cell (cluster 10).
w

NGLWidget(max_frame=105)

In [108]:
pip install dash

Collecting dash
  Downloading dash-2.0.0-py3-none-any.whl (7.3 MB)
     |████████████████████████████████| 7.3 MB 13.0 MB/s            
[?25hCollecting dash-table==5.0.0
  Downloading dash_table-5.0.0.tar.gz (3.4 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting dash-html-components==2.0.0
  Downloading dash_html_components-2.0.0.tar.gz (3.8 kB)
  Preparing metadata (setup.py) ... [?25ldone
Collecting dash-core-components==2.0.0
  Downloading dash_core_components-2.0.0.tar.gz (3.4 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting flask-compress
  Downloading Flask_Compress-1.10.1-py3-none-any.whl (7.9 kB)
Collecting brotli
  Downloading Brotli-1.0.9-cp38-cp38-manylinux1_x86_64.whl (357 kB)
     |████████████████████████████████| 357 kB 64.1 MB/s            
Building wheels for collected packages: dash-core-components, dash-html-components, dash-table
  Building wheel for dash-core-components (setup.py) ... [?25ldone
[?25h  Created wheel for dash-

In [109]:
import dash
import dash_bio as dashbio
from dash import dcc, html
from dash.dependencies import Input, Output
from dash.exceptions import PreventUpdate
import dash_bio_utils.ngl_parser as ngl_parser

# Dash application hosting the NGL molecule viewer demo.
app = dash.Dash(__name__)

# Base URL handed to ngl_parser.get_data by the callback.
data_path = "https://raw.githubusercontent.com/plotly/datasets/master/Dash_Bio/Molecular/"


# One dropdown entry per selectable structure; label and value are the same id.
dropdown_options = [
    {"label": pdb_id, "value": pdb_id}
    for pdb_id in ("1BNA", "MPRO", "PLPR", "5L73", "NSP2")
]

# Usage instructions rendered above the viewer.
viewer_help = dcc.Markdown('''
    ### NglMoleculeViewer Controls

    * Rotate Stage: Left-click on the viewer and move the mouse to rotate the stage.
    * Zoom: Use the mouse scroll-wheel to zoom in and out of the viewer.
    * Pan: Right click on the viewer to pan the stage.
    * Individual Molecule Interaction: Left click on the molecule to interact with, then hold the
    `CTRL` key and use right and left click mouse buttons to rotate and pan individual molecules.
    ''')

# Structure selector; its value is the input of the callback.
molecule_dropdown = dcc.Dropdown(
    id="default-ngl-molecule-dropdown",
    options=dropdown_options,
    placeholder="Select a molecule",
    value="1BNA"
)

# Viewer component whose 'data' and 'molStyles' are set by the callback.
molecule_viewer = dashbio.NglMoleculeViewer(id="default-ngl-molecule")

app.layout = html.Div([viewer_help, molecule_dropdown, molecule_viewer])

@app.callback(
    Output("default-ngl-molecule", 'data'),
    Output("default-ngl-molecule", "molStyles"),
    Input("default-ngl-molecule-dropdown", "value")
)
def return_molecule(value):
    """Provide the viewer data and style for the structure picked in the dropdown.

    Parameters
    ----------
    value
        Current value of the dropdown (a PDB id), or None when nothing is selected.

    Returns
    -------
    tuple
        A one-element list with the result of ``ngl_parser.get_data`` for
        ``value`` and the dictionary of viewer style settings.

    Raises
    ------
    PreventUpdate
        When ``value`` is None, so the viewer is left as it is.
    """
    if value is None:
        raise PreventUpdate

    viewer_style = dict(
        representations=["cartoon", "axes+box"],
        chosenAtomsColor="white",
        chosenAtomsRadius=1,
        molSpacingXaxis=100,
    )

    molecule = ngl_parser.get_data(
        data_path=data_path,
        pdb_id=value,
        color='red',
        reset_view=True,
        local=False,
    )

    return [molecule], viewer_style

if __name__ == '__main__':
    # Keep the debug tooling but switch off the Flask reloader: it works by
    # restarting the process, which is not possible from a notebook kernel.
    app.run_server(debug=True, use_reloader=False)

ModuleNotFoundError: No module named 'dash_bio'