In [4]:
import MDAnalysis
from MDAnalysis import analysis
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [5]:
# ---- Input files and analysis window ----
# GROMACS configuration (structure) file
gro_file = 'fullsys.gro'
# Compressed trajectory file
trajectory_file = 'final.xtc'
# Universe holding the structure and trajectory; later cells read it as the global `u`.
u = MDAnalysis.Universe(gro_file, trajectory_file)
# Simulation start and end time (units are not stated in this notebook - TODO confirm)
start = 0
end = 200

In [6]:
# ---------------------------- PEPTIDE 1: EDIT THE VALUES BELOW ----------------------------
peptide1 = {
    # Single-letter amino-acid sequence
    'sequence': 'FVQWFSKFLGRIL',
    # Peptide name (also used to build the output file names)
    'peptide_name': 'Temporin L',
    # Number of copies of this peptide in the system
    'pepnum': 8,
}
# ---------------------------- DERIVED VALUES: DO NOT EDIT ----------------------------
# Residues per peptide, taken from the sequence length
peptide1['resnum'] = len(peptide1['sequence'])
# Total number of residues across all copies
peptide1['restot'] = peptide1['pepnum'] * peptide1['resnum']
# resid of the first residue of the first copy
peptide1['starting_resid'] = 1

In [7]:
# Select peptides. TODO: test with mixtures of peptides of different lengths (expected to work, but not yet verified).
def select_peptides(peptide):
    """Return one ``"resid first:last"`` selection string per peptide copy.

    ``peptide`` is a dict providing ``'pepnum'`` (number of copies),
    ``'resnum'`` (residues per copy) and ``'starting_resid'`` (resid of the
    first residue of the first copy). Copies are assumed to occupy
    consecutive, equally sized resid ranges.
    """
    def _resid_range(copy_index):
        # First and last resid (inclusive) of the copy at this zero-based index.
        first = (copy_index * peptide['resnum']) + peptide['starting_resid']
        last = (copy_index + 1) * peptide['resnum'] + (peptide['starting_resid'] - 1)
        return "resid %s:%s" % (first, last)

    return [_resid_range(copy_index) for copy_index in range(peptide['pepnum'])]
# One selection string per peptide copy; passed to cluster_analysis in a later cell.
peptide_selections = select_peptides(peptide1)

In [8]:
peptide_selections

['resid 1:13',
 'resid 14:26',
 'resid 27:39',
 'resid 40:52',
 'resid 53:65',
 'resid 66:78',
 'resid 79:91',
 'resid 92:104']

In [9]:
# Use networkx to find connectivity using graph theory
import networkx 
from networkx.algorithms.components.connected import connected_components


def to_graph(l):
    """Build an undirected networkx graph from a list of node groups.

    Every item of each group in ``l`` becomes a node, and consecutive items
    within a group are joined by an edge (see ``to_edges``).
    """
    graph = networkx.Graph()
    for group in l:
        # every member of the group is a node ...
        graph.add_nodes_from(group)
        # ... and the group also implies edges between neighbouring members
        graph.add_edges_from(to_edges(group))
    return graph

def to_edges(l):
    """Yield the edges joining consecutive items of ``l``.

    ``l`` is treated as a path through a graph:
        to_edges(['a','b','c','d']) -> (a,b), (b,c), (c,d)

    An empty or single-item ``l`` yields no edges. ``l`` may be any
    iterable; it is consumed once.
    """
    it = iter(l)
    try:
        last = next(it)
    except StopIteration:
        # Empty input: end the generator cleanly. A bare next() here would
        # leak StopIteration, which Python 3.7+ turns into RuntimeError.
        return

    for current in it:
        yield last, current
        last = current



In [1]:
def cluster_analysis(peptide_selections, cutoff=6, step=50):
    """Cluster peptides by minimum inter-peptide distance along the trajectory.

    For every ``step``-th frame of the module-level Universe ``u``, each pair
    of peptides whose closest atoms are less than ``cutoff`` apart is recorded
    as a contact. The connected components of the resulting contact graph
    (built with ``to_graph``) are reported as clusters.

    Parameters
    ----------
    peptide_selections : list of str
        MDAnalysis selection strings, one per peptide. They are resolved once,
        before the trajectory loop, so they must not depend on coordinates.
    cutoff : float, optional
        Contact threshold on the minimum atom-atom distance, in the units of
        ``AtomGroup.positions`` (the original comment calls it angstrom).
        Default 6.
    step : int, optional
        Stride used when iterating over the trajectory. Default 50.

    Returns
    -------
    clust_max : list of [time, size]
        Size of the largest cluster in each analysed frame (0 if no contacts).
    clust_num : list of [time, count]
        Number of clusters in each analysed frame.
    aggregate_final : list of list of set
        Clusters of each analysed frame, as sets of 1-based peptide indices.
    aggregate_list : list of [int, int]
        Contact pairs (1-based indices) of the LAST analysed frame only;
        empty if no frame was analysed.

    Notes
    -----
    * Peptides without any contact never enter the graph, so isolated
      peptides are not counted as clusters of size one.
    * NOTE(review): distances are computed with ``box=None``, i.e. without
      periodic-boundary correction, so contacts across a box edge are not
      detected - confirm this is intended.
    """
    import MDAnalysis.analysis.distances

    # Resolve each selection string once instead of once per pair per frame;
    # the atom groups report the coordinates of whichever frame is current.
    peptide_groups = [u.select_atoms(selection) for selection in peptide_selections]
    n_peptides = len(peptide_groups)

    aggregate_list = []
    aggregate_final = []
    clust_max = []
    clust_num = []
    for ts in u.trajectory[0::step]:
        # Contacts found in this frame
        aggregate_list = []
        # Coordinates of every peptide for the current frame, fetched once
        coords = [group.positions for group in peptide_groups]

        # Visit every unordered pair (i < j) exactly once
        for i in range(n_peptides):
            for j in range(i + 1, n_peptides):
                dist = MDAnalysis.analysis.distances.distance_array(
                    coords[i], coords[j],
                    box=None, result=None, backend='serial')
                # Contact criterion: closest pair of atoms within the cutoff
                if np.min(dist) < cutoff:
                    aggregate_list.append([i + 1, j + 1])

        # Connected components of the contact graph are the clusters
        G = to_graph(aggregate_list)
        cluster = list(connected_components(G))
        aggregate_final.append(cluster)
        # Number of clusters in this frame
        clust_num.append([ts.time, len(cluster)])
        # Largest cluster; the leading 0 keeps np.max valid when there are none
        clust_size = [0] + [len(clust) for clust in cluster]
        clust_max.append([ts.time, np.max(clust_size)])
    return clust_max, clust_num, aggregate_final, aggregate_list

# Call the function and unpack its four return values.
# NOTE: this cell depends on `peptide_selections` (and the Universe `u`) from
# earlier cells; run on a fresh kernel it raises NameError.
max_clust_size,number_of_clusters,cluster_ids,aggregate_list = cluster_analysis(peptide_selections)

NameError: name 'peptide_selections' is not defined

In [137]:
# Write both per-frame time series to CSV for later analysis (possibly a heatmap).
# The peptide name is embedded in each file name as-is.
np.savetxt(
    "max_clust_size_%s.csv" % peptide1['peptide_name'],
    max_clust_size,
    delimiter=",",
)
np.savetxt(
    "number_of_clusters_%s.csv" % peptide1['peptide_name'],
    number_of_clusters,
    delimiter=",",
)



In [11]:
cluster_ids

[[{4, 5}],
 [],
 [{2, 8}, {5, 6}],
 [{2, 8}, {5, 6}],
 [{2, 8}, {5, 6}],
 [{2, 8}, {4, 5, 6}],
 [{2, 8}, {4, 5, 6}],
 [{2, 8}, {4, 5, 6}],
 [{2, 8}, {4, 5, 6}],
 [{2, 8}, {4, 5, 6}],
 [{1, 7}, {2, 8}, {4, 5, 6}],
 [{1, 4, 5, 6, 7}, {2, 8}],
 [{1, 7}, {2, 8}, {4, 5, 6}],
 [{1, 7}, {2, 8}, {4, 5, 6}],
 [{1, 7}, {2, 8}, {4, 5, 6}],
 [{1, 7}, {2, 8}, {4, 5, 6}],
 [{1, 7}, {2, 8}, {4, 5, 6}],
 [{1, 7}, {2, 8}, {4, 5, 6}],
 [{1, 7}, {2, 8}, {4, 5, 6}],
 [{1, 7}, {2, 8}, {4, 5, 6}],
 [{1, 7}, {2, 8}, {4, 5, 6}],
 [{1, 7}, {2, 8}, {4, 5, 6}],
 [{1, 7}, {2, 8}, {4, 5, 6}],
 [{1, 7}, {2, 8}, {4, 5, 6}],
 [{1, 7}, {2, 8}, {4, 5, 6}],
 [{1, 7}, {2, 8}, {4, 5, 6}],
 [{1, 7}, {2, 8}, {4, 5, 6}],
 [{1, 7}, {2, 8}, {4, 5, 6}],
 [{1, 7}, {2, 8}, {4, 5, 6}],
 [{1, 7}, {2, 8}, {4, 5, 6}],
 [{1, 7}, {2, 8}, {4, 5, 6}],
 [{1, 7}, {2, 8}, {4, 5, 6}],
 [{1, 7}, {2, 8}, {4, 5, 6}],
 [{1, 7}, {2, 8}, {4, 5, 6}],
 [{1, 7}, {2, 8}, {4, 5, 6}],
 [{1, 7}, {2, 8}, {4, 5, 6}],
 [{1, 7}, {2, 8}, {4, 5, 6}],
 [{1, 7},

In [12]:
aggregate_list

[[1, 7], [2, 8], [4, 5], [4, 6], [5, 6]]