In [1]:
from protein_configuration import distance_residue, distance_cutoff
import MDAnalysis
from MDAnalysis.analysis import distances
import itertools
from numpy.lib.function_base import average
import pandas as pd
import dask
import dask.multiprocessing
from dask.distributed import Client
#dask.config.set(scheduler='processes')
#client = Client()
#client

In [2]:
#directory = '/home/emanuele/TTR/greta_cutoff_55_ex_2/epsilon_0275_latestdihedrals_newljr_harp0/monomer_test/plain_MD'
directory = '/home/emanuele/ABeta'
reference_structure = f'{directory}/topol.gro'
reference_trajectory = f'{directory}/reduced_traj_red.xtc'

In [3]:
def make_pairs(atomgroup, frame_index, pairs_ai, pairs_aj):

    framed_peptide = atomgroup.universe.trajectory[frame_index]
    self_distance = distances.self_distance_array(framed_peptide.positions)
    monomer_pairs_df = pd.DataFrame(columns=['ai', 'aj','ai_name', 'aj_name', 'ai_resnum', 'aj_resnum', 'distances'])
    monomer_pairs_df['ai'] = pairs_ai
    monomer_pairs_df['aj'] = pairs_aj
    monomer_pairs_df['distances'] = self_distance
    monomer_pairs_df = monomer_pairs_df[monomer_pairs_df['distances'] < distance_cutoff]
    monomer_pairs_df[['ai_name','ai_resnum']] = monomer_pairs_df.ai.str.split("_", expand=True)
    monomer_pairs_df[['aj_name','aj_resnum']] = monomer_pairs_df.aj.str.split("_", expand=True)
    monomer_pairs_df = monomer_pairs_df.astype({"ai_resnum": int, "aj_resnum": int})
    monomer_pairs_df.drop(monomer_pairs_df[abs(monomer_pairs_df['aj_resnum'] - monomer_pairs_df['ai_resnum']) < distance_residue].index, inplace=True)

    return monomer_pairs_df    

In [4]:
u = MDAnalysis.Universe(reference_structure, reference_trajectory)
peptides = u.select_atoms('all')
print('Residues: ', u.residues)
print('Atoms: ', len(peptides))

atomtypes = []
for atom in peptides:
    atp = str(atom.name) + '_' + str(atom.resnum)
    atomtypes.append(atp)

pairs_list = list(itertools.combinations(atomtypes, 2))
pairs_ai, pairs_aj = [], []
for n in range(0, len(pairs_list)):
    i = pairs_list[n][0]
    pairs_ai.append(i)
    j = pairs_list[n][1]
    pairs_aj.append(j)

print('Pairs list: ',len(pairs_list))
print('Number of frames: ', len(u.trajectory))
total_frames = len(u.trajectory)    

Residues:  <ResidueGroup [<Residue ASP, 1>, <Residue ALA, 2>, <Residue GLU, 3>, ..., <Residue VAL, 40>, <Residue ILE, 41>, <Residue ALA, 42>]>
Atoms:  627
Pairs list:  196251
Number of frames:  7921


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  residx = np.zeros_like(criteria[0], dtype=np.int)


In [14]:
job_list = []
for frame_index in range(u.trajectory.n_frames):
    print(f'{frame_index} out of {total_frames}')
    job_list.append(dask.delayed(make_pairs(atomgroup=peptides, frame_index=frame_index, pairs_ai=pairs_ai, pairs_aj=pairs_aj)))

monomer_pairs_df = pd.concat(dask.compute(job_list)[0])
monomer_pairs_df

0 out of 7921
1 out of 7921
2 out of 7921
3 out of 7921
4 out of 7921
5 out of 7921
6 out of 7921
7 out of 7921
8 out of 7921
9 out of 7921
10 out of 7921
11 out of 7921
12 out of 7921
13 out of 7921
14 out of 7921
15 out of 7921
16 out of 7921
17 out of 7921
18 out of 7921
19 out of 7921
20 out of 7921
21 out of 7921
22 out of 7921
23 out of 7921
24 out of 7921
25 out of 7921
26 out of 7921
27 out of 7921
28 out of 7921
29 out of 7921
30 out of 7921
31 out of 7921
32 out of 7921
33 out of 7921
34 out of 7921
35 out of 7921
36 out of 7921
37 out of 7921
38 out of 7921
39 out of 7921
40 out of 7921
41 out of 7921
42 out of 7921
43 out of 7921
44 out of 7921
45 out of 7921
46 out of 7921
47 out of 7921
48 out of 7921
49 out of 7921
50 out of 7921
51 out of 7921
52 out of 7921
53 out of 7921
54 out of 7921
55 out of 7921
56 out of 7921
57 out of 7921
58 out of 7921
59 out of 7921
60 out of 7921
61 out of 7921
62 out of 7921
63 out of 7921
64 out of 7921
65 out of 7921
66 out of 7921
67 ou

In [None]:
count_ai, count_aj, count_distance, count_ratio, average_distance = [], [], [], [], []

total_pairs = len(pairs_list)
for n_counter, pair in enumerate(pairs_list, 1):
    print(f'Processing {n_counter} out of {total_pairs}: {pair}')
    # filtering the data frame based on the pairs values
    count_ai.append(pair[0])
    count_aj.append(pair[1])
    # salvati il df che serve per la media delle distanze e del sigma
    counts_df = monomer_pairs_df[(monomer_pairs_df['ai'] == pair[0]) & (monomer_pairs_df['aj'] == pair[1])]
    average_distance.append(counts_df['distances'].mean())
    count_distance.append(len(counts_df))
    count_ratio.append(len(counts_df)/len(u.trajectory))


In [None]:

pairs_count = pd.DataFrame(columns=['ai', 'aj', 'count', 'ratio', 'average_distance'])
pairs_count['ai'] = count_ai
pairs_count['aj'] = count_aj
pairs_count['count'] = count_distance
pairs_count['ratio'] = count_ratio
pairs_count['average_distance'] = average_distance
pairs_count.sort_values(by = ['ratio'], inplace = True, ascending=False)
pairs_count


In [None]:
file = open('monomer_pairs2.txt', 'w')
file.write(pairs_count.to_string(index=False, header=False))
file.close()

In [None]:
#def pairs_counter(pair, monomer_pairs_df):
#    #print(f'Processing {n_counter} out of {total_pairs}: {pair}')
#    # filtering the data frame based on the pairs values
#    count_ai.append(pair[0])
#    count_aj.append(pair[1])
#    # salvati il df che serve per la media delle distanze e del sigma
#    counts_df = monomer_pairs_df[(monomer_pairs_df['ai'] == pair[0]) & (monomer_pairs_df['aj'] == pair[1])]
#    average_distance.append(counts_df['distances'].mean())
#    count_distance.append(len(counts_df))
#    count_ratio.append(len(counts_df)/len(u.trajectory))
#
#
#    return average_distance, count_distance, count_ratio


In [None]:
#job_list = []
#for pair in pairs_list:
#    job_list.append(dask.delayed(pairs_counter(pair=pair, monomer_pairs_df=monomer_pairs_df)))
#
#pairs_count = pd.concat(dask.compute(job_list)[0])
#pairs_count