-
Notifications
You must be signed in to change notification settings - Fork 6
/
PyMOL_VisFeatDiffs.py
180 lines (163 loc) · 8.98 KB
/
PyMOL_VisFeatDiffs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
# This file is part of PySFD.
#
# Copyright (c) 2018 Sebastian Stolzenberg,
# Computational Molecular Biology Group,
# Freie Universitaet Berlin (GER)
#
# for any feedback or questions, please contact the author:
# Sebastian Stolzenberg <ss629@cornell.edu>
#
# PySFD is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
##############################################################################
#
# PyMOL_VisFeatDiffs
#
from pymol.cgo import *
import numpy as np
import pandas as pd
class PyMOL_VisFeatDiffs(object):
    '''
    Visualizes Significant Feature Differences that are
    common among the pairs of simulated ensembles defined in l_SDApair, but that are
    not significantly different among all pairs defined in l_not_SDApair

    This is a base class: it prepares the PyMOL scene, loads the feature
    difference table and the structures, and delegates the drawing of each
    feature row to ``_add_vis()``, which has to be provided by a subclass.

    Parameters
    ----------
    * l_SDApair         : list of 2-d tuples of str
                          pairs of simulated ensembles compared by
                          comp_feature_diffs
    * l_not_SDApair     : list of 2-d tuples of str
                          pairs of simulated ensembles compared by
                          comp_feature_diffs
    * feature_func_name : name of the feature function used, e.g. "srf.chi1.std_err"
    * stattype          : type of statistics, i.e. either "samplebatches" or "raw"
    * nsigma            : float, level of statistical significance, measured in multiples of standard errors
    * nfunit            : float, level of biological significance, measured in multiples of feature units
                          (Note: significance is defined by both statistical AND biological significance !)
    * intrajformat      : is the trajectory format; if it is not "pdb", the file
                          input/<ens>/r_00000/<ens>.r_00000.prot.<intrajformat>
                          is loaded on top of the PDB structure
    * df_rgn_seg_res_bb : pandas DataFrame (default: None) that defines
                          regions by segIDs and resIDs for coarse-grained results, e.g.:
                          df_rgn_seg_res_bb = pd.DataFrame({'rgn' : ["a1", "a2", "b1", "b2", "c"],
                                                            'seg' : ["A", "A", "B", "B", "C"],
                                                            'res' : [range(4,83), range(83,185), range(4,95), range(95,191), range(102,121)]})
                          if None, just use non-coarse-grained results
    * VisFeatDiffsDir   : directory containing center_of_mass.py, which is
                          run inside PyMOL by vis_feature_diffs()
    * outdir            : output directory path (not needed right now)
    * myview            : specific PyMOL view to save visualizations, given as
                          one string of 18 comma-separated numbers, e.g.,
                          "-0.635461390, 0.528695464, -0.562786579,
                           -0.764068067, -0.535845578, 0.359358341,
                           -0.111567713, 0.658326149, 0.744472742,
                           0.000470890, 0.000115473, -42.329708099,
                           0.563536644, -9.008704185, -3.723406792,
                           -116.611633301, 200.864410400, 0.000000000"
                          if None, a built-in default view is used
    '''

    # view applied when the caller does not pass ``myview``
    _DEFAULT_VIEW = (
        "0.652665138, 0.205351248, -0.729285538,"
        "0.711609006, 0.164262310, 0.683099866,"
        "0.260070741, -0.964803815, -0.038922187,"
        "-0.000003442, -0.000028193, -158.983245850,"
        "-1.527315855, 1.568387151, -11.586393356,"
        "104.869468689, 213.097259521, 20.000000000"
    )

    def __init__(self, l_SDApair, l_not_SDApair, feature_func_name, stattype, nsigma, nfunit, intrajformat, df_rgn_seg_res_bb=None, VisFeatDiffsDir=None, outdir=None, myview=None):
        '''
        Store the visualization settings; see the class docstring for the
        meaning of each parameter.

        Raises
        ------
        ValueError : if df_rgn_seg_res_bb is neither None nor a pandas DataFrame
        '''
        self.l_SDApair = l_SDApair
        self.l_not_SDApair = l_not_SDApair
        self.feature_func_name = feature_func_name
        self.stattype = stattype
        self.nsigma = nsigma
        self.nfunit = nfunit
        self.intrajformat = intrajformat
        self.df_rgn_seg_res_bb = df_rgn_seg_res_bb
        self.VisFeatDiffsDir = VisFeatDiffsDir
        self.outdir = outdir
        if (df_rgn_seg_res_bb is not None) and (not isinstance(df_rgn_seg_res_bb, pd.DataFrame)):
            # echo the offending value before failing
            print(df_rgn_seg_res_bb)
            raise ValueError("df_rgn_seg_res_bb has to be either None or a pandas DataFrame!")
        self.myview = self._DEFAULT_VIEW if myview is None else myview
        # coloring for feature difference visualizations:
        # index 0 -> "less" (red), index 1 -> "more" (blue)
        self.l_r = [1.0, 0.0]
        self.l_g = [0.0, 0.0]
        self.l_b = [0.0, 1.0]
        self.sgn2col = ["less", "more"]

    def _add_vis(self, mymol, row, l_cgo):
        '''
        Hook called by vis_feature_diffs() once per feature row and ensemble.

        Parameters
        ----------
        * mymol : name of the PyMOL object of the current ensemble
        * row   : one row of the feature difference table
        * l_cgo : list that is passed to cmd.load_cgo() after all rows
                  were processed, if it is non-empty

        The return value is ignored by vis_feature_diffs().
        The base class has no visualization of its own, so it always raises.
        '''
        raise ValueError("Don't run _add_vis() from the parent class!")

    def _diff_data_path(self):
        '''
        Return the path of the feature difference table that belongs to
        the ensemble pairs and significance levels stored on this instance.
        '''
        s_SDApairs = "_and_".join(["_vs_".join(x) for x in self.l_SDApair])
        instem = "output/meta/%s/%s" % (self.feature_func_name, self.stattype)
        # results shared by several pairs (or excluding pairs) live in "common"
        if (len(self.l_SDApair) > 1) or (len(self.l_not_SDApair) > 0):
            instem += "/common"
        if len(self.l_not_SDApair) > 0:
            s_SDApairs += "_not_" + "_and_".join(["_vs_".join(x) for x in self.l_not_SDApair])
        return "%s/%s.%s.%s.nsigma_%.6f.nfunit_%.6f.dat" % (instem,
                                                            self.feature_func_name,
                                                            self.stattype,
                                                            s_SDApairs,
                                                            self.nsigma,
                                                            self.nfunit)

    @staticmethod
    def _load_feature_diffs(infilename):
        '''
        Read the tab-separated table with a two-row header and return it
        without the "mf"/"sf" columns and with a single-level header.
        '''
        df_features = pd.read_csv(infilename, header=[0, 1], sep="\t", engine="python")
        # blank out the placeholder labels pandas generates for empty header cells
        unnamed = [i for i in df_features.columns.get_level_values(1) if "Unnamed: " in i]
        df_features.rename(columns={key: "" for key in unnamed}, inplace=True)
        df_features.drop(["mf", "sf"], level=1, axis=1, inplace=True)
        df_features.columns = df_features.columns.droplevel(1)
        return df_features

    def vis_feature_diffs(self):
        '''
        Set up the PyMOL scene, load every ensemble's first replica,
        align it onto a reference structure and let _add_vis() draw the
        feature differences on it.

        All settings are taken from the instance attributes set in __init__.
        '''
        # imported explicitly so this method does not rely on the
        # module-level star import from pymol.cgo re-exporting ``cmd``
        from pymol import cmd

        # define colors for bars indicating feature differences
        cmd.set_color("less", [self.l_r[0], self.l_g[0], self.l_b[0]])
        cmd.set_color("more", [self.l_r[1], self.l_g[1], self.l_b[1]])
        # prepare display
        cmd.bg_color("white")
        cmd.set("defer_builds_mode", 3)
        cmd.set("async_builds", 1)
        cmd.set("cartoon_cylindrical_helices", 0)
        cmd.set("ray_shadow", 0)
        cmd.set("ray_opaque_background", "on")
        cmd.set("valence", 0)
        cmd.do("run %s/center_of_mass.py" % self.VisFeatDiffsDir)

        if self.df_rgn_seg_res_bb is None:
            cmd.set("cartoon_color", "white")

        # load in diff data
        df_features = self._load_feature_diffs(self._diff_data_path())

        # load reference structure for structural alignments
        l_mol = np.unique([y for x in list(self.l_SDApair) + list(self.l_not_SDApair) for y in x])
        refmol = "ref.%s" % l_mol[0]
        cmd.load("input/%s/r_00000/%s.r_00000.prot.pdb" % (l_mol[0], l_mol[0]), refmol)

        # load in conformations and add visualizations
        for mymol in l_mol:
            cmd.load("input/%s/r_00000/%s.r_00000.prot.pdb" % (mymol, mymol), mymol)
            if self.intrajformat != "pdb":
                cmd.load_traj("input/%s/r_00000/%s.r_00000.prot.%s" % (mymol, mymol, self.intrajformat), object=mymol, interval=1, start=1, stop=1, state=1)
            cmd.cealign(refmol, mymol)
            cmd.hide("everything", mymol)
            cmd.show("cartoon", "/%s" % (mymol))
            cmd.color("white", "/%s" % (mymol))
            l_cgo = []
            for index, row in df_features.iterrows():
                self._add_vis(mymol, row, l_cgo)
            if len(l_cgo) != 0:
                cmd.load_cgo(l_cgo, "%s.diffs" % (mymol))

        # the reference was only needed as alignment target
        cmd.delete(refmol)
        if self.myview is not None:
            cmd.set_view(self.myview)
        #cmd.ray()
        #cmd.save("" % (outdir))