-
Notifications
You must be signed in to change notification settings - Fork 30
/
reference.py
executable file
·168 lines (148 loc) · 6.71 KB
/
reference.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
#!/usr/bin/env python
import IMP.multifit
from IMP import ArgumentParser
__doc__ = "Compare output models to a reference structure."
# analyse the ensemble, first we will do the rmsd stuff
def get_placement_scores_from_coordinates(model_components_coords,
native_components_coords):
"""
Computes the placement score for each of the components
@param model_components_coords A list with the coordinates for each
component
@param native_components_coords A list with the coordinates for each
component in the native assembly
"""
distances = []
angles = []
for model_coords, native_coords in zip(
model_components_coords, native_components_coords):
distance, angle = get_placement_score_from_coordinates(model_coords,
native_coords)
distances.append(distance)
angles.append(angle)
return distances, angles
def get_placement_score_from_coordinates(model_coords, native_coords):
"""
Computes the position error (placement distance) and the orientation
error (placement angle) of the coordinates in model_coords respect to
the coordinates in native_coords.
placement distance - translation between the centroids of the
coordinates
placement angle - Angle in the axis-angle formulation of the rotation
aligning the two rigid bodies.
"""
native_centroid = IMP.algebra.get_centroid(native_coords)
model_centroid = IMP.algebra.get_centroid(model_coords)
translation_vector = native_centroid - model_centroid
distance = translation_vector.get_magnitude()
if (len(model_coords) != len(native_coords)):
raise ValueError(
"Mismatch in the number of members %d %d " % (
len(model_coords),
len(native_coords)))
TT = IMP.algebra.get_transformation_aligning_first_to_second(model_coords,
native_coords)
P = IMP.algebra.get_axis_and_angle(TT.get_rotation())
angle = P.second
return distance, angle
def get_rmsd(hierarchy1, hierarchy2):
xyz1 = [IMP.core.XYZ(leaf) for leaf in IMP.atom.get_leaves(hierarchy1)]
xyz2 = [IMP.core.XYZ(leaf) for leaf in IMP.atom.get_leaves(hierarchy2)]
return IMP.atom.get_rmsd(xyz1, xyz2)
def get_components_placement_scores(assembly, native_assembly, align=False):
"""
Compute the placement score of each of the children of an assembly.
@param assembly An atom.Molecule object
@param native_assembly An atom.Molecule object with the native
conformation. Obviously the atoms in assembly and
native_assembly must be the same.
@param align if True, the coordinates are aligned before the score is
calculated.
@return The function returns 2 lists. The first list contains the
placement distances of the children. The second list contains
the placement angles
"""
model_coords_per_child = [get_coordinates(c) # noqa: F821
for c in assembly.get_children()]
native_coords_per_child = [get_coordinates(c) # noqa: F821
for c in native_assembly.get_children()]
if align:
model_coords = []
_ = [model_coords.extend(x) for x in model_coords_per_child]
native_coords = []
_ = [native_coords.extend(x) for x in native_coords_per_child]
T = IMP.algebra.get_transformation_aligning_first_to_second(
model_coords, native_coords)
# get aligned coordinates
new_model_coords_per_child = []
for c in model_coords_per_child:
coords = [T.get_transformed(x) for x in c]
new_model_coords_per_child.append(coords)
model_coords_per_child = new_model_coords_per_child
distances, angles = get_placement_scores_from_coordinates(
native_coords_per_child, model_coords_per_child)
return distances, angles
def parse_args():
desc = """
Compare output models to a reference structure.
The reference structure for each subunit is read from the rightmost column
of the asmb.input file.
"""
p = ArgumentParser(description=desc)
p.add_argument("-m", "--max", type=int, dest="max", default=None,
help="maximum number of models to compare")
p.add_argument("assembly_file", help="assembly file name")
p.add_argument("proteomics_file", help="proteomics file name")
p.add_argument("mapping_file", help="mapping file name")
p.add_argument("combinations_file", help="combinations file name")
return p.parse_args()
def run(asmb_fn, proteomics_fn, mapping_fn, combs_fn, max_comb):
# get rmsd for subunits
mdl = IMP.Model()
combs = IMP.multifit.read_paths(combs_fn)
sd = IMP.multifit.read_settings(asmb_fn)
sd.set_was_used(True)
prot_data = IMP.multifit.read_proteomics_data(proteomics_fn)
mapping_data = IMP.multifit.read_protein_anchors_mapping(prot_data,
mapping_fn)
ensmb = IMP.multifit.load_ensemble(sd, mdl, mapping_data)
ensmb.set_was_used(True)
mhs = ensmb.get_molecules()
mhs_ref = []
for j, mh in enumerate(mhs):
mhs_ref.append(
IMP.atom.read_pdb(
sd.get_component_header(
j).get_reference_fn(
),
mdl))
print("number of combinations:", len(combs), max_comb)
results = []
for i, comb in enumerate(combs[:max_comb]):
if i % 500 == 0:
print(i)
ensmb.load_combination(comb)
scores = []
for j, mh in enumerate(mhs):
mh_ref = mhs_ref[j]
coords1 = []
for xyz in IMP.core.XYZs(IMP.core.get_leaves(mh)):
coords1.append(xyz.get_coordinates())
coords2 = []
for xyz in IMP.core.XYZs(IMP.core.get_leaves(mh_ref)):
coords2.append(xyz.get_coordinates())
scores.append(
get_placement_score_from_coordinates(coords1, coords2))
# scores=get_placement_score_from_coordinates(IMP.core.XYZs(IMP.atom.get_leaves(mh)),
# IMP.core.XYZs(IMP.atom.get_leaves(mh_ref)))
rmsd = get_rmsd(mhs, mhs_ref)
print(i, rmsd, scores)
results.append((rmsd, scores))
ensmb.unload_combination(comb)
return results
def main():
args = parse_args()
return run(args.assembly_file, args.proteomics_file, args.mapping_file,
args.combinations_file, args.max)
if __name__ == "__main__":
main()