From aad5b500c1f7594a88dcc2f64b2c51a8da72dfa0 Mon Sep 17 00:00:00 2001 From: FilomenoSanchez Date: Mon, 19 Apr 2021 10:54:59 +0100 Subject: [PATCH 01/37] track_utils performance improvement --- utils/tracks_utils.py | 53 ++++++++++++++++--------------------------- 1 file changed, 20 insertions(+), 33 deletions(-) diff --git a/utils/tracks_utils.py b/utils/tracks_utils.py index 7036036..e6e98ff 100644 --- a/utils/tracks_utils.py +++ b/utils/tracks_utils.py @@ -33,27 +33,25 @@ def retrieve_dataset_prediction(session_id, session, fname, display_settings, ca return dataset.value, session[fname.encode()] -def transform_coords_diagonal_axis(coord, distance, low_bound=False, ratio=1, y_axis=True): - if coord is None: - return None +def transform_coords_diagonal_xaxis(indices, distance, track_idx, ratio=1): + factor = distance / (1 + ratio ** 2) + if track_idx < 4: + factor = factor * -1 + return [idx + factor for idx in indices] - if y_axis: - factor = ratio * (distance / (1 + ratio ** 2)) - if low_bound: - factor = factor * -1 - else: - factor = distance / (1 + ratio ** 2) - if not low_bound: - factor = factor * -1 - return coord + factor +def transform_coords_diagonal_yaxis(prediction, state, distance, track_idx, ratio=1): + factor = ratio * (distance / (1 + ratio ** 2)) + if track_idx > 4: + factor = factor * -1 + return [idx + factor if residue == state else None for idx, residue in enumerate(prediction, 1)] def get_diagonal_trace(prediction, dataset, marker_size, sequence, alpha, color_palette): if prediction is None: return None - x_diagonal = [idx for idx in range(1, len(prediction) + 1)] + x = [idx for idx in range(1, len(prediction) + 1)] states = DatasetStates.__getattr__(dataset).value palette = color_palettes.DatasetColorPalettes.__getattr__(dataset).value.__getattr__(color_palette).value traces = [] @@ -62,14 +60,9 @@ def get_diagonal_trace(prediction, dataset, marker_size, sequence, alpha, color_ y = [idx if residue == state.value else None for idx, residue in enumerate(prediction, 1)] if not any(y): continue - - hovertext = ['Residue: {} ({}) | {}'.format(sequence[idx - 1], idx, state.name) for idx in x_diagonal] - color = palette.__getattr__(state.name).value - color = color.format(alpha) - - traces.append( - create_cmap_trace(x_diagonal, y, 'diamond', marker_size=marker_size, color=color, hovertext=hovertext) - ) + hovertext = ['Residue: {} ({}) | {}'.format(resid, idx, state.name) for idx, resid in enumerate(sequence, 1)] + color = palette.__getattr__(state.name).value.format(alpha) + traces.append(create_cmap_trace(x, y, 'diamond', marker_size=marker_size, color=color, hovertext=hovertext)) return traces @@ -84,21 +77,15 @@ def get_traces(prediction, dataset, track_idx, track_separation, marker_size, al palette = color_palettes.DatasetColorPalettes.__getattr__(dataset).value.__getattr__(color_palette).value track_origin = abs(4 - track_idx) track_distance = track_separation * track_origin - if track_idx > 4: - low_bound = True - else: - low_bound = False + + x = transform_coords_diagonal_xaxis(x_diagonal, track_distance, track_idx) for state in states: - y_diagonal = [idx if residue == state.value else None for idx, residue in enumerate(prediction, 1)] - if not any(y_diagonal): + y = transform_coords_diagonal_yaxis(prediction, state.value, track_distance, track_idx) + if not any(y): continue - - y = [transform_coords_diagonal_axis(y, track_distance, low_bound=low_bound) for y in y_diagonal] - x = [transform_coords_diagonal_axis(x, track_distance, low_bound=low_bound, y_axis=False) for x in x_diagonal] - hovertext = ['%s' % state.name for idx in enumerate(x)] - color = palette.__getattr__(state.name).value - color = color.format(alpha) + hovertext = ['%s' % state.name for i in x] + color = palette.__getattr__(state.name).value.format(alpha) traces.append(create_cmap_trace(x, y, 'diamond', marker_size=marker_size, color=color, hovertext=hovertext)) From 1bb69238e49fe31f1e59f59387410df622a55998 Mon Sep 17 00:00:00 2001 From: FilomenoSanchez Date: Mon, 19 Apr 2021 11:51:36 +0100 Subject: [PATCH 02/37] cmap_utils performance improvement --- utils/cmap_utils.py | 59 ++++++++++++--------------------------------- 1 file changed, 15 insertions(+), 44 deletions(-) diff --git a/utils/cmap_utils.py b/utils/cmap_utils.py index 91056f3..02ea7a8 100644 --- a/utils/cmap_utils.py +++ b/utils/cmap_utils.py @@ -39,10 +39,9 @@ def create_cmap(cmap, idx, display_settings, verbose_labels=None): for contact in cmap: res1_list.append(contact[idx_x]) res2_list.append(contact[idx_y]) - res_x_label = verbose_labels[contact[idx_x] - 1] - res_y_label = verbose_labels[contact[idx_y] - 1] - hover.append(HoverTemplates.CMAP_VERBOSE.format(contact[idx_x], contact[idx_y], contact[2], res_x_label, - res_y_label)) + xlabel = verbose_labels[contact[idx_x] - 1] + ylabel = verbose_labels[contact[idx_y] - 1] + hover.append(HoverTemplates.CMAP_VERBOSE.format(contact[idx_x], contact[idx_y], contact[2], xlabel, ylabel)) else: for contact in cmap: res1_list.append(contact[idx_x]) @@ -52,7 +51,7 @@ def create_cmap(cmap, idx, display_settings, verbose_labels=None): return res1_list, res2_list, hover -def superimpose_cmaps(reference_cmap, predicted_cmap, display_settings): +def create_cmap_sets(reference_cmap, predicted_cmap, display_settings): if display_settings.factor != 0: predicted_cmap = predicted_cmap[:int(round(display_settings.seq_length / display_settings.factor, 0))] if reference_cmap[-1] == 'PDB': @@ -66,31 +65,18 @@ def superimpose_cmaps(reference_cmap, predicted_cmap, display_settings): elif reference_cmap[-1] == 'PDB' or reference_cmap[-1] == 'DISTO': del reference_cmap[-1] - reference_contacts = [contact[:2] for contact in reference_cmap] - predicted_contacts = [contact[:2] for contact in predicted_cmap] - - matched = [] - mismatched = [] - reference = [] - - for contact in reference_cmap: - if contact[:2] in predicted_contacts: - matched.append(contact) - else: - reference.append(contact) - - for contact in predicted_cmap: - if contact[:2] not in reference_contacts: - mismatched.append(contact) + predicted_set = {(x[0], x[1]): x[2] for x in predicted_cmap} + reference_set = {(x[0], x[1]): x[2] for x in reference_cmap} - return reference, matched, mismatched + return reference_set, predicted_set def create_superimposed_cmap(reference_cmap, predicted_cmap, display_settings, verbose_labels): traces = [] - ref, match, mismatch = superimpose_cmaps(reference_cmap, predicted_cmap, display_settings) - predicted_set = {(x[0], x[1]): x[2] for x in predicted_cmap} - reference_set = {(x[0], x[1]): x[2] for x in reference_cmap} + reference_set, predicted_set = create_cmap_sets(reference_cmap, predicted_cmap, display_settings) + ref = reference_set.keys() - predicted_set.keys() + mismatch = predicted_set.keys() - reference_set.keys() + match = reference_set.keys() & predicted_set.keys() x, y, hover = process_superimposed_cmap(ref, reference_set, predicted_set, verbose_labels) traces.append(create_cmap_trace(x, y, 'circle', display_settings.contact_marker_size, 'grey', hover)) @@ -112,16 +98,8 @@ def process_superimposed_cmap(contacts, reference_set, predicted_set, verbose_la if verbose_labels is not None: for contact in contacts: - - if tuple(contact[:2]) in predicted_set.keys(): - pred_confidence = predicted_set[tuple(contact[:2])] - else: - pred_confidence = 0 - if tuple(contact[:2]) in reference_set.keys(): - ref_confidence = reference_set[tuple(contact[:2])] - else: - ref_confidence = 0 - + pred_confidence = predicted_set[contact] if contact in predicted_set.keys() else 0 + ref_confidence = reference_set[contact] if contact in reference_set.keys() else 0 res1_list.append(contact[0]) res2_list.append(contact[1]) res_1_label = verbose_labels[contact[0] - 1] @@ -132,15 +110,8 @@ def process_superimposed_cmap(contacts, reference_set, predicted_set, verbose_la hover_2.append(HoverTemplates.CMAP_SUPERIMPOSE_VERBOSE.format(*label)) else: for contact in contacts: - if tuple(contact[:2]) in predicted_set.keys(): - pred_confidence = predicted_set[tuple(contact[:2])] - else: - pred_confidence = 0 - if tuple(contact[:2]) in reference_set.keys(): - ref_confidence = reference_set[tuple(contact[:2])] - else: - ref_confidence = 0 - + pred_confidence = predicted_set[contact] if contact in predicted_set.keys() else 0 + ref_confidence = reference_set[contact] if contact in reference_set.keys() else 0 res1_list.append(contact[0]) res2_list.append(contact[1]) label = (contact[0], contact[1], ref_confidence, pred_confidence) From cc85f9bdeaea4ab7f5472d52ed2af99b6bb1d88a Mon Sep 17 00:00:00 2001 From: FilomenoSanchez Date: Mon, 19 Apr 2021 12:07:37 +0100 Subject: [PATCH 03/37] refactor code at heatmap_utils.py --- utils/heatmap_utils.py | 68 +++++++++++++++++++++--------------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/utils/heatmap_utils.py b/utils/heatmap_utils.py index df13597..5e46403 100644 --- a/utils/heatmap_utils.py +++ b/utils/heatmap_utils.py @@ -87,44 +87,44 @@ def populate_superimposed_heatmap(reference_cmap, secondary_cmap, heat, hover, v if verbose_labels is not None: for reference_distance in reference_cmap: - if tuple(reference_distance[:2]) in predicted_set.keys(): - predicted_distance = predicted_set[tuple(reference_distance[:2])] - else: - predicted_distance = 9 - error = abs((9 - reference_distance[3]) - (9 - predicted_distance)) - heat[reference_distance[idx_x]][reference_distance[idx_y]] = error - heat[reference_distance[idx_y]][reference_distance[idx_x]] = error - map_a_distance = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(reference_distance[3])) - map_b_distance = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(predicted_distance)) - hover_label_a = HoverTemplates.DISTOGRAM_SUPERIMPOSE_VERBOSE. \ - format(reference_distance[idx_y], reference_distance[idx_x], map_a_distance, map_b_distance, - error, verbose_labels[reference_distance[idx_y] - 1], - verbose_labels[reference_distance[idx_x] - 1]) - hover_label_b = HoverTemplates.DISTOGRAM_SUPERIMPOSE_VERBOSE. \ - format(reference_distance[idx_x], reference_distance[idx_y], map_a_distance, map_b_distance, - error, verbose_labels[reference_distance[idx_x] - 1], - verbose_labels[reference_distance[idx_y] - 1]) - hover[reference_distance[idx_x]][reference_distance[idx_y]] = hover_label_a - hover[reference_distance[idx_y]][reference_distance[idx_x]] = hover_label_b + residues = tuple(reference_distance[:2]) + predicted_bin = predicted_set[residues] if residues in predicted_set.keys() else 9 + reference_bin = reference_distance[3] + error = abs((9 - reference_bin) - (9 - predicted_bin)) + resid_y = reference_distance[idx_y] + resid_x = reference_distance[idx_x] + heat[resid_x][resid_y] = error + heat[resid_y][resid_x] = error + map_a_distance = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(reference_bin)) + map_b_distance = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(predicted_bin)) + hover_label_a = HoverTemplates.DISTOGRAM_SUPERIMPOSE_VERBOSE.format(resid_y, resid_x, map_a_distance, + map_b_distance, error, + verbose_labels[resid_y - 1], + verbose_labels[resid_x - 1]) + hover_label_b = HoverTemplates.DISTOGRAM_SUPERIMPOSE_VERBOSE.format(resid_x, resid_y, map_a_distance, + map_b_distance, error, + verbose_labels[resid_x - 1], + verbose_labels[resid_y - 1]) + hover[resid_x][resid_y] = hover_label_a + hover[resid_y][resid_x] = hover_label_b else: for reference_distance in reference_cmap: - if tuple(reference_distance[:2]) in predicted_set.keys(): - predicted_distance = predicted_set[tuple(reference_distance[:2])] - else: - predicted_distance = 9 - error = abs((9 - reference_distance[3]) - (9 - predicted_distance)) - heat[reference_distance[idx_x]][reference_distance[idx_y]] = error - heat[reference_distance[idx_y]][reference_distance[idx_x]] = error - map_a_distance = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(reference_distance[3])) - map_b_distance = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(predicted_distance)) - hover_label_a = HoverTemplates.DISTOGRAM_SUPERIMPOSE.format(reference_distance[idx_y], - reference_distance[idx_x], map_a_distance, + residues = tuple(reference_distance[:2]) + predicted_bin = predicted_set[residues] if residues in predicted_set.keys() else 9 + reference_bin = reference_distance[3] + error = abs((9 - reference_bin) - (9 - predicted_bin)) + resid_y = reference_distance[idx_y] + resid_x = reference_distance[idx_x] + heat[resid_x][resid_y] = error + heat[resid_y][resid_x] = error + map_a_distance = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(reference_bin)) + map_b_distance = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(predicted_bin)) + hover_label_a = HoverTemplates.DISTOGRAM_SUPERIMPOSE.format(resid_y, resid_x, map_a_distance, map_b_distance, error) - hover_label_b = HoverTemplates.DISTOGRAM_SUPERIMPOSE.format(reference_distance[idx_x], - reference_distance[idx_y], map_a_distance, + hover_label_b = HoverTemplates.DISTOGRAM_SUPERIMPOSE.format(resid_x, resid_y, map_a_distance, map_b_distance, error) - hover[reference_distance[idx_x]][reference_distance[idx_y]] = hover_label_a - hover[reference_distance[idx_y]][reference_distance[idx_x]] = hover_label_b + hover[resid_x][resid_y] = hover_label_a + hover[resid_y][resid_x] = hover_label_b return heat, hover From 21684b060cc74d83d4c5984a6f6fd68112064d37 Mon Sep 17 00:00:00 2001 From: FilomenoSanchez Date: Tue, 20 Apr 2021 14:05:18 +0100 Subject: [PATCH 04/37] refactor cmap density cache keys --- utils/cache_utils.py | 8 +++++--- utils/tests/test_cache_utils.py | 12 ++++++++++-- utils/tracks_utils.py | 2 +- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/utils/cache_utils.py b/utils/cache_utils.py index 58f1f02..28645dc 100644 --- a/utils/cache_utils.py +++ b/utils/cache_utils.py @@ -20,7 +20,9 @@ class CacheKeys(Enum): SECONDARY_STRUCTURE = loaders.DatasetReference.SECONDARY_STRUCTURE.value CONSERVATION = loaders.DatasetReference.CONSERVATION.value DISORDER = loaders.DatasetReference.DISORDER.value - METADATA_TAG = 'CONPLOT-INTERNAL-USE-ONLY-METADATA-PROTECTED-TAG' + CMAP_DENSITY = '{}_CONPLOT-INTERNAL-USE-ONLY-METADATA-DENSITY-TAG_{}' + CMAP_ERROR = '{}_{}_CONPLOT-INTERNAL-USE-ONLY-METADATA-ERROR-TAG_{}' + PROTECETED_TAG = 'CONPLOT-INTERNAL-USE-ONLY-METADATA' def retrieve_density(session_id, density_cachekey, cache): @@ -51,7 +53,7 @@ def remove_density(session_id, cache, fname): return density_list = decompress_data(density_list) - density_cachekey = '{}_{}'.format(fname, CacheKeys.METADATA_TAG.value) + density_cachekey = '{}_{}'.format(fname, CacheKeys.PROTECETED_TAG.value) for density in density_list: if density_cachekey in density: cache.hdel(session_id, density) @@ -60,7 +62,7 @@ def remove_density(session_id, cache, fname): def is_valid_fname(fname): - if CacheKeys.METADATA_TAG.value in fname or fname in [x.value for x in CacheKeys]: + if CacheKeys.PROTECETED_TAG.value in fname or any([x.value for x in CacheKeys if x.value == fname]): return False return True diff --git a/utils/tests/test_cache_utils.py b/utils/tests/test_cache_utils.py index 626a04c..7cf2162 100644 --- a/utils/tests/test_cache_utils.py +++ b/utils/tests/test_cache_utils.py @@ -78,9 +78,9 @@ def test_8(self): self.assertDictEqual(expected, self.cache.hgetall(self.session_id)) def test_9(self): - cachekey_1 = 'fname_1_{}_2'.format(cache_utils.CacheKeys.METADATA_TAG.value).encode() + cachekey_1 = cache_utils.CacheKeys.CMAP_DENSITY.value.format('fname_1', '2').encode() density_1 = [1, 2, 3, 3, 4, 5] - cachekey_2 = 'fname_2_{}_2'.format(cache_utils.CacheKeys.METADATA_TAG.value).encode() + cachekey_2 = cache_utils.CacheKeys.CMAP_DENSITY.value.format('fname_2', '2').encode() density_2 = [5, 6, 7, 8, 9, 0] cache_utils.store_density(self.session_id, cachekey_1, density_1, self.cache) @@ -90,3 +90,11 @@ def test_9(self): expected_cache = {b'id': cache_utils.compress_data(self.session_id)} cache_utils.remove_all_density(self.session_id, self.cache) self.assertDictEqual(expected_cache, self.cache.hgetall(self.session_id)) + + def test_10(self): + self.assertTrue(cache_utils.is_valid_fname('fname_1')) + self.assertTrue(cache_utils.is_valid_fname('fname_1-METADATA-DENSITY')) + self.assertFalse(cache_utils.is_valid_fname('fname_CONPLOT-INTERNAL-USE-ONLY-METADATA_1')) + self.assertFalse(cache_utils.is_valid_fname('{}_CONPLOT-INTERNAL-USE-ONLY-METADATA_{}')) + self.assertFalse(cache_utils.is_valid_fname(cache_utils.CacheKeys.CMAP_DENSITY.value.format('fname_1', '2'))) + diff --git a/utils/tracks_utils.py b/utils/tracks_utils.py index e6e98ff..93b7309 100644 --- a/utils/tracks_utils.py +++ b/utils/tracks_utils.py @@ -18,7 +18,7 @@ def retrieve_dataset_prediction(session_id, session, fname, display_settings, ca return DatasetReference.HYDROPHOBICITY.value, session[DatasetReference.HYDROPHOBICITY.value.encode()] if fname in session[DatasetReference.CONTACT_MAP.value.encode()]: - cachekey = '{}_{}_{}'.format(fname, cache_utils.CacheKeys.METADATA_TAG.value, display_settings.factor).encode() + cachekey = cache_utils.CacheKeys.CMAP_DENSITY.value.format(fname, display_settings.factor).encode() if cachekey in session.keys(): density = session[cachekey] elif cache.hexists(session_id, cachekey): From a12441a42d0c5d144e06773d73dd040cee197822 Mon Sep 17 00:00:00 2001 From: FilomenoSanchez Date: Wed, 21 Apr 2021 10:07:55 +0100 Subject: [PATCH 05/37] add -density and -hydrophobicity tags to track selector --- utils/cache_utils.py | 9 ++++++++- utils/plot_utils.py | 5 +++-- utils/tests/test_cache_utils.py | 2 ++ utils/tracks_utils.py | 5 +++-- 4 files changed, 16 insertions(+), 5 deletions(-) diff --git a/utils/cache_utils.py b/utils/cache_utils.py index 28645dc..a7e53c4 100644 --- a/utils/cache_utils.py +++ b/utils/cache_utils.py @@ -1,4 +1,5 @@ from enum import Enum +from fast_enum import FastEnum import json import gzip import loaders @@ -25,6 +26,12 @@ class CacheKeys(Enum): PROTECETED_TAG = 'CONPLOT-INTERNAL-USE-ONLY-METADATA' +class MetadataTags(FastEnum): + DENSITY = ' - density' + HYDROPHOBICITY = ' - hydrophobicity' + TAG = 'CONPLOT-INTERNAL-USE-ONLY-METADATA' + + def retrieve_density(session_id, density_cachekey, cache): density = cache.hget(session_id, density_cachekey) return decompress_data(density) @@ -62,7 +69,7 @@ def remove_density(session_id, cache, fname): def is_valid_fname(fname): - if CacheKeys.PROTECETED_TAG.value in fname or any([x.value for x in CacheKeys if x.value == fname]): + if any([x for x in CacheKeys if x.value == fname]) or any([tag for tag in MetadataTags if tag in fname]): return False return True diff --git a/utils/plot_utils.py b/utils/plot_utils.py index dddc2b6..bb75b29 100644 --- a/utils/plot_utils.py +++ b/utils/plot_utils.py @@ -227,9 +227,10 @@ def get_available_data(session): available_cmaps = [] for cmap_fname in session[DatasetReference.CONTACT_MAP.value.encode()]: available_cmaps.append(cmap_fname) - available_tracks.append(cmap_fname) + available_tracks.append('{}{}'.format(cmap_fname, cache_utils.MetadataTags.DENSITY)) - available_tracks.append(session[DatasetReference.SEQUENCE.value.encode()]) + available_tracks.append('{}{}'.format(session[DatasetReference.SEQUENCE.value.encode()], + cache_utils.MetadataTags.HYDROPHOBICITY)) return available_tracks, available_cmaps diff --git a/utils/tests/test_cache_utils.py b/utils/tests/test_cache_utils.py index 7cf2162..58de1f6 100644 --- a/utils/tests/test_cache_utils.py +++ b/utils/tests/test_cache_utils.py @@ -96,5 +96,7 @@ def test_10(self): self.assertTrue(cache_utils.is_valid_fname('fname_1-METADATA-DENSITY')) self.assertFalse(cache_utils.is_valid_fname('fname_CONPLOT-INTERNAL-USE-ONLY-METADATA_1')) self.assertFalse(cache_utils.is_valid_fname('{}_CONPLOT-INTERNAL-USE-ONLY-METADATA_{}')) + self.assertFalse(cache_utils.is_valid_fname('fname - density')) + self.assertFalse(cache_utils.is_valid_fname('seq - hydrophobicity')) self.assertFalse(cache_utils.is_valid_fname(cache_utils.CacheKeys.CMAP_DENSITY.value.format('fname_1', '2'))) diff --git a/utils/tracks_utils.py b/utils/tracks_utils.py index 93b7309..0b76b2f 100644 --- a/utils/tracks_utils.py +++ b/utils/tracks_utils.py @@ -14,10 +14,11 @@ def calculate_density(cmap, seq_length, factor): def retrieve_dataset_prediction(session_id, session, fname, display_settings, cache): - if fname == session[DatasetReference.SEQUENCE.value.encode()]: + if cache_utils.MetadataTags.HYDROPHOBICITY in fname: return DatasetReference.HYDROPHOBICITY.value, session[DatasetReference.HYDROPHOBICITY.value.encode()] - if fname in session[DatasetReference.CONTACT_MAP.value.encode()]: + if cache_utils.MetadataTags.DENSITY in fname: + fname = fname[:-len(cache_utils.MetadataTags.DENSITY)] cachekey = cache_utils.CacheKeys.CMAP_DENSITY.value.format(fname, display_settings.factor).encode() if cachekey in session.keys(): density = session[cachekey] From feaac686bd4926ae946298be2f797a05a773e22d Mon Sep 17 00:00:00 2001 From: FilomenoSanchez Date: Wed, 21 Apr 2021 11:56:56 +0100 Subject: [PATCH 06/37] refactor track selector layout and add cmap diff selection to the list of available tracks --- app.py | 2 +- components/cards.py | 64 +++++++++++++++++++++++++++++++++--------- utils/cache_utils.py | 8 ++++-- utils/heatmap_utils.py | 4 +-- utils/plot_utils.py | 42 +++++++++++++++------------ utils/tracks_utils.py | 6 ++-- 6 files changed, 85 insertions(+), 41 deletions(-) diff --git a/app.py b/app.py index 88af642..f7dbf5f 100644 --- a/app.py +++ b/app.py @@ -454,7 +454,7 @@ def create_ConPlot(plot_click, refresh_click, factor, contact_marker_size, track if any([True for x in (factor, contact_marker_size, track_marker_size, track_separation) if x is None or x < 0]): app.logger.info('Session {} invalid display control value detected'.format(session_id)) return no_update, components.InvalidInputModal(), no_update, no_update - elif superimpose and ('---' in cmap_selection or len(set(cmap_selection)) == 1): + elif superimpose and ('--- Empty ---' in cmap_selection or len(set(cmap_selection)) == 1): return no_update, components.InvalidMapSelectionModal(), no_update, no_update app.logger.info('Session {} creating conplot'.format(session_id)) diff --git a/components/cards.py b/components/cards.py index 0e7d141..d91e136 100644 --- a/components/cards.py +++ b/components/cards.py @@ -1,3 +1,4 @@ +from utils import cache_utils import components import dash_core_components as dcc import dash_bootstrap_components as dbc @@ -244,6 +245,7 @@ def DisplayControlCard(available_tracks=None, selected_tracks=None, selected_cma ) elif selected_tracks is not None and len(selected_tracks) >= 9 \ and selected_cmaps is not None and len(selected_cmaps) >= 2: + tracks = get_track_options(available_tracks) return html.Div([ components.DisplayControlHeader(), html.Br(), @@ -286,23 +288,23 @@ def DisplayControlCard(available_tracks=None, selected_tracks=None, selected_cma html.H5("Active tracks", className="card-text", style={'text-align': "center"}), html.Hr(), html.Br(), - TrackSelectionCard('-4', selected_tracks[0], available_tracks=available_tracks), + dbc.Card(components.TrackLayoutSelector('-4', tracks, selected_tracks[0]), outline=False), html.Br(), - TrackSelectionCard('-3', selected_tracks[1], available_tracks=available_tracks), + dbc.Card(components.TrackLayoutSelector('-3', tracks, selected_tracks[1]), outline=False), html.Br(), - TrackSelectionCard('-2', selected_tracks[2], available_tracks=available_tracks), + dbc.Card(components.TrackLayoutSelector('-2', tracks, selected_tracks[2]), outline=False), html.Br(), - TrackSelectionCard('-1', selected_tracks[3], available_tracks=available_tracks), + dbc.Card(components.TrackLayoutSelector('-1', tracks, selected_tracks[3]), outline=False), html.Br(), - TrackSelectionCard(' 0', selected_tracks[4], available_tracks=available_tracks), + dbc.Card(components.TrackLayoutSelector('0', tracks, selected_tracks[4]), outline=False), html.Br(), - TrackSelectionCard('+1', selected_tracks[5], available_tracks=available_tracks), + dbc.Card(components.TrackLayoutSelector('+1', tracks, selected_tracks[5]), outline=False), html.Br(), - TrackSelectionCard('+2', selected_tracks[6], available_tracks=available_tracks), + dbc.Card(components.TrackLayoutSelector('+2', tracks, selected_tracks[6]), outline=False), html.Br(), - TrackSelectionCard('+3', selected_tracks[7], available_tracks=available_tracks), + dbc.Card(components.TrackLayoutSelector('+3', tracks, selected_tracks[7]), outline=False), html.Br(), - TrackSelectionCard('+4', selected_tracks[8], available_tracks=available_tracks), + dbc.Card(components.TrackLayoutSelector('+4', tracks, selected_tracks[8]), outline=False), html.Br(), html.Br(), html.H5("Colour palettes", className="card-text", style={'text-align': "center"}), @@ -335,11 +337,45 @@ def DisplayControlCard(available_tracks=None, selected_tracks=None, selected_cma raise ValueError('This should not occur! Please report.') -def TrackSelectionCard(track_idx, track_value, available_tracks): - track_options = [{'label': '---', 'value': '---'}] - track_options += [{'label': fname, 'value': fname} for fname in available_tracks] +def get_track_options(available_tracks): + track_iterator = iter(available_tracks) - return dbc.Card(components.TrackLayoutSelector(track_idx, track_options, track_value), outline=False) + fname = next(track_iterator) + track_options = [{'label': '--- Empty ---', 'value': 'Empty_1'}, + {'label': '--- Seq. Hydrophobicity ---', 'value': 'Hydrophobicity_Header', 'disabled': True}, + {'label': fname, 'value': fname}, + {'label': '--- Contact Density ---', 'value': 'Density_Header', 'disabled': True}] + + fname = next(track_iterator, None) + cmap_density = [] + while fname and cache_utils.MetadataTags.DENSITY.value in fname: + cmap_density.append({'label': fname, 'value': fname}) + fname = next(track_iterator, None) + if not cmap_density: + track_options.append({'label': '--- Empty ---', 'value': 'Empty_2'}) + else: + track_options += sorted(cmap_density, key=lambda k: k['label']) + + track_options.append({'label': '--- Contact Diff ---', 'value': 'Diff_Header', 'disabled': True}) + cmap_diff = [] + while fname and cache_utils.MetadataTags.DIFF.value in fname: + cmap_diff.append({'label': fname, 'value': fname}) + fname = next(track_iterator, None) + if not cmap_diff: + track_options.append({'label': '--- Empty ---', 'value': 'Empty_3'}) + else: + track_options += sorted(cmap_diff, key=lambda k: k['label']) + + track_options.append({'label': '--- Other Tracks ---', 'value': 'AdditionalTracks_Header', 'disabled': True}) + other_tracks = [] + while fname: + other_tracks.append({'label': fname, 'value': fname}) + fname = next(track_iterator, None) + if not other_tracks: + track_options.append({'label': '--- Empty ---', 'value': 'Empty_4'}) + else: + track_options += sorted(other_tracks, key=lambda k: k['label']) + return track_options def ColorPaletteSelectionCard(dataset, selected_palette): @@ -351,7 +387,7 @@ def ColorPaletteSelectionCard(dataset, selected_palette): def HalfSquareSelectionCard(square_idx, selection, available_cmaps): - cmap_options = [{'label': '---', 'value': '---'}] + cmap_options = [{'label': '--- Empty ---', 'value': '---'}] cmap_options += [{'label': fname, 'value': fname} for fname in available_cmaps] return dbc.Card(components.HalfSquareSelector(square_idx, cmap_options, selection), outline=False) diff --git a/utils/cache_utils.py b/utils/cache_utils.py index a7e53c4..8147bf9 100644 --- a/utils/cache_utils.py +++ b/utils/cache_utils.py @@ -1,5 +1,4 @@ from enum import Enum -from fast_enum import FastEnum import json import gzip import loaders @@ -26,9 +25,12 @@ class CacheKeys(Enum): PROTECETED_TAG = 'CONPLOT-INTERNAL-USE-ONLY-METADATA' -class MetadataTags(FastEnum): +class MetadataTags(Enum): DENSITY = ' - density' HYDROPHOBICITY = ' - hydrophobicity' + DIFF = ' - diff' + SEPARATOR = '|' + HYPHEN = '---' TAG = 'CONPLOT-INTERNAL-USE-ONLY-METADATA' @@ -69,7 +71,7 @@ def remove_density(session_id, cache, fname): def is_valid_fname(fname): - if any([x for x in CacheKeys if x.value == fname]) or any([tag for tag in MetadataTags if tag in fname]): + if any([x for x in CacheKeys if x.value == fname]) or any([tag for tag in MetadataTags if tag.value in fname]): return False return True diff --git a/utils/heatmap_utils.py b/utils/heatmap_utils.py index 5e46403..0c2c0dc 100644 --- a/utils/heatmap_utils.py +++ b/utils/heatmap_utils.py @@ -11,7 +11,7 @@ def init_heatmap(seq_length): def create_heatmap(session, display_settings, verbose_labels): heat, hover = init_heatmap(display_settings.seq_length) for idx, fname in enumerate(display_settings.cmap_selection): - if fname == '---': + if fname == '--- Empty ---': continue heat, hover = populate_heatmap(session[fname.encode()], idx, heat, hover, verbose_labels) @@ -23,7 +23,7 @@ def create_heatmap(session, display_settings, verbose_labels): def superimpose_heatmaps(session, display_settings, verbose_labels): heat, hover = init_heatmap(display_settings.seq_length) for idx, fname in enumerate(display_settings.cmap_selection): - if fname == '---': + if fname == '--- Empty ---': continue heat, hover = populate_superimposed_heatmap(session[display_settings.cmap_selection[0].encode()], session[display_settings.cmap_selection[1].encode()], diff --git a/utils/plot_utils.py b/utils/plot_utils.py index bb75b29..44380c3 100644 --- a/utils/plot_utils.py +++ b/utils/plot_utils.py @@ -3,6 +3,7 @@ from dash.dash import no_update import dash_core_components as dcc from enum import Enum +import itertools import json from loaders import DatasetReference, AdditionalDatasetReference, STATES from layouts import ContextReference @@ -58,7 +59,7 @@ def create_ConPlot(session_id, cache, trigger, selected_tracks, cmap_selection, def add_additional_tracks(session_id, session, display_settings, figure, cache): for idx, fname in enumerate(display_settings.selected_tracks): - if fname == '---': + if fname == '--- Empty ---': continue dataset, prediction = tracks_utils.retrieve_dataset_prediction(session_id, session, fname, display_settings, @@ -97,7 +98,7 @@ def add_contact_trace(session, display_settings, figure, verbose_labels): else: for idx, fname in enumerate(display_settings.cmap_selection): - if fname == '---': + if fname == '--- Empty ---': continue cmap = session[fname.encode()] @@ -210,7 +211,7 @@ def process_args(session_id, session, trigger, selected_tracks, cmap_selection, heatmap=heatmap, verbose_labels=verbose_labels) if verbose_labels: - fnames = [fname for fname in selected_tracks if fname != '---'] + fnames = [fname for fname in selected_tracks if fname != '--- Empty ---'] verbose_labels = get_verbose_labels(session_id, session, fnames, display_settings, cache) else: verbose_labels = None @@ -219,32 +220,37 @@ def process_args(session_id, session, trigger, selected_tracks, cmap_selection, def get_available_data(session): - available_tracks = [] - for dataset in AdditionalDatasetReference: - if dataset.value.encode() in session.keys() and session[dataset.value.encode()]: - available_tracks += session[dataset.value.encode()] + available_tracks = ['{}{}'.format(session[DatasetReference.SEQUENCE.value.encode()], + cache_utils.MetadataTags.HYDROPHOBICITY.value)] available_cmaps = [] - for cmap_fname in session[DatasetReference.CONTACT_MAP.value.encode()]: + cmap_fname_list = session[DatasetReference.CONTACT_MAP.value.encode()] + for cmap_fname in cmap_fname_list: available_cmaps.append(cmap_fname) - available_tracks.append('{}{}'.format(cmap_fname, cache_utils.MetadataTags.DENSITY)) + available_tracks.append('{}{}'.format(cmap_fname, cache_utils.MetadataTags.DENSITY.value)) + + if len(cmap_fname_list) > 1: + cmap_combinations = ['{} | {}{}'.format(*sorted(x), cache_utils.MetadataTags.DIFF.value) + for x in itertools.combinations(cmap_fname_list, 2)] + available_tracks += cmap_combinations - available_tracks.append('{}{}'.format(session[DatasetReference.SEQUENCE.value.encode()], - cache_utils.MetadataTags.HYDROPHOBICITY)) + for dataset in AdditionalDatasetReference: + if dataset.value.encode() in session.keys() and session[dataset.value.encode()]: + available_tracks += session[dataset.value.encode()] - return available_tracks, available_cmaps + return available_tracks, sorted(available_cmaps) def get_user_selection(cmap_selection, available_cmaps, track_selection, available_tracks): if len(cmap_selection) == 0: - cmap_selection = ['---'] * 2 + cmap_selection = ['--- Empty ---'] * 2 else: - cmap_selection = [fname if fname in available_cmaps else '---' for fname in cmap_selection] + cmap_selection = [fname if fname in available_cmaps else '--- Empty ---' for fname in cmap_selection] if len(track_selection) == 0: - track_selection = ['---'] * 9 + track_selection = ['--- Empty ---'] * 9 else: - track_selection = [track if track in available_tracks else '---' for track in track_selection] + track_selection = [track if track in available_tracks else '--- Empty ---' for track in track_selection] return track_selection, cmap_selection @@ -259,9 +265,9 @@ def get_default_layout(session): tracks.append(session[dataset.value][0]) if not any(tracks): - return ['---'] * 9, (cmap_fname, cmap_fname), selected_palettes + return ['--- Empty ---'] * 9, (cmap_fname, cmap_fname), selected_palettes else: - missing_tracks = ['---' for missing in range(0, 5 - len(tracks))] + missing_tracks = ['--- Empty ---' for missing in range(0, 5 - len(tracks))] tracks += missing_tracks return tracks[1:][::-1] + tracks, (cmap_fname, cmap_fname), selected_palettes diff --git a/utils/tracks_utils.py b/utils/tracks_utils.py index 0b76b2f..2171798 100644 --- a/utils/tracks_utils.py +++ b/utils/tracks_utils.py @@ -14,11 +14,11 @@ def calculate_density(cmap, seq_length, factor): def retrieve_dataset_prediction(session_id, session, fname, display_settings, cache): - if cache_utils.MetadataTags.HYDROPHOBICITY in fname: + if cache_utils.MetadataTags.HYDROPHOBICITY.value in fname: return DatasetReference.HYDROPHOBICITY.value, session[DatasetReference.HYDROPHOBICITY.value.encode()] - if cache_utils.MetadataTags.DENSITY in fname: - fname = fname[:-len(cache_utils.MetadataTags.DENSITY)] + if cache_utils.MetadataTags.DENSITY.value in fname: + fname = fname[:-len(cache_utils.MetadataTags.DENSITY.value)] cachekey = cache_utils.CacheKeys.CMAP_DENSITY.value.format(fname, display_settings.factor).encode() if cachekey in session.keys(): density = session[cachekey] From 8419d5db6b8621bbbfbcd976402ab932b5a2e623 Mon Sep 17 00:00:00 2001 From: FilomenoSanchez Date: Wed, 21 Apr 2021 18:25:23 +0100 Subject: [PATCH 07/37] plot contact diff track --- components/__init__.py | 1 + components/cards.py | 79 ++++++++----------------------- loaders/__init__.py | 15 +++++- parsers/__init__.py | 15 ++++++ utils/__init__.py | 18 +++++++ utils/cache_utils.py | 13 +++--- utils/cmap_utils.py | 13 ++++-- utils/color_palettes.py | 80 +++++++++++++++++++++++++++++++ utils/data_utils.py | 10 ++++ utils/plot_utils.py | 83 +++++++++++++++++++++++++++------ utils/tests/test_cache_utils.py | 6 +-- utils/tracks_utils.py | 77 ++++++++++++++++++++++++------ 12 files changed, 309 insertions(+), 101 deletions(-) diff --git a/components/__init__.py b/components/__init__.py index ef69f27..b4c52cf 100644 --- a/components/__init__.py +++ b/components/__init__.py @@ -11,6 +11,7 @@ class UserReadableTrackNames(Enum): heatmap = 'Heatmap' hydrophobicity = 'Hydrophobicity' density = 'Contact density' + diff = 'Contact diff' class EmailIssueReference(Enum): diff --git a/components/cards.py b/components/cards.py index d91e136..1df39d8 100644 --- a/components/cards.py +++ b/components/cards.py @@ -245,7 +245,6 @@ def DisplayControlCard(available_tracks=None, selected_tracks=None, selected_cma ) elif selected_tracks is not None and len(selected_tracks) >= 9 \ and selected_cmaps is not None and len(selected_cmaps) >= 2: - tracks = get_track_options(available_tracks) return html.Div([ components.DisplayControlHeader(), html.Br(), @@ -288,23 +287,23 @@ def DisplayControlCard(available_tracks=None, selected_tracks=None, selected_cma html.H5("Active tracks", className="card-text", style={'text-align': "center"}), html.Hr(), html.Br(), - dbc.Card(components.TrackLayoutSelector('-4', tracks, selected_tracks[0]), outline=False), + dbc.Card(components.TrackLayoutSelector('-4', available_tracks, selected_tracks[0]), outline=False), html.Br(), - dbc.Card(components.TrackLayoutSelector('-3', tracks, selected_tracks[1]), outline=False), + dbc.Card(components.TrackLayoutSelector('-3', available_tracks, selected_tracks[1]), outline=False), html.Br(), - dbc.Card(components.TrackLayoutSelector('-2', tracks, selected_tracks[2]), outline=False), + dbc.Card(components.TrackLayoutSelector('-2', available_tracks, selected_tracks[2]), outline=False), html.Br(), - dbc.Card(components.TrackLayoutSelector('-1', tracks, selected_tracks[3]), outline=False), + dbc.Card(components.TrackLayoutSelector('-1', available_tracks, selected_tracks[3]), outline=False), html.Br(), - dbc.Card(components.TrackLayoutSelector('0', tracks, selected_tracks[4]), outline=False), + dbc.Card(components.TrackLayoutSelector('0', available_tracks, selected_tracks[4]), outline=False), html.Br(), - dbc.Card(components.TrackLayoutSelector('+1', tracks, selected_tracks[5]), outline=False), + dbc.Card(components.TrackLayoutSelector('+1', available_tracks, selected_tracks[5]), outline=False), html.Br(), - dbc.Card(components.TrackLayoutSelector('+2', tracks, selected_tracks[6]), outline=False), + dbc.Card(components.TrackLayoutSelector('+2', available_tracks, selected_tracks[6]), outline=False), html.Br(), - dbc.Card(components.TrackLayoutSelector('+3', tracks, selected_tracks[7]), outline=False), + dbc.Card(components.TrackLayoutSelector('+3', available_tracks, selected_tracks[7]), outline=False), html.Br(), - dbc.Card(components.TrackLayoutSelector('+4', tracks, selected_tracks[8]), outline=False), + dbc.Card(components.TrackLayoutSelector('+4', available_tracks, selected_tracks[8]), outline=False), html.Br(), html.Br(), html.H5("Colour palettes", className="card-text", style={'text-align': "center"}), @@ -312,21 +311,23 @@ def DisplayControlCard(available_tracks=None, selected_tracks=None, selected_cma html.Br(), ColorPaletteSelectionCard('density', selected_palettes[0]), html.Br(), - ColorPaletteSelectionCard('custom', selected_palettes[1]), + ColorPaletteSelectionCard('diff', selected_palettes[1]), html.Br(), - ColorPaletteSelectionCard('heatmap', selected_palettes[2]), + ColorPaletteSelectionCard('custom', selected_palettes[2]), html.Br(), - ColorPaletteSelectionCard('hydrophobicity', selected_palettes[3]), + ColorPaletteSelectionCard('heatmap', selected_palettes[3]), html.Br(), - ColorPaletteSelectionCard('membranetopology', selected_palettes[4]), + ColorPaletteSelectionCard('hydrophobicity', selected_palettes[4]), html.Br(), - ColorPaletteSelectionCard('msa', selected_palettes[5]), + ColorPaletteSelectionCard('membranetopology', selected_palettes[5]), html.Br(), - ColorPaletteSelectionCard('conservation', selected_palettes[6]), + ColorPaletteSelectionCard('msa', selected_palettes[6]), html.Br(), - ColorPaletteSelectionCard('disorder', selected_palettes[7]), + ColorPaletteSelectionCard('conservation', selected_palettes[7]), html.Br(), - ColorPaletteSelectionCard('secondarystructure', selected_palettes[8]), + ColorPaletteSelectionCard('disorder', selected_palettes[8]), + html.Br(), + ColorPaletteSelectionCard('secondarystructure', selected_palettes[9]), html.Br(), ]) ] @@ -336,48 +337,6 @@ def DisplayControlCard(available_tracks=None, selected_tracks=None, selected_cma else: raise ValueError('This should not occur! Please report.') - -def get_track_options(available_tracks): - track_iterator = iter(available_tracks) - - fname = next(track_iterator) - track_options = [{'label': '--- Empty ---', 'value': 'Empty_1'}, - {'label': '--- Seq. Hydrophobicity ---', 'value': 'Hydrophobicity_Header', 'disabled': True}, - {'label': fname, 'value': fname}, - {'label': '--- Contact Density ---', 'value': 'Density_Header', 'disabled': True}] - - fname = next(track_iterator, None) - cmap_density = [] - while fname and cache_utils.MetadataTags.DENSITY.value in fname: - cmap_density.append({'label': fname, 'value': fname}) - fname = next(track_iterator, None) - if not cmap_density: - track_options.append({'label': '--- Empty ---', 'value': 'Empty_2'}) - else: - track_options += sorted(cmap_density, key=lambda k: k['label']) - - track_options.append({'label': '--- Contact Diff ---', 'value': 'Diff_Header', 'disabled': True}) - cmap_diff = [] - while fname and cache_utils.MetadataTags.DIFF.value in fname: - cmap_diff.append({'label': fname, 'value': fname}) - fname = next(track_iterator, None) - if not cmap_diff: - track_options.append({'label': '--- Empty ---', 'value': 'Empty_3'}) - else: - track_options += sorted(cmap_diff, key=lambda k: k['label']) - - track_options.append({'label': '--- Other Tracks ---', 'value': 'AdditionalTracks_Header', 'disabled': True}) - other_tracks = [] - while fname: - other_tracks.append({'label': fname, 'value': fname}) - fname = next(track_iterator, None) - if not other_tracks: - track_options.append({'label': '--- Empty ---', 'value': 'Empty_4'}) - else: - track_options += sorted(other_tracks, key=lambda k: k['label']) - return track_options - - def ColorPaletteSelectionCard(dataset, selected_palette): available_palettes = [] for palette in color_palettes.DatasetColorPalettes.__getattr__(dataset).value: diff --git a/loaders/__init__.py b/loaders/__init__.py index a6075a9..7d56d76 100644 --- a/loaders/__init__.py +++ b/loaders/__init__.py @@ -1,6 +1,5 @@ from enum import Enum import base64 -from parsers import HydrophobicityStates class DatasetReference(Enum): @@ -8,6 +7,7 @@ class DatasetReference(Enum): HYDROPHOBICITY = 'hydrophobicity' CONTACT_MAP = 'contact' CONTACT_DENSITY = 'density' + CONTACT_DIFF = 'diff' MEMBRANE_TOPOLOGY = 'membranetopology' SECONDARY_STRUCTURE = 'secondarystructure' CONSERVATION = 'conservation' @@ -130,6 +130,19 @@ def SequenceLoader(*args, **kwargs): 9: 'CONTACT_DENSITY_9', 10: 'CONTACT_DENSITY_10', }, + DatasetReference.CONTACT_DIFF.value:{ + 0: 'CONTACT_DIFF_0', + 1: 'CONTACT_DIFF_1', + 2: 'CONTACT_DIFF_2', + 3: 'CONTACT_DIFF_3', + 4: 'CONTACT_DIFF_4', + 5: 'CONTACT_DIFF_5', + 6: 'CONTACT_DIFF_6', + 7: 'CONTACT_DIFF_7', + 8: 'CONTACT_DIFF_8', + 9: 'CONTACT_DIFF_9', + 10: 'CONTACT_DIFF_10', + }, DatasetReference.MSA.value: { 0: 'MSA_COVERAGE_0', 1: 'MSA_COVERAGE_1', diff --git a/parsers/__init__.py b/parsers/__init__.py index c16e1e0..d78ed98 100644 --- a/parsers/__init__.py +++ b/parsers/__init__.py @@ -257,6 +257,20 @@ class DensityStates(Enum): CONTACT_DENSITY_10 = 10 +class DiffStates(Enum): + CONTACT_DIFF_0 = 0 + CONTACT_DIFF_1 = 1 + CONTACT_DIFF_2 = 2 + CONTACT_DIFF_3 = 3 + CONTACT_DIFF_4 = 4 + CONTACT_DIFF_5 = 5 + CONTACT_DIFF_6 = 6 + CONTACT_DIFF_7 = 7 + CONTACT_DIFF_8 = 8 + CONTACT_DIFF_9 = 9 + CONTACT_DIFF_10 = 10 + + class MsaStates(Enum): MSA_COVERAGE_0 = 0 MSA_COVERAGE_1 = 1 @@ -279,4 +293,5 @@ class DatasetStates(Enum): custom = CustomStates hydrophobicity = HydrophobicityStates density = DensityStates + diff = DiffStates msa = MsaStates diff --git a/utils/__init__.py b/utils/__init__.py index 8449257..c9623f6 100644 --- a/utils/__init__.py +++ b/utils/__init__.py @@ -170,6 +170,24 @@ def get_active_sessions(*args, **kwargs): return get_active_sessions(*args, **kwargs) +def lookup_data(*args, **kwargs): + from utils.data_utils import lookup_data + + return lookup_data(*args, **kwargs) + + +def create_cmap_sets(*args, **kwargs): + from utils.cmap_utils import create_cmap_sets + + return create_cmap_sets(*args, **kwargs) + + +def slice_predicted_reference_cmaps(*args, **kwargs): + from utils.cmap_utils import slice_predicted_reference_cmaps + + return slice_predicted_reference_cmaps(*args, **kwargs) + + def load_figure_json(*args, **kwargs): from utils.plot_utils import load_figure_json diff --git a/utils/cache_utils.py b/utils/cache_utils.py index 8147bf9..62ce7b1 100644 --- a/utils/cache_utils.py +++ b/utils/cache_utils.py @@ -13,6 +13,7 @@ class CacheKeys(Enum): DISPLAY_CONTROL_JSON = 'display_control_json' CONTACT_MAP = loaders.DatasetReference.CONTACT_MAP.value CONTACT_DENSITY = loaders.DatasetReference.CONTACT_DENSITY.value + CONTACT_DIFF = loaders.DatasetReference.CONTACT_DIFF.value CUSTOM = loaders.DatasetReference.CUSTOM.value SEQUENCE = loaders.DatasetReference.SEQUENCE.value SEQUENCE_HYDROPHOBICITY = loaders.DatasetReference.HYDROPHOBICITY.value @@ -21,7 +22,7 @@ class CacheKeys(Enum): CONSERVATION = loaders.DatasetReference.CONSERVATION.value DISORDER = loaders.DatasetReference.DISORDER.value CMAP_DENSITY = '{}_CONPLOT-INTERNAL-USE-ONLY-METADATA-DENSITY-TAG_{}' - CMAP_ERROR = '{}_{}_CONPLOT-INTERNAL-USE-ONLY-METADATA-ERROR-TAG_{}' + CMAP_DIFF = '{}_{}_CONPLOT-INTERNAL-USE-ONLY-METADATA-DIFF-TAG_{}' PROTECETED_TAG = 'CONPLOT-INTERNAL-USE-ONLY-METADATA' @@ -34,14 +35,14 @@ class MetadataTags(Enum): TAG = 'CONPLOT-INTERNAL-USE-ONLY-METADATA' -def retrieve_density(session_id, density_cachekey, cache): - density = cache.hget(session_id, density_cachekey) +def retrieve_data(session_id, cachekey, cache): + density = cache.hget(session_id, cachekey) return decompress_data(density) -def store_density(session_id, density_cachekey, density, cache): - cache.hset(session_id, density_cachekey, compress_data(density)) - store_fname(cache, session_id, density_cachekey.decode(), CacheKeys.CONTACT_DENSITY.value) +def store_data(session_id, cachekey, data, dataset, cache): + cache.hset(session_id, cachekey, compress_data(data)) + store_fname(cache, session_id, cachekey.decode(), dataset) def remove_all_density(session_id, cache): diff --git a/utils/cmap_utils.py b/utils/cmap_utils.py index 02ea7a8..c607581 100644 --- a/utils/cmap_utils.py +++ b/utils/cmap_utils.py @@ -51,20 +51,25 @@ def create_cmap(cmap, idx, display_settings, verbose_labels=None): return res1_list, res2_list, hover -def create_cmap_sets(reference_cmap, predicted_cmap, display_settings): +def slice_predicted_reference_cmaps(predicted_cmap, reference_cmap, display_settings): if display_settings.factor != 0: predicted_cmap = predicted_cmap[:int(round(display_settings.seq_length / display_settings.factor, 0))] if reference_cmap[-1] == 'PDB': - del reference_cmap[-1] + reference_cmap = reference_cmap[:-1] reference_cmap = [contact for contact in reference_cmap if contact[2] > 0] elif reference_cmap[-1] == 'DISTO': - del reference_cmap[-1] + reference_cmap = reference_cmap[:-1] reference_cmap = reference_cmap[:int(round(display_settings.seq_length / display_settings.factor, 0))] else: reference_cmap = reference_cmap[:int(round(display_settings.seq_length / display_settings.factor, 0))] elif reference_cmap[-1] == 'PDB' or reference_cmap[-1] == 'DISTO': - del reference_cmap[-1] + reference_cmap = reference_cmap[:-1] + + return reference_cmap, predicted_cmap + +def create_cmap_sets(reference_cmap, predicted_cmap, display_settings): + reference_cmap, predicted_cmap = slice_predicted_reference_cmaps(predicted_cmap, reference_cmap, display_settings) predicted_set = {(x[0], x[1]): x[2] for x in predicted_cmap} reference_set = {(x[0], x[1]): x[2] for x in reference_cmap} diff --git a/utils/color_palettes.py b/utils/color_palettes.py index 899d7dd..ca211fe 100644 --- a/utils/color_palettes.py +++ b/utils/color_palettes.py @@ -177,6 +177,20 @@ class Density_GreyColorPalette(Enum): CONTACT_DENSITY_10 = 'rgb(0,0,0,{})' +class Diff_GreyColorPalette(Enum): + CONTACT_DIFF_0 = 'rgba(255,255,255,{})' + CONTACT_DIFF_1 = 'rgba(229,229,229,{})' + CONTACT_DIFF_2 = 'rgba(204,204,204,{})' + CONTACT_DIFF_3 = 'rgba(179,179,179,{})' + CONTACT_DIFF_4 = 'rgba(153,153,153,{})' + CONTACT_DIFF_5 = 'rgba(127,127,127,{})' + CONTACT_DIFF_6 = 'rgba(102,102,102,{})' + CONTACT_DIFF_7 = 'rgba(77,77,77,{})' + CONTACT_DIFF_8 = 'rgba(51,51,51,{})' + CONTACT_DIFF_9 = 'rgba(25,25,25,{})' + CONTACT_DIFF_10 = 'rgb(0,0,0,{})' + + class Coverage_GreyColorPalette(Enum): MSA_COVERAGE_0 = 'rgba(255,255,255,{})' MSA_COVERAGE_1 = 'rgba(229,229,229,{})' @@ -219,6 +233,20 @@ class Density_Viridis(Enum): CONTACT_DENSITY_10 = sequential.Viridis[9] +class Diff_Viridis(Enum): + CONTACT_DIFF_0 = sequential.Viridis[0] + CONTACT_DIFF_1 = sequential.Viridis[0] + CONTACT_DIFF_2 = sequential.Viridis[1] + CONTACT_DIFF_3 = sequential.Viridis[2] + CONTACT_DIFF_4 = sequential.Viridis[3] + CONTACT_DIFF_5 = sequential.Viridis[4] + CONTACT_DIFF_6 = sequential.Viridis[5] + CONTACT_DIFF_7 = sequential.Viridis[6] + CONTACT_DIFF_8 = sequential.Viridis[7] + CONTACT_DIFF_9 = sequential.Viridis[8] + CONTACT_DIFF_10 = sequential.Viridis[9] + + class Coverage_Viridis(Enum): MSA_COVERAGE_0 = sequential.Viridis[0] MSA_COVERAGE_1 = sequential.Viridis[0] @@ -261,6 +289,20 @@ class Density_BuRd(Enum): CONTACT_DENSITY_10 = diverging.RdYlBu[1] +class Diff_BuRd(Enum): + CONTACT_DIFF_0 = diverging.RdYlBu[10] + CONTACT_DIFF_1 = diverging.RdYlBu[10] + CONTACT_DIFF_2 = diverging.RdYlBu[9] + CONTACT_DIFF_3 = diverging.RdYlBu[8] + CONTACT_DIFF_4 = diverging.RdYlBu[7] + CONTACT_DIFF_5 = diverging.RdYlBu[6] + CONTACT_DIFF_6 = diverging.RdYlBu[5] + CONTACT_DIFF_7 = diverging.RdYlBu[4] + CONTACT_DIFF_8 = diverging.RdYlBu[3] + CONTACT_DIFF_9 = diverging.RdYlBu[2] + CONTACT_DIFF_10 = diverging.RdYlBu[1] + + class Coverage_BuRd(Enum): MSA_COVERAGE_0 = diverging.RdYlBu[10] MSA_COVERAGE_1 = diverging.RdYlBu[10] @@ -303,6 +345,20 @@ class Density_Inferno(Enum): CONTACT_DENSITY_10 = sequential.Inferno[9] +class Diff_Inferno(Enum): + CONTACT_DIFF_0 = sequential.Inferno[0] + CONTACT_DIFF_1 = sequential.Inferno[0] + CONTACT_DIFF_2 = sequential.Inferno[1] + CONTACT_DIFF_3 = sequential.Inferno[2] + CONTACT_DIFF_4 = sequential.Inferno[3] + CONTACT_DIFF_5 = sequential.Inferno[4] + CONTACT_DIFF_6 = sequential.Inferno[5] + CONTACT_DIFF_7 = sequential.Inferno[6] + CONTACT_DIFF_8 = sequential.Inferno[7] + CONTACT_DIFF_9 = sequential.Inferno[8] + CONTACT_DIFF_10 = sequential.Inferno[9] + + class Coverage_Inferno(Enum): MSA_COVERAGE_0 = sequential.Inferno[0] MSA_COVERAGE_1 = sequential.Inferno[0] @@ -331,6 +387,20 @@ class Heatmap_Hot(Enum): BIN_10 = 'rgb(255.0, 255.0, 255.0)' +class Diff_Hot(Enum): + CONTACT_DIFF_10 = 'rgb(10.607999999999999, 0.0, 0.0)' + CONTACT_DIFF_9 = 'rgb(76.23763084702213, 0.0, 0.0)' + CONTACT_DIFF_8 = 'rgb(144.4924469279252, 0.0, 0.0)' + CONTACT_DIFF_7 = 'rgb(210.12207777494734, 0.0, 0.0)' + CONTACT_DIFF_6 = 'rgb(255.0, 23.37520639028961, 0.0)' + CONTACT_DIFF_5 = 'rgb(255.0, 91.62509548421984, 0.0)' + CONTACT_DIFF_4 = 'rgb(255.0, 157.24998884376814, 0.0)' + CONTACT_DIFF_3 = 'rgb(255.0, 225.49987793769836, 0.0)' + CONTACT_DIFF_2 = 'rgb(255.0, 255.0, 54.18729918729921)' + CONTACT_DIFF_1 = 'rgb(255.0, 255.0, 156.56240156240156)' + CONTACT_DIFF_0 = 'rgb(255.0, 255.0, 255.0)' + + class Density_Hot(Enum): CONTACT_DENSITY_10 = 'rgb(10.607999999999999, 0.0, 0.0)' CONTACT_DENSITY_9 = 'rgb(76.23763084702213, 0.0, 0.0)' @@ -383,6 +453,14 @@ class MsaCoverage_ColorPalettes(Enum): PALETTE_5 = Coverage_Hot +class Diff_ColorPalettes(Enum): + PALETTE_1 = Diff_GreyColorPalette + PALETTE_2 = Diff_Viridis + PALETTE_3 = Diff_BuRd + PALETTE_4 = Diff_Inferno + PALETTE_5 = Diff_Hot + + class Hydrophobicity_BlueGreyColorPalette(Enum): HYDROPATHY_10 = 'rgba(66,138,245,{})' HYDROPATHY_9 = 'rgba(72,137,234,{})' @@ -418,6 +496,7 @@ class HydrophobicityColorPalettes(Enum): class DatasetColorPalettes(Enum): density = Density_ColorPalettes + diff = Diff_ColorPalettes custom = Custom_ColorPalettes heatmap = Heatmap_ColorPalettes hydrophobicity = HydrophobicityColorPalettes @@ -430,6 +509,7 @@ class DatasetColorPalettes(Enum): class PaletteDefaultLayout(Enum): CONTACT_DENSITY = DatasetReference.CONTACT_DENSITY.value.encode() + CONTACT_DIFF = DatasetReference.CONTACT_DIFF.value.encode() CUSTOM = DatasetReference.CUSTOM.value.encode() HEATMAP = b'heatmap' HYDROPHOBICITY = DatasetReference.HYDROPHOBICITY.value.encode() diff --git a/utils/data_utils.py b/utils/data_utils.py index 3bea872..8017e92 100644 --- a/utils/data_utils.py +++ b/utils/data_utils.py @@ -130,3 +130,13 @@ def remove_dataset(trigger, cache, session_id, logger): cache_utils.remove_all_density(session_id, cache) elif dataset == loaders.DatasetReference.CONTACT_MAP.value: cache_utils.remove_density(session_id, cache, fname) + + +def lookup_data(session, session_id, cachekey, cache): + if cachekey in session.keys(): + data = session[cachekey] + elif cache.hexists(session_id, cachekey): + data = cache_utils.retrieve_data(session_id, cachekey, cache) + else: + return None + return data diff --git a/utils/plot_utils.py b/utils/plot_utils.py index 44380c3..bb96354 100644 --- a/utils/plot_utils.py +++ b/utils/plot_utils.py @@ -219,26 +219,82 @@ def process_args(session_id, session, trigger, selected_tracks, cmap_selection, return session, display_settings, verbose_labels, None +def separate_pdb_cmaps(session, cmap_fname_list): + non_pdb_fnames = [] + pdb_fnames = [] + + for fname in cmap_fname_list: + cmap = session[fname.encode()] + if cmap[-1] == 'PDB': + pdb_fnames.append(fname) + else: + non_pdb_fnames.append(fname) + + return pdb_fnames, non_pdb_fnames + + def get_available_data(session): - available_tracks = ['{}{}'.format(session[DatasetReference.SEQUENCE.value.encode()], - cache_utils.MetadataTags.HYDROPHOBICITY.value)] + available_tracks = [{'label': '--- Empty ---', 'value': 'Empty_1'}, + {'label': '--- Seq. Hydrophobicity ---', 'value': 'Hydrophobicity_Header', 'disabled': True}, + {'label': session[DatasetReference.SEQUENCE.value.encode()], + 'value': session[DatasetReference.SEQUENCE.value.encode()]}, + {'label': '--- Contact Density ---', 'value': 'Density_Header', 'disabled': True}] + + available_cmaps, cmap_fname_list, cmap_density = get_cmap_density_tracks(session) + + if not cmap_fname_list: + available_tracks.append({'label': '--- Empty ---', 'value': 'Empty_2'}) + available_tracks.append({'label': '--- Contact Diff ---', 'value': 'Diff_Header', 'disabled': True}) + available_tracks.append({'label': '--- Empty ---', 'value': 'Empty_3'}) + else: + available_tracks += sorted(cmap_density, key=lambda k: k['label']) + available_tracks.append({'label': '--- Contact Diff ---', 'value': 'Diff_Header', 'disabled': True}) + cmap_diff = get_cmap_diff_tracks(session, cmap_fname_list) + if not cmap_diff: + available_tracks.append({'label': '--- Empty ---', 'value': 'Empty_3'}) + else: + available_tracks += sorted(cmap_diff, key=lambda k: k['label']) + + other_tracks = get_other_tracks(session) + if not other_tracks: + available_tracks.append({'label': '--- Empty ---', 'value': 'Empty_4'}) + else: + available_tracks += sorted(other_tracks, key=lambda k: k['label']) + return available_tracks, sorted(available_cmaps) + + +def get_cmap_density_tracks(session): + cmap_density = [] available_cmaps = [] cmap_fname_list = session[DatasetReference.CONTACT_MAP.value.encode()] for cmap_fname in cmap_fname_list: available_cmaps.append(cmap_fname) - available_tracks.append('{}{}'.format(cmap_fname, cache_utils.MetadataTags.DENSITY.value)) - - if len(cmap_fname_list) > 1: - cmap_combinations = ['{} | {}{}'.format(*sorted(x), cache_utils.MetadataTags.DIFF.value) - for x in itertools.combinations(cmap_fname_list, 2)] - available_tracks += cmap_combinations - + cmap_density.append({'label': cmap_fname, 'value': cmap_fname}) + return available_cmaps, cmap_fname_list, cmap_density + + +def get_cmap_diff_tracks(session, cmap_fname_list): + pdb_fnames, non_pdb_fnames = separate_pdb_cmaps(session, cmap_fname_list) + cmap_diff = [] + for combination in itertools.combinations(non_pdb_fnames, 2): + label = '{} | {}'.format(*combination) + cmap_diff.append({'label': label, 'value': label}) + for pdb in pdb_fnames: + for permutation in itertools.permutations(cmap_fname_list, 2): + if pdb in permutation: + label = '{} | {}'.format(*permutation) + cmap_diff.append({'label': label, 'value': label}) + return cmap_diff + + +def get_other_tracks(session): + other_tracks = [{'label': '--- Other Tracks ---', 'value': 'AdditionalTracks_Header', 'disabled': True}] for dataset in AdditionalDatasetReference: if dataset.value.encode() in session.keys() and session[dataset.value.encode()]: - available_tracks += session[dataset.value.encode()] - - return available_tracks, sorted(available_cmaps) + for fname in session[dataset.value.encode()]: + other_tracks.append({'label': fname, 'value': fname}) + return other_tracks def get_user_selection(cmap_selection, available_cmaps, track_selection, available_tracks): @@ -250,7 +306,8 @@ def get_user_selection(cmap_selection, available_cmaps, track_selection, availab if len(track_selection) == 0: track_selection = ['--- Empty ---'] * 9 else: - track_selection = [track if track in available_tracks else '--- Empty ---' for track in track_selection] + available_track_labels = [track['label'] for track in available_tracks] + track_selection = [track if track in available_track_labels else '--- Empty ---' for track in track_selection] return track_selection, cmap_selection diff --git a/utils/tests/test_cache_utils.py b/utils/tests/test_cache_utils.py index 58de1f6..aaefcd1 100644 --- a/utils/tests/test_cache_utils.py +++ b/utils/tests/test_cache_utils.py @@ -83,9 +83,9 @@ def test_9(self): cachekey_2 = cache_utils.CacheKeys.CMAP_DENSITY.value.format('fname_2', '2').encode() density_2 = [5, 6, 7, 8, 9, 0] - cache_utils.store_density(self.session_id, cachekey_1, density_1, self.cache) - cache_utils.store_density(self.session_id, cachekey_2, density_2, self.cache) - output = cache_utils.retrieve_density(self.session_id, cachekey_2, self.cache) + cache_utils.store_data(self.session_id, cachekey_1, density_1, self.cache) + cache_utils.store_data(self.session_id, cachekey_2, density_2, self.cache) + output = cache_utils.retrieve_data(self.session_id, cachekey_2, self.cache) self.assertListEqual(output, density_2) expected_cache = {b'id': cache_utils.compress_data(self.session_id)} cache_utils.remove_all_density(self.session_id, self.cache) diff --git a/utils/tracks_utils.py b/utils/tracks_utils.py index 2171798..eb64a47 100644 --- a/utils/tracks_utils.py +++ b/utils/tracks_utils.py @@ -1,34 +1,83 @@ -from parsers import DatasetStates from loaders import AdditionalDatasetReference, DatasetReference -from utils import create_cmap_trace, color_palettes, cache_utils +import numpy as np +from numba import njit +from parsers import DatasetStates from sklearn.cluster import estimate_bandwidth from sklearn.neighbors import KernelDensity -import numpy as np +from utils import create_cmap_trace, color_palettes, cache_utils, lookup_data, slice_predicted_reference_cmaps -def calculate_density(cmap, seq_length, factor): +@njit() +def calculate_mcc(tp, fp, tn, fn): + denominator = (tp + fp) * (tp + fn) * (tn + fp) * (tn + fn) + denominator = np.sqrt(denominator) + if denominator == 0: + return 1 + numerator = (tp * tn - fp * fn) * 10 + if numerator < 0: + return 10 + mcc = 10 - (numerator / denominator) + return mcc + + +def slice_cmap(cmap, seq_length, factor): if cmap[-1] == 'PDB' or cmap[-1] == 'DISTO': cmap.pop(-1) - contact_list = cmap[:int(round(seq_length / factor, 0))] + return cmap[:int(round(seq_length / factor, 0))] + + +def calculate_density(cmap, seq_length, factor): + contact_list = slice_cmap(cmap, seq_length, factor) return get_contact_density(contact_list, seq_length) +def calculate_diff(cmap_1, cmap_2, display_settings): + size = display_settings.seq_length + cmap_1, cmap_2 = slice_predicted_reference_cmaps(cmap_1, cmap_2, display_settings) + cmap_1_set = {resn: {(c[0], c[1]) for c in cmap_1 if resn in (c[0], c[1])} for resn in range(1, size + 1)} + cmap_2_set = {resn: {(c[0], c[1]) for c in cmap_2 if resn in (c[0], c[1])} for resn in range(1, size + 1)} + diff = [] + + for resn in cmap_1_set.keys(): + tp = len(cmap_1_set[resn] & cmap_2_set[resn]) + fp = len(cmap_2_set[resn] - cmap_1_set[resn]) + fn = len(cmap_1_set[resn] - cmap_2_set[resn]) + tn = size - sum((tp, fp, fn)) + mcc = calculate_mcc(tp, fp, tn, fn) + diff.append(int(round(mcc, 0))) + return diff + + +def get_diff_args(fname, factor): + cmap_1 = fname.split('|')[0].rstrip().lstrip() + cmap_2 = fname.split('|')[1].rstrip().lstrip() + cachekey = cache_utils.CacheKeys.CMAP_DIFF.value.format(cmap_1, cmap_2, factor).encode() + return cmap_1, cmap_2, cachekey + + def retrieve_dataset_prediction(session_id, session, fname, display_settings, cache): - if cache_utils.MetadataTags.HYDROPHOBICITY.value in fname: + if fname == session[DatasetReference.SEQUENCE.value.encode()]: return DatasetReference.HYDROPHOBICITY.value, session[DatasetReference.HYDROPHOBICITY.value.encode()] - if cache_utils.MetadataTags.DENSITY.value in fname: - fname = fname[:-len(cache_utils.MetadataTags.DENSITY.value)] + if fname in session[DatasetReference.CONTACT_MAP.value.encode()]: cachekey = cache_utils.CacheKeys.CMAP_DENSITY.value.format(fname, display_settings.factor).encode() - if cachekey in session.keys(): - density = session[cachekey] - elif cache.hexists(session_id, cachekey): - density = cache_utils.retrieve_density(session_id, cachekey, cache) - else: + density = lookup_data(session, session_id, cachekey, cache) + if not density: density = calculate_density(session[fname.encode()], display_settings.seq_length, display_settings.factor) - cache_utils.store_density(session_id, cachekey, density, cache) + cache_utils.store_data(session_id, cachekey, density, cache_utils.CacheKeys.CONTACT_DENSITY.value, cache) + return DatasetReference.CONTACT_DENSITY.value, density + if cache_utils.MetadataTags.SEPARATOR.value in fname: + cmap_1, cmap_2, cachekey = get_diff_args(fname, display_settings.factor) + diff = lookup_data(session, session_id, cachekey, cache) + if not diff: + cmap_1 = session[cmap_1.encode()] + cmap_2 = session[cmap_2.encode()] + diff = calculate_diff(cmap_1, cmap_2, display_settings) + cache_utils.store_data(session_id, cachekey, diff, cache_utils.CacheKeys.CONTACT_DIFF.value, cache) + return DatasetReference.CONTACT_DIFF.value, diff + for dataset in AdditionalDatasetReference: if dataset.value.encode() in session.keys() and fname in session[dataset.value.encode()]: return dataset.value, session[fname.encode()] From 102e1f083bbdc5c7dabf567a9ab45bff68febb92 Mon Sep 17 00:00:00 2001 From: FilomenoSanchez Date: Thu, 22 Apr 2021 09:41:13 +0100 Subject: [PATCH 08/37] pdb files are never sliced with L factor --- requirements.txt | 3 ++- utils/__init__.py | 6 +++--- utils/cmap_utils.py | 34 ++++++++++++++-------------------- utils/plot_utils.py | 15 +++++---------- utils/tracks_utils.py | 12 ++++-------- 5 files changed, 28 insertions(+), 42 deletions(-) diff --git a/requirements.txt b/requirements.txt index a633f1b..355d3bd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,4 +21,5 @@ keyring~=22.0.1 keyrings.cryptfile~=1.3.6 numpy~=1.19.4 fast-enum~=1.3.0 -scikit-learn~=0.24.1 \ No newline at end of file +scikit-learn~=0.24.1 +numba~=0.53.1 \ No newline at end of file diff --git a/utils/__init__.py b/utils/__init__.py index c9623f6..dba1ce1 100644 --- a/utils/__init__.py +++ b/utils/__init__.py @@ -182,10 +182,10 @@ def create_cmap_sets(*args, **kwargs): return create_cmap_sets(*args, **kwargs) -def slice_predicted_reference_cmaps(*args, **kwargs): - from utils.cmap_utils import slice_predicted_reference_cmaps +def slice_cmap(*args, **kwargs): + from utils.cmap_utils import slice_cmap - return slice_predicted_reference_cmaps(*args, **kwargs) + return slice_cmap(*args, **kwargs) def load_figure_json(*args, **kwargs): diff --git a/utils/cmap_utils.py b/utils/cmap_utils.py index c607581..42bb413 100644 --- a/utils/cmap_utils.py +++ b/utils/cmap_utils.py @@ -18,11 +18,7 @@ def create_cmap_trace(x, y, symbol, marker_size, color, hovertext=None): def create_cmap(cmap, idx, display_settings, verbose_labels=None): - if cmap[-1] == 'PDB' or cmap[-1] == 'DISTO': - del cmap[-1] - - if display_settings.factor != 0: - cmap = cmap[:int(round(display_settings.seq_length / display_settings.factor, 0))] + cmap = slice_cmap(cmap, display_settings.seq_length, display_settings.factor) if idx == 1: idx_x = 0 @@ -51,25 +47,23 @@ def create_cmap(cmap, idx, display_settings, verbose_labels=None): return res1_list, res2_list, hover -def slice_predicted_reference_cmaps(predicted_cmap, reference_cmap, display_settings): - if display_settings.factor != 0: - predicted_cmap = predicted_cmap[:int(round(display_settings.seq_length / display_settings.factor, 0))] - if reference_cmap[-1] == 'PDB': - reference_cmap = reference_cmap[:-1] - reference_cmap = [contact for contact in reference_cmap if contact[2] > 0] - elif reference_cmap[-1] == 'DISTO': - reference_cmap = reference_cmap[:-1] - reference_cmap = reference_cmap[:int(round(display_settings.seq_length / display_settings.factor, 0))] - else: - reference_cmap = reference_cmap[:int(round(display_settings.seq_length / display_settings.factor, 0))] - elif reference_cmap[-1] == 'PDB' or reference_cmap[-1] == 'DISTO': - reference_cmap = reference_cmap[:-1] +def slice_cmap(cmap, seq_length, factor): + if cmap[-1] == 'PDB': + cmap = cmap[:-1] + cmap = [contact for contact in cmap if contact[2] > 0] + return cmap + elif cmap[-1] == 'DISTO': + cmap = cmap[:-1] + + if factor != 0: + cmap = cmap[:int(round(seq_length / factor, 0))] - return reference_cmap, predicted_cmap + return cmap def create_cmap_sets(reference_cmap, predicted_cmap, display_settings): - reference_cmap, predicted_cmap = slice_predicted_reference_cmaps(predicted_cmap, reference_cmap, display_settings) + reference_cmap = slice_cmap(reference_cmap, display_settings.seq_length, display_settings.factor) + predicted_cmap = slice_cmap(predicted_cmap, display_settings.seq_length, display_settings.factor) predicted_set = {(x[0], x[1]): x[2] for x in predicted_cmap} reference_set = {(x[0], x[1]): x[2] for x in reference_cmap} diff --git a/utils/plot_utils.py b/utils/plot_utils.py index bb96354..9c80e65 100644 --- a/utils/plot_utils.py +++ b/utils/plot_utils.py @@ -249,12 +249,13 @@ def get_available_data(session): else: available_tracks += sorted(cmap_density, key=lambda k: k['label']) available_tracks.append({'label': '--- Contact Diff ---', 'value': 'Diff_Header', 'disabled': True}) - cmap_diff = get_cmap_diff_tracks(session, cmap_fname_list) + cmap_diff = get_cmap_diff_tracks(cmap_fname_list) if not cmap_diff: available_tracks.append({'label': '--- Empty ---', 'value': 'Empty_3'}) else: available_tracks += sorted(cmap_diff, key=lambda k: k['label']) + available_tracks.append({'label': '--- Other Tracks ---', 'value': 'AdditionalTracks_Header', 'disabled': True}) other_tracks = get_other_tracks(session) if not other_tracks: available_tracks.append({'label': '--- Empty ---', 'value': 'Empty_4'}) @@ -274,22 +275,16 @@ def get_cmap_density_tracks(session): return available_cmaps, cmap_fname_list, cmap_density -def get_cmap_diff_tracks(session, cmap_fname_list): - pdb_fnames, non_pdb_fnames = separate_pdb_cmaps(session, cmap_fname_list) +def get_cmap_diff_tracks(cmap_fname_list): cmap_diff = [] - for combination in itertools.combinations(non_pdb_fnames, 2): + for combination in itertools.combinations(cmap_fname_list, 2): label = '{} | {}'.format(*combination) cmap_diff.append({'label': label, 'value': label}) - for pdb in pdb_fnames: - for permutation in itertools.permutations(cmap_fname_list, 2): - if pdb in permutation: - label = '{} | {}'.format(*permutation) - cmap_diff.append({'label': label, 'value': label}) return cmap_diff def get_other_tracks(session): - other_tracks = [{'label': '--- Other Tracks ---', 'value': 'AdditionalTracks_Header', 'disabled': True}] + other_tracks = [] for dataset in AdditionalDatasetReference: if dataset.value.encode() in session.keys() and session[dataset.value.encode()]: for fname in session[dataset.value.encode()]: diff --git a/utils/tracks_utils.py b/utils/tracks_utils.py index eb64a47..5a77077 100644 --- a/utils/tracks_utils.py +++ b/utils/tracks_utils.py @@ -4,7 +4,7 @@ from parsers import DatasetStates from sklearn.cluster import estimate_bandwidth from sklearn.neighbors import KernelDensity -from utils import create_cmap_trace, color_palettes, cache_utils, lookup_data, slice_predicted_reference_cmaps +from utils import create_cmap_trace, color_palettes, cache_utils, lookup_data, slice_cmap @njit() @@ -20,12 +20,6 @@ def calculate_mcc(tp, fp, tn, fn): return mcc -def slice_cmap(cmap, seq_length, factor): - if cmap[-1] == 'PDB' or cmap[-1] == 'DISTO': - cmap.pop(-1) - return cmap[:int(round(seq_length / factor, 0))] - - def calculate_density(cmap, seq_length, factor): contact_list = slice_cmap(cmap, seq_length, factor) return get_contact_density(contact_list, seq_length) @@ -33,7 +27,8 @@ def calculate_density(cmap, seq_length, factor): def calculate_diff(cmap_1, cmap_2, display_settings): size = display_settings.seq_length - cmap_1, cmap_2 = slice_predicted_reference_cmaps(cmap_1, cmap_2, display_settings) + cmap_1 = slice_cmap(cmap_1, display_settings.seq_length, display_settings.factor) + cmap_2 = slice_cmap(cmap_2, display_settings.seq_length, display_settings.factor) cmap_1_set = {resn: {(c[0], c[1]) for c in cmap_1 if resn in (c[0], c[1])} for resn in range(1, size + 1)} cmap_2_set = {resn: {(c[0], c[1]) for c in cmap_2 if resn in (c[0], c[1])} for resn in range(1, size + 1)} diff = [] @@ -59,6 +54,7 @@ def retrieve_dataset_prediction(session_id, session, fname, display_settings, ca if fname == session[DatasetReference.SEQUENCE.value.encode()]: return DatasetReference.HYDROPHOBICITY.value, session[DatasetReference.HYDROPHOBICITY.value.encode()] + # TODO: If it is a PDB lookup data should not care about the L factor since it will always be the same if fname in session[DatasetReference.CONTACT_MAP.value.encode()]: cachekey = cache_utils.CacheKeys.CMAP_DENSITY.value.format(fname, display_settings.factor).encode() density = lookup_data(session, session_id, cachekey, cache) From 7e970e7b931e118a9cbbe334368e7a32e67834ef Mon Sep 17 00:00:00 2001 From: FilomenoSanchez Date: Thu, 22 Apr 2021 20:14:11 +0100 Subject: [PATCH 09/37] use amise to stimate bw for kde --- requirements.txt | 3 +- utils/cache_utils.py | 1 + utils/math_utils.py | 106 ++++++++++++++++++++++++++++++++++++++++++ utils/tracks_utils.py | 33 ++----------- 4 files changed, 112 insertions(+), 31 deletions(-) create mode 100644 utils/math_utils.py diff --git a/requirements.txt b/requirements.txt index 355d3bd..9295cd6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,4 +22,5 @@ keyrings.cryptfile~=1.3.6 numpy~=1.19.4 fast-enum~=1.3.0 scikit-learn~=0.24.1 -numba~=0.53.1 \ No newline at end of file +numba~=0.53.1 +conkit~=0.12.0 \ No newline at end of file diff --git a/utils/cache_utils.py b/utils/cache_utils.py index 62ce7b1..18c2e8e 100644 --- a/utils/cache_utils.py +++ b/utils/cache_utils.py @@ -45,6 +45,7 @@ def store_data(session_id, cachekey, data, dataset, cache): store_fname(cache, session_id, cachekey.decode(), dataset) +# TODO: Need to implement the same for contact diff def remove_all_density(session_id, cache): density_list = cache.hget(session_id, CacheKeys.CONTACT_DENSITY.value) if not density_list: diff --git a/utils/math_utils.py b/utils/math_utils.py new file mode 100644 index 0000000..4004a03 --- /dev/null +++ b/utils/math_utils.py @@ -0,0 +1,106 @@ +import math +from numba import njit +import numpy as np +from sklearn.neighbors import KernelDensity + +"""Credits to Felix Simkovic; code taken from GitHub rigdenlab/conkit""" +"""Credits to Felix Simkovic; code taken from GitHub rigdenlab/conkit""" + +SQRT_PI = math.sqrt(math.pi) +SQRT_2PI = math.sqrt(2.0 * math.pi) + + +@njit(fastmath=True) +def calculate_mcc(tp, fp, tn, fn): + denominator = (tp + fp) * (tp + fn) * (tn + fp) * (tn + fn) + denominator = np.sqrt(denominator) + if denominator == 0: + return 1 + numerator = (tp * tn - fp * fn) * 10 + if numerator < 0: + return 10 + mcc = 10 - (numerator / denominator) + return mcc + + +@njit(fastmath=True) +def calculate_bowman_bw(data): + M, N = data.shape + bw = math.sqrt((data ** 2).sum() / M - (data.sum() / M) ** 2) * ((((N + 2) * M) / 4.0) ** (-1.0 / (N + 4))) + return bw + + +@njit(fastmath=True) +def calculate_amise_bw(data, n_iterations=25, eps=0.001): + data = np.asarray(data) + x0 = calculate_bowman_bw(data) + y0 = optimize_bandwidth(data, x0) + x = 0.8 * x0 + y = optimize_bandwidth(data, x) + for i in range(n_iterations): + x = x - (y * (x0 - x) / (y0 - y)) + y = optimize_bandwidth(data, x) + if abs(y) < (eps * y0): + break + return x + + +@njit(cache=True, fastmath=True) +def optimize_bandwidth(A, v): + alpha = 1.0 / (2.0 * SQRT_PI) + sigma = 1.0 + integral = get_stiffness_integral(A, v, 0.0001) + result = v - ((A.shape[0] * integral * sigma ** 4) / alpha) ** (-1.0 / (A.shape[1] + 4)) + return result + + +@njit(fastmath=True) +def get_stiffness_integral(A, v, eps): + min_ = A.min() - v * 3 + max_ = A.max() + v * 3 + dx = 1.0 * (max_ - min_) + maxn = dx / math.sqrt(eps) + if maxn > 2048: + maxn = 2048 + y1 = get_gauss_curvature(A, min_, v) + y2 = get_gauss_curvature(A, max_, v) + yy = 0.5 * dx * (y1 * y1 + y2 * y2) + n = 2 + + while n <= maxn: + dx = dx / 2.0 + y = 0.0 + for i in range(1, n, 2): + y3 = get_gauss_curvature(A, min_ + i * dx, v) + y = y + (y3 * y3) + yy = 0.5 * yy + y * dx + if n > 8 and math.fabs(y * dx - 0.5 * yy) < eps * yy: + break + n = n * 2 + + return yy + + +@njit(cache=True, fastmath=True) +def get_gauss_curvature(A, x, w): + w_sq = w * w + w_sqrt_2pi = w * SQRT_2PI + curvature = 0.0 + shape_1 = A.shape[1] + for i in range(A.shape[0]): + for j in range(shape_1): + z = (x - A[i, j]) / w + z = z * z + curvature = curvature + (shape_1 * (z - 1.0) * (math.exp(-0.5 * z) / w_sqrt_2pi) / w_sq) + return curvature / A.shape[0] + + +def get_contact_density(contact_list, seq_length): + x = np.array([i for c in contact_list for i in np.arange(c[1], c[0] + 1)], dtype=np.int64)[:, np.newaxis] + bw = calculate_amise_bw(x) + kde = KernelDensity(bandwidth=bw).fit(x) + x_fit = np.arange(1, seq_length + 1)[:, np.newaxis] + density = np.exp(kde.score_samples(x_fit)).tolist() + density_max = max(density) + density = [int(round(float(i) / density_max, 1) * 10) for i in density] + return density diff --git a/utils/tracks_utils.py b/utils/tracks_utils.py index 5a77077..31ab2de 100644 --- a/utils/tracks_utils.py +++ b/utils/tracks_utils.py @@ -1,28 +1,11 @@ from loaders import AdditionalDatasetReference, DatasetReference -import numpy as np -from numba import njit from parsers import DatasetStates -from sklearn.cluster import estimate_bandwidth -from sklearn.neighbors import KernelDensity -from utils import create_cmap_trace, color_palettes, cache_utils, lookup_data, slice_cmap - - -@njit() -def calculate_mcc(tp, fp, tn, fn): - denominator = (tp + fp) * (tp + fn) * (tn + fp) * (tn + fn) - denominator = np.sqrt(denominator) - if denominator == 0: - return 1 - numerator = (tp * tn - fp * fn) * 10 - if numerator < 0: - return 10 - mcc = 10 - (numerator / denominator) - return mcc +from utils import create_cmap_trace, color_palettes, cache_utils, lookup_data, slice_cmap, math_utils def calculate_density(cmap, seq_length, factor): contact_list = slice_cmap(cmap, seq_length, factor) - return get_contact_density(contact_list, seq_length) + return math_utils.get_contact_density(contact_list, seq_length) def calculate_diff(cmap_1, cmap_2, display_settings): @@ -38,7 +21,7 @@ def calculate_diff(cmap_1, cmap_2, display_settings): fp = len(cmap_2_set[resn] - cmap_1_set[resn]) fn = len(cmap_1_set[resn] - cmap_2_set[resn]) tn = size - sum((tp, fp, fn)) - mcc = calculate_mcc(tp, fp, tn, fn) + mcc = math_utils.calculate_mcc(tp, fp, tn, fn) diff.append(int(round(mcc, 0))) return diff @@ -138,13 +121,3 @@ def get_traces(prediction, dataset, track_idx, track_separation, marker_size, al return traces -def get_contact_density(contact_list, seq_length): - """Credits to Felix Simkovic; code taken from GitHub rigdenlab/conkit/core/contactmap.py""" - x = np.array([i for c in contact_list for i in np.arange(c[1], c[0] + 1)], dtype=np.int64)[:, np.newaxis] - bw = estimate_bandwidth(x) - kde = KernelDensity(bandwidth=bw).fit(x) - x_fit = np.arange(1, seq_length + 1)[:, np.newaxis] - density = np.exp(kde.score_samples(x_fit)).tolist() - density_max = max(density) - density = [int(round(float(i) / density_max, 1) * 10) for i in density] - return density From c59fdf8a3bb9d078954d202c114ec66bc6fd7ac9 Mon Sep 17 00:00:00 2001 From: FilomenoSanchez Date: Thu, 22 Apr 2021 20:15:31 +0100 Subject: [PATCH 10/37] use conkit to calculate amise bw (faster than numba) --- utils/math_utils.py | 83 ++------------------------------------------- 1 file changed, 3 insertions(+), 80 deletions(-) diff --git a/utils/math_utils.py b/utils/math_utils.py index 4004a03..24b43e4 100644 --- a/utils/math_utils.py +++ b/utils/math_utils.py @@ -1,14 +1,8 @@ -import math +from conkit.misc.bandwidth import bandwidth_factory from numba import njit import numpy as np from sklearn.neighbors import KernelDensity -"""Credits to Felix Simkovic; code taken from GitHub rigdenlab/conkit""" -"""Credits to Felix Simkovic; code taken from GitHub rigdenlab/conkit""" - -SQRT_PI = math.sqrt(math.pi) -SQRT_2PI = math.sqrt(2.0 * math.pi) - @njit(fastmath=True) def calculate_mcc(tp, fp, tn, fn): @@ -23,81 +17,10 @@ def calculate_mcc(tp, fp, tn, fn): return mcc -@njit(fastmath=True) -def calculate_bowman_bw(data): - M, N = data.shape - bw = math.sqrt((data ** 2).sum() / M - (data.sum() / M) ** 2) * ((((N + 2) * M) / 4.0) ** (-1.0 / (N + 4))) - return bw - - -@njit(fastmath=True) -def calculate_amise_bw(data, n_iterations=25, eps=0.001): - data = np.asarray(data) - x0 = calculate_bowman_bw(data) - y0 = optimize_bandwidth(data, x0) - x = 0.8 * x0 - y = optimize_bandwidth(data, x) - for i in range(n_iterations): - x = x - (y * (x0 - x) / (y0 - y)) - y = optimize_bandwidth(data, x) - if abs(y) < (eps * y0): - break - return x - - -@njit(cache=True, fastmath=True) -def optimize_bandwidth(A, v): - alpha = 1.0 / (2.0 * SQRT_PI) - sigma = 1.0 - integral = get_stiffness_integral(A, v, 0.0001) - result = v - ((A.shape[0] * integral * sigma ** 4) / alpha) ** (-1.0 / (A.shape[1] + 4)) - return result - - -@njit(fastmath=True) -def get_stiffness_integral(A, v, eps): - min_ = A.min() - v * 3 - max_ = A.max() + v * 3 - dx = 1.0 * (max_ - min_) - maxn = dx / math.sqrt(eps) - if maxn > 2048: - maxn = 2048 - y1 = get_gauss_curvature(A, min_, v) - y2 = get_gauss_curvature(A, max_, v) - yy = 0.5 * dx * (y1 * y1 + y2 * y2) - n = 2 - - while n <= maxn: - dx = dx / 2.0 - y = 0.0 - for i in range(1, n, 2): - y3 = get_gauss_curvature(A, min_ + i * dx, v) - y = y + (y3 * y3) - yy = 0.5 * yy + y * dx - if n > 8 and math.fabs(y * dx - 0.5 * yy) < eps * yy: - break - n = n * 2 - - return yy - - -@njit(cache=True, fastmath=True) -def get_gauss_curvature(A, x, w): - w_sq = w * w - w_sqrt_2pi = w * SQRT_2PI - curvature = 0.0 - shape_1 = A.shape[1] - for i in range(A.shape[0]): - for j in range(shape_1): - z = (x - A[i, j]) / w - z = z * z - curvature = curvature + (shape_1 * (z - 1.0) * (math.exp(-0.5 * z) / w_sqrt_2pi) / w_sq) - return curvature / A.shape[0] - - def get_contact_density(contact_list, seq_length): + """Credits to Felix Simkovic; code taken from GitHub rigdenlab/conkit""" x = np.array([i for c in contact_list for i in np.arange(c[1], c[0] + 1)], dtype=np.int64)[:, np.newaxis] - bw = calculate_amise_bw(x) + bw = bandwidth_factory('amise')(x).bw kde = KernelDensity(bandwidth=bw).fit(x) x_fit = np.arange(1, seq_length + 1)[:, np.newaxis] density = np.exp(kde.score_samples(x_fit)).tolist() From a6ab7ac47dc67767e6c50ce7dd56ac04f70e2eab Mon Sep 17 00:00:00 2001 From: FilomenoSanchez Date: Fri, 23 Apr 2021 09:50:39 +0100 Subject: [PATCH 11/37] fix contact density tests --- utils/math_utils.py | 2 +- utils/tests/test_math_utils.py | 19 +++++++++++++++++++ utils/tests/test_tracks_utils.py | 10 +++++----- 3 files changed, 25 insertions(+), 6 deletions(-) create mode 100644 utils/tests/test_math_utils.py diff --git a/utils/math_utils.py b/utils/math_utils.py index 24b43e4..ae48f12 100644 --- a/utils/math_utils.py +++ b/utils/math_utils.py @@ -4,7 +4,7 @@ from sklearn.neighbors import KernelDensity -@njit(fastmath=True) +@njit() def calculate_mcc(tp, fp, tn, fn): denominator = (tp + fp) * (tp + fn) * (tn + fp) * (tn + fn) denominator = np.sqrt(denominator) diff --git a/utils/tests/test_math_utils.py b/utils/tests/test_math_utils.py new file mode 100644 index 0000000..d887cf2 --- /dev/null +++ b/utils/tests/test_math_utils.py @@ -0,0 +1,19 @@ +import unittest +from utils import math_utils + + +class MathUtilsTestCase(unittest.TestCase): + + def test_1(self): + dummy_cmap = [(52, 50), (53, 51), (145, 143), (142, 140), (150, 148), (53, 50), (147, 145), (141, 139), + (143, 141), (148, 146)] + expected_density = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 7, 10, 10, 7, 4, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 5, 8, 10, 9, 8, 8, 8, 8, 8, 7, 6, 4, 2, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + + density = math_utils.get_contact_density(dummy_cmap, 168) + print(density) + self.assertListEqual(density, expected_density) diff --git a/utils/tests/test_tracks_utils.py b/utils/tests/test_tracks_utils.py index c03f479..64b34d5 100644 --- a/utils/tests/test_tracks_utils.py +++ b/utils/tests/test_tracks_utils.py @@ -2,17 +2,17 @@ from utils import tracks_utils -class SessionUtilsTestCase(unittest.TestCase): +class TrackUtilsTestCase(unittest.TestCase): def test_1(self): dummy_cmap = [(52, 50), (53, 51), (145, 143), (142, 140), (150, 148), (53, 50), (147, 145), (141, 139), (143, 141), (148, 146)] expected_density = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 5, 7, 8, 8, 7, 5, 3, 2, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 7, 10, 10, 7, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 3, 4, 6, 8, 9, 10, 10, 10, 10, 9, 9, 8, 6, 5, 3, 2, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 4, 6, 7, 6, 5, 6, 6, 5, 5, 4, 4, 3, 2, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - density = tracks_utils.get_contact_density(dummy_cmap, 168) + density = tracks_utils.calculate_density(dummy_cmap, 168, 20) self.assertListEqual(density, expected_density) From 18571f0b2dad02ec947cd89d3bdacef6f4c5780b Mon Sep 17 00:00:00 2001 From: FilomenoSanchez Date: Fri, 23 Apr 2021 10:27:59 +0100 Subject: [PATCH 12/37] added more tests --- utils/tests/test_math_utils.py | 15 ++++++++++ utils/tests/test_tracks_utils.py | 47 ++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) diff --git a/utils/tests/test_math_utils.py b/utils/tests/test_math_utils.py index d887cf2..344160e 100644 --- a/utils/tests/test_math_utils.py +++ b/utils/tests/test_math_utils.py @@ -17,3 +17,18 @@ def test_1(self): density = math_utils.get_contact_density(dummy_cmap, 168) print(density) self.assertListEqual(density, expected_density) + + def test_2(self): + expected_output = 3.0210772833723656 + output = math_utils.calculate_mcc(5, 2, 120, 2) + self.assertEqual(output, expected_output) + + def test_3(self): + expected_output = 1 + output = math_utils.calculate_mcc(0, 0, 120, 2) + self.assertEqual(output, expected_output) + + def test_4(self): + expected_output = 10 + output = math_utils.calculate_mcc(12, 1, 0, 2) + self.assertEqual(output, expected_output) diff --git a/utils/tests/test_tracks_utils.py b/utils/tests/test_tracks_utils.py index 64b34d5..faeea31 100644 --- a/utils/tests/test_tracks_utils.py +++ b/utils/tests/test_tracks_utils.py @@ -1,5 +1,8 @@ import unittest from utils import tracks_utils +from collections import namedtuple + +DisplayControlSettings = namedtuple('DisplayControlSettings', ('factor', 'seq_length')) class TrackUtilsTestCase(unittest.TestCase): @@ -16,3 +19,47 @@ def test_1(self): density = tracks_utils.calculate_density(dummy_cmap, 168, 20) self.assertListEqual(density, expected_density) + + def test_2(self): + dummy_cmap_1 = [(52, 50), (53, 51), (145, 143), (142, 140), (150, 148), (53, 50), (147, 145), (141, 139), + (143, 141), (148, 146)] + dummy_cmap_2 = [(52, 50), (53, 51), (145, 143), (142, 140), (150, 148), (53, 50), (147, 145), (141, 139), + (143, 141), (148, 146)] + expected_diff = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1] + dummy_display_settings = DisplayControlSettings(factor=0, seq_length=168) + diff = tracks_utils.calculate_diff(dummy_cmap_1, dummy_cmap_2, dummy_display_settings) + self.assertListEqual(diff, expected_diff) + + def test_3(self): + dummy_cmap_1 = [(52, 50), (53, 51), (145, 143), (142, 140), (150, 148), (53, 50), (147, 145), (141, 139), + (143, 141), (148, 146)] + dummy_cmap_2 = [(150, 148), (53, 50), (147, 145), (141, 139), (143, 141), (148, 146), (120, 12), (25, 35)] + expected_diff = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 3, 1, 3, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1] + dummy_display_settings = DisplayControlSettings(factor=0, seq_length=168) + diff = tracks_utils.calculate_diff(dummy_cmap_1, dummy_cmap_2, dummy_display_settings) + self.assertListEqual(diff, expected_diff) + + def test_4(self): + dummy_cmap_1 = [(52, 50), (53, 51), (145, 143), (142, 140), (150, 148), (53, 50), (147, 145), (141, 139), + (143, 141), (148, 146), (10, 55), (5, 145)] + dummy_cmap_2 = [(150, 148), (53, 50), (147, 145), (141, 139), (143, 141), (148, 146), (120, 12), (25, 35)] + expected_diff = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 3, 1, 10, 1, 3, 1, 0, 3, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1] + dummy_display_settings = DisplayControlSettings(factor=20, seq_length=168) + + diff = tracks_utils.calculate_diff(dummy_cmap_1, dummy_cmap_2, dummy_display_settings) + self.assertListEqual(diff, expected_diff) From 90a2854baf5cf35fc77eab6d33bb0d8e087e5a66 Mon Sep 17 00:00:00 2001 From: FilomenoSanchez Date: Fri, 23 Apr 2021 10:31:07 +0100 Subject: [PATCH 13/37] fixed tests --- utils/tests/test_cache_utils.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/utils/tests/test_cache_utils.py b/utils/tests/test_cache_utils.py index aaefcd1..1811da2 100644 --- a/utils/tests/test_cache_utils.py +++ b/utils/tests/test_cache_utils.py @@ -83,8 +83,10 @@ def test_9(self): cachekey_2 = cache_utils.CacheKeys.CMAP_DENSITY.value.format('fname_2', '2').encode() density_2 = [5, 6, 7, 8, 9, 0] - cache_utils.store_data(self.session_id, cachekey_1, density_1, self.cache) - cache_utils.store_data(self.session_id, cachekey_2, density_2, self.cache) + cache_utils.store_data(self.session_id, cachekey_1, density_1, + cache_utils.CacheKeys.CONTACT_DENSITY.value, self.cache) + cache_utils.store_data(self.session_id, cachekey_2, density_2, + cache_utils.CacheKeys.CONTACT_DENSITY.value, self.cache) output = cache_utils.retrieve_data(self.session_id, cachekey_2, self.cache) self.assertListEqual(output, density_2) expected_cache = {b'id': cache_utils.compress_data(self.session_id)} From f9777658a27517a0d5a0a99864dc39a67640e2c8 Mon Sep 17 00:00:00 2001 From: FilomenoSanchez Date: Fri, 23 Apr 2021 11:31:24 +0100 Subject: [PATCH 14/37] calculate rmsd --- utils/math_utils.py | 19 +++++++++++++++++-- utils/tests/test_math_utils.py | 8 ++++++++ utils/tracks_utils.py | 1 + 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/utils/math_utils.py b/utils/math_utils.py index ae48f12..f55632b 100644 --- a/utils/math_utils.py +++ b/utils/math_utils.py @@ -1,5 +1,6 @@ from conkit.misc.bandwidth import bandwidth_factory -from numba import njit +import math +from numba import njit, vectorize import numpy as np from sklearn.neighbors import KernelDensity @@ -7,7 +8,7 @@ @njit() def calculate_mcc(tp, fp, tn, fn): denominator = (tp + fp) * (tp + fn) * (tn + fp) * (tn + fn) - denominator = np.sqrt(denominator) + denominator = math.sqrt(denominator) if denominator == 0: return 1 numerator = (tp * tn - fp * fn) * 10 @@ -17,6 +18,20 @@ def calculate_mcc(tp, fp, tn, fn): return mcc +@vectorize('float64(int64, int64)') +def get_difference(expected, observed): + difference = expected - observed + difference_squared = difference ** 2 + return difference_squared + + +@njit() +def calculate_rmsd(expected_array, observed_array): + squared_differences = get_difference(expected_array, observed_array) + rmsd = np.sum(squared_differences) / observed_array.shape[0] + return rmsd + + def get_contact_density(contact_list, seq_length): """Credits to Felix Simkovic; code taken from GitHub rigdenlab/conkit""" x = np.array([i for c in contact_list for i in np.arange(c[1], c[0] + 1)], dtype=np.int64)[:, np.newaxis] diff --git a/utils/tests/test_math_utils.py b/utils/tests/test_math_utils.py index 344160e..90eb1a3 100644 --- a/utils/tests/test_math_utils.py +++ b/utils/tests/test_math_utils.py @@ -1,5 +1,6 @@ import unittest from utils import math_utils +import numpy as np class MathUtilsTestCase(unittest.TestCase): @@ -32,3 +33,10 @@ def test_4(self): expected_output = 10 output = math_utils.calculate_mcc(12, 1, 0, 2) self.assertEqual(output, expected_output) + + def test_5(self): + expected_output = 24.714285714285715 + expected_array = np.array([10, 8, 1, 5, 5, 7, 5, 8, 5, 6, 8, 2, 1, 9]) + observed_array = np.array([1, 8, 10, 1, 5, 3, 7, 5, 1, 3, 8, 9, 0, 1]) + output = math_utils.calculate_rmsd(expected_array, observed_array) + self.assertEqual(output, expected_output) diff --git a/utils/tracks_utils.py b/utils/tracks_utils.py index 31ab2de..2413887 100644 --- a/utils/tracks_utils.py +++ b/utils/tracks_utils.py @@ -10,6 +10,7 @@ def calculate_density(cmap, seq_length, factor): def calculate_diff(cmap_1, cmap_2, display_settings): size = display_settings.seq_length + # TODO Check if cmap_1 AND cmap_2 contain residue distance predicitons. If so, calculate RMSD instead of MCC cmap_1 = slice_cmap(cmap_1, display_settings.seq_length, display_settings.factor) cmap_2 = slice_cmap(cmap_2, display_settings.seq_length, display_settings.factor) cmap_1_set = {resn: {(c[0], c[1]) for c in cmap_1 if resn in (c[0], c[1])} for resn in range(1, size + 1)} From 7f8c53514decbe84ea02b2c4cabb830feff88f74 Mon Sep 17 00:00:00 2001 From: FilomenoSanchez Date: Mon, 26 Apr 2021 17:13:06 +0100 Subject: [PATCH 15/37] append file type at begining of cmap --- parsers/__init__.py | 9 +++++++++ parsers/casprr2parser.py | 2 +- parsers/mappred.py | 2 +- parsers/npzparser.py | 2 +- parsers/pdbparser.py | 4 ++-- parsers/tests/test_casprr2parser.py | 2 +- parsers/tests/test_mappred.py | 2 +- parsers/tests/test_pdbparser.py | 2 +- 8 files changed, 17 insertions(+), 8 deletions(-) diff --git a/parsers/__init__.py b/parsers/__init__.py index d78ed98..75b09d0 100644 --- a/parsers/__init__.py +++ b/parsers/__init__.py @@ -1,4 +1,5 @@ from enum import Enum +from operator import itemgetter def ConsurfParser(*args, **kwargs): @@ -295,3 +296,11 @@ class DatasetStates(Enum): density = DensityStates diff = DiffStates msa = MsaStates + + +def get_unique_distances(elements): + key = itemgetter(0) + unique_contacts = list({key(el): el for el in elements}.values()) + output = ['DISTO'] + output += sorted([(*contact[0], *contact[1:]) for contact in unique_contacts], key=itemgetter(2), reverse=True) + return output diff --git a/parsers/casprr2parser.py b/parsers/casprr2parser.py index d600018..cbb9e2f 100644 --- a/parsers/casprr2parser.py +++ b/parsers/casprr2parser.py @@ -1,4 +1,4 @@ -from utils import get_unique_distances +from parsers import get_unique_distances from utils.exceptions import InvalidFormat diff --git a/parsers/mappred.py b/parsers/mappred.py index 8616471..e6e03ac 100644 --- a/parsers/mappred.py +++ b/parsers/mappred.py @@ -1,4 +1,4 @@ -from utils import get_unique_distances +from parsers import get_unique_distances from utils.exceptions import InvalidFormat diff --git a/parsers/npzparser.py b/parsers/npzparser.py index 6287828..375afe4 100644 --- a/parsers/npzparser.py +++ b/parsers/npzparser.py @@ -2,7 +2,7 @@ import base64 import numpy as np from utils.exceptions import InvalidFormat -from utils import get_unique_distances +from parsers import get_unique_distances def parse_array(array): diff --git a/parsers/pdbparser.py b/parsers/pdbparser.py index 9b82634..3f3602f 100644 --- a/parsers/pdbparser.py +++ b/parsers/pdbparser.py @@ -61,6 +61,6 @@ def PDBParser(input, input_format=None): if not contacts: raise InvalidFormat('Unable to parse contacts') - output = sorted(contacts, key=itemgetter(2), reverse=True) - output.append("PDB") + output = ["PDB"] + output += sorted(contacts, key=itemgetter(2), reverse=True) return output diff --git a/parsers/tests/test_casprr2parser.py b/parsers/tests/test_casprr2parser.py index ed8e08f..d913415 100644 --- a/parsers/tests/test_casprr2parser.py +++ b/parsers/tests/test_casprr2parser.py @@ -35,7 +35,7 @@ def test_1(self): output = CASPRR2Parser(dummy_prediction) - self.assertEqual('DISTO', output.pop(-1)) + self.assertEqual('DISTO', output.pop(0)) self.assertEqual(12, len(output)) self.assertListEqual(expected_res1, [contact[0] for contact in output]) self.assertListEqual(expected_res2, [contact[1] for contact in output]) diff --git a/parsers/tests/test_mappred.py b/parsers/tests/test_mappred.py index f6fa43b..7672893 100644 --- a/parsers/tests/test_mappred.py +++ b/parsers/tests/test_mappred.py @@ -24,7 +24,7 @@ def test_1(self): output = MappredParser(dummy_prediction) - self.assertEqual('DISTO', output.pop(-1)) + self.assertEqual('DISTO', output.pop(0)) self.assertEqual(4, len(output)) self.assertListEqual(expected_res1, [contact[0] for contact in output]) self.assertListEqual(expected_res2, [contact[1] for contact in output]) diff --git a/parsers/tests/test_pdbparser.py b/parsers/tests/test_pdbparser.py index d48a300..782bc50 100644 --- a/parsers/tests/test_pdbparser.py +++ b/parsers/tests/test_pdbparser.py @@ -33,7 +33,7 @@ def test_1(self): output = PDBParser(dummy_prediction) self.assertEqual(7, len(output)) - self.assertEqual('PDB', output.pop(-1)) + self.assertEqual('PDB', output.pop(0)) self.assertListEqual(expected_res1, [contact[0] for contact in output]) self.assertListEqual(expected_res2, [contact[1] for contact in output]) self.assertListEqual(expected_score, [contact[2] for contact in output]) From 2f6fda4070b5d73edcfa5cd57415e37ab63a8af1 Mon Sep 17 00:00:00 2001 From: FilomenoSanchez Date: Mon, 26 Apr 2021 17:16:27 +0100 Subject: [PATCH 16/37] refactor cmap code --- utils/__init__.py | 15 ++++++--------- utils/cmap_utils.py | 16 ++++++++++------ utils/heatmap_utils.py | 14 ++++++++------ utils/plot_utils.py | 4 ++-- 4 files changed, 26 insertions(+), 23 deletions(-) diff --git a/utils/__init__.py b/utils/__init__.py index dba1ce1..7ccd701 100644 --- a/utils/__init__.py +++ b/utils/__init__.py @@ -134,6 +134,12 @@ def retrieve_sequence_fname(*args, **kwargs): return retrieve_sequence_fname(*args, **kwargs) +def contains_distances(*args, **kwargs): + from utils.cmap_utils import contains_distances + + return contains_distances(*args, **kwargs) + + def CacheKeys(*args, **kwargs): from utils.cache_utils import CacheKeys @@ -273,12 +279,3 @@ def get_unique_contacts(elements): output = [(*contact[0], contact[1]) for contact in unique] output = sorted(output, key=itemgetter(2), reverse=True) return output - - -def get_unique_distances(elements): - key = itemgetter(0) - unique_contacts = list({key(el): el for el in elements}.values()) - output = [(*contact[0], *contact[1:]) for contact in unique_contacts] - output = sorted(output, key=itemgetter(2), reverse=True) - output.append('DISTO') - return output diff --git a/utils/cmap_utils.py b/utils/cmap_utils.py index 42bb413..c14304c 100644 --- a/utils/cmap_utils.py +++ b/utils/cmap_utils.py @@ -47,13 +47,17 @@ def create_cmap(cmap, idx, display_settings, verbose_labels=None): return res1_list, res2_list, hover +def contains_distances(cmap): + if len(cmap[-1]) > 3: + return True + return False + + def slice_cmap(cmap, seq_length, factor): - if cmap[-1] == 'PDB': - cmap = cmap[:-1] - cmap = [contact for contact in cmap if contact[2] > 0] - return cmap - elif cmap[-1] == 'DISTO': - cmap = cmap[:-1] + if cmap[0] == 'PDB': + return [contact for contact in cmap[1:] if contact[2] > 0] + elif cmap[0] == 'DISTO': + cmap = cmap[1:] if factor != 0: cmap = cmap[:int(round(seq_length / factor, 0))] diff --git a/utils/heatmap_utils.py b/utils/heatmap_utils.py index 0c2c0dc..93bb9b3 100644 --- a/utils/heatmap_utils.py +++ b/utils/heatmap_utils.py @@ -1,10 +1,12 @@ import plotly.graph_objects as go +import numpy as np from utils import color_palettes, DistanceLabels, HoverTemplates def init_heatmap(seq_length): - heat = [[0 for x in range(seq_length + 1)] for y in range(seq_length + 1)] - hover = [[None for x in range(seq_length + 1)] for y in range(seq_length + 1)] + shape = (seq_length + 1, seq_length + 1) + heat = np.zeros(shape).tolist() + hover = np.full(shape, None).tolist() return heat, hover @@ -41,8 +43,8 @@ def populate_heatmap(cmap, idx, distances, hover, verbose_labels=None): idx_x = 0 idx_y = 1 - if cmap[-1] == 'DISTO' or cmap[-1] == 'PDB': - cmap = cmap[:-1] + if cmap[0] == 'DISTO' or cmap[0] == 'PDB': + cmap = cmap[1:] if verbose_labels is not None: for contact in cmap: @@ -81,8 +83,8 @@ def populate_heatmap(cmap, idx, distances, hover, verbose_labels=None): def populate_superimposed_heatmap(reference_cmap, secondary_cmap, heat, hover, verbose_labels=None): idx_x = 1 idx_y = 0 - reference_ftype = reference_cmap.pop(-1) - secondary_ftype = secondary_cmap.pop(-1) + reference_cmap = reference_cmap[1:] + secondary_cmap = secondary_cmap[1:] predicted_set = {(x[0], x[1]): x[3] for x in secondary_cmap} if verbose_labels is not None: diff --git a/utils/plot_utils.py b/utils/plot_utils.py index 9c80e65..2d61aab 100644 --- a/utils/plot_utils.py +++ b/utils/plot_utils.py @@ -159,7 +159,7 @@ def lookup_input_errors(session_id, session, cmap_selection, superimpose, heatma reference_cmap = session[cmap_selection[0].encode()] predicted_cmap = session[cmap_selection[1].encode()] error = no_update, components.InvalidSuperposeHeatmapModal(), no_update, no_update - if not isinstance(reference_cmap[-1], str) or not isinstance(predicted_cmap[-1], str): + if not isinstance(reference_cmap[0], str) or not isinstance(predicted_cmap[0], str): return None, None, None, error return None @@ -225,7 +225,7 @@ def separate_pdb_cmaps(session, cmap_fname_list): for fname in cmap_fname_list: cmap = session[fname.encode()] - if cmap[-1] == 'PDB': + if cmap[0] == 'PDB': pdb_fnames.append(fname) else: non_pdb_fnames.append(fname) From 5af3026c6fb28bd2ce2c404a1953c13dbc5a67e4 Mon Sep 17 00:00:00 2001 From: FilomenoSanchez Date: Mon, 26 Apr 2021 17:16:47 +0100 Subject: [PATCH 17/37] calculate rmsd between distograms --- utils/cache_utils.py | 24 ++++++++++++++---- utils/data_utils.py | 12 +++++---- utils/math_utils.py | 14 +++++++++-- utils/tests/test_cache_utils.py | 2 +- utils/tests/test_math_utils.py | 9 ------- utils/tests/test_track_utils.py | 40 ++++++++++++++++++++++++++++++ utils/tracks_utils.py | 44 ++++++++++++++++++++++++++------- 7 files changed, 114 insertions(+), 31 deletions(-) create mode 100644 utils/tests/test_track_utils.py diff --git a/utils/cache_utils.py b/utils/cache_utils.py index 18c2e8e..9f9b660 100644 --- a/utils/cache_utils.py +++ b/utils/cache_utils.py @@ -45,9 +45,8 @@ def store_data(session_id, cachekey, data, dataset, cache): store_fname(cache, session_id, cachekey.decode(), dataset) -# TODO: Need to implement the same for contact diff -def remove_all_density(session_id, cache): - density_list = cache.hget(session_id, CacheKeys.CONTACT_DENSITY.value) +def remove_all(session_id, dataset, cache): + density_list = cache.hget(session_id, dataset) if not density_list: return @@ -55,7 +54,7 @@ def remove_all_density(session_id, cache): for density in density_list: cache.hdel(session_id, density) - cache.hdel(session_id, CacheKeys.CONTACT_DENSITY.value) + cache.hdel(session_id, dataset) def remove_density(session_id, cache, fname): @@ -72,6 +71,20 @@ def remove_density(session_id, cache, fname): cache.hset(session_id, CacheKeys.CONTACT_DENSITY.value, compress_data(density_list)) +def remove_diff(session_id, cache, fname): + diff_list = cache.hget(session_id, CacheKeys.CONTACT_DIFF.value) + if not diff_list: + return + diff_list = decompress_data(diff_list) + print(diff_list) + + for diff in diff_list: + if fname in diff: + cache.hdel(session_id, diff) + diff_list = [diff for diff in diff_list if fname not in diff] + cache.hset(session_id, CacheKeys.CONTACT_DIFF.value, compress_data(diff_list)) + + def is_valid_fname(fname): if any([x for x in CacheKeys if x.value == fname]) or any([tag for tag in MetadataTags if tag.value in fname]): return False @@ -161,7 +174,8 @@ def clear_cache(session_id, cache): remove_datasets(session_id, cache) remove_figure(session_id, cache) remove_sequence(session_id, cache) - remove_all_density(session_id, cache) + remove_all(session_id, cache, CacheKeys.CONTACT_DENSITY.value) + remove_all(session_id, cache, CacheKeys.CONTACT_DIFF.value) def remove_datasets(session_id, cache): diff --git a/utils/data_utils.py b/utils/data_utils.py index 8017e92..862cf77 100644 --- a/utils/data_utils.py +++ b/utils/data_utils.py @@ -12,8 +12,8 @@ def check_sequence_mismatch(session_id, cache, seq_length): cmap_fnames = decompress_data(cache.hget(session_id, cache_utils.CacheKeys.CONTACT_MAP.value)) for cmap_fname in cmap_fnames: cmap_data = decompress_data(cache.hget(session_id, cmap_fname)) - if cmap_data[-1] == 'PDB' or cmap_data[-1] == 'DISTO': - cmap_data.pop() + if cmap_data[0] == 'PDB' or cmap_data[0] == 'DISTO': + cmap_data = cmap_data[1:] cmap_max_register = max((max(cmap_data, key=itemgetter(0))[0], max(cmap_data, key=itemgetter(1))[0])) if cmap_max_register > seq_length: mismatched.append(cmap_fname) @@ -42,8 +42,8 @@ def check_dataset_mismatch(session_id, cache, data, dataset): return seq_fname else: return False - elif data[-1] == 'PDB' or data[-1] == 'DISTO': - max_register = max((max(data[:-1], key=itemgetter(0))[0], max(data[:-1], key=itemgetter(1))[0])) + elif data[0] == 'PDB' or data[0] == 'DISTO': + max_register = max((max(data[1:], key=itemgetter(0))[0], max(data[1:], key=itemgetter(1))[0])) else: max_register = max((max(data, key=itemgetter(0))[0], max(data, key=itemgetter(1))[0])) @@ -127,9 +127,11 @@ def remove_dataset(trigger, cache, session_id, logger): cache_utils.remove_fname(cache, session_id, fname, dataset) if dataset == loaders.DatasetReference.SEQUENCE.value: - cache_utils.remove_all_density(session_id, cache) + cache_utils.remove_all(session_id, cache, cache_utils.CacheKeys.CONTACT_DENSITY.value) + cache_utils.remove_all(session_id, cache, cache_utils.CacheKeys.CONTACT_DIFF.value) elif dataset == loaders.DatasetReference.CONTACT_MAP.value: cache_utils.remove_density(session_id, cache, fname) + cache_utils.remove_diff(session_id, cache, fname) def lookup_data(session, session_id, cachekey, cache): diff --git a/utils/math_utils.py b/utils/math_utils.py index f55632b..87dda03 100644 --- a/utils/math_utils.py +++ b/utils/math_utils.py @@ -25,10 +25,20 @@ def get_difference(expected, observed): return difference_squared +@vectorize('float64(int64, float64)') +def populate_rmsd(seq_length, sum_squared_differences): + rmsd = np.round(math.sqrt(sum_squared_differences / seq_length), 0) + if rmsd > 10: + return 10 + return rmsd + + @njit() -def calculate_rmsd(expected_array, observed_array): +def calculate_rmsd(expected_array, observed_array, seq_length): squared_differences = get_difference(expected_array, observed_array) - rmsd = np.sum(squared_differences) / observed_array.shape[0] + seq_length_array = np.full(seq_length, seq_length) + sum_squared_differences = np.sum(squared_differences, axis=0) + rmsd = populate_rmsd(seq_length_array, sum_squared_differences) return rmsd diff --git a/utils/tests/test_cache_utils.py b/utils/tests/test_cache_utils.py index 1811da2..e21c151 100644 --- a/utils/tests/test_cache_utils.py +++ b/utils/tests/test_cache_utils.py @@ -90,7 +90,7 @@ def test_9(self): output = cache_utils.retrieve_data(self.session_id, cachekey_2, self.cache) self.assertListEqual(output, density_2) expected_cache = {b'id': cache_utils.compress_data(self.session_id)} - cache_utils.remove_all_density(self.session_id, self.cache) + cache_utils.remove_all(self.session_id, self.cache, cache_utils.CacheKeys.CONTACT_DENSITY.value) self.assertDictEqual(expected_cache, self.cache.hgetall(self.session_id)) def test_10(self): diff --git a/utils/tests/test_math_utils.py b/utils/tests/test_math_utils.py index 90eb1a3..a8d6c3c 100644 --- a/utils/tests/test_math_utils.py +++ b/utils/tests/test_math_utils.py @@ -1,6 +1,5 @@ import unittest from utils import math_utils -import numpy as np class MathUtilsTestCase(unittest.TestCase): @@ -16,7 +15,6 @@ def test_1(self): 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] density = math_utils.get_contact_density(dummy_cmap, 168) - print(density) self.assertListEqual(density, expected_density) def test_2(self): @@ -33,10 +31,3 @@ def test_4(self): expected_output = 10 output = math_utils.calculate_mcc(12, 1, 0, 2) self.assertEqual(output, expected_output) - - def test_5(self): - expected_output = 24.714285714285715 - expected_array = np.array([10, 8, 1, 5, 5, 7, 5, 8, 5, 6, 8, 2, 1, 9]) - observed_array = np.array([1, 8, 10, 1, 5, 3, 7, 5, 1, 3, 8, 9, 0, 1]) - output = math_utils.calculate_rmsd(expected_array, observed_array) - self.assertEqual(output, expected_output) diff --git a/utils/tests/test_track_utils.py b/utils/tests/test_track_utils.py new file mode 100644 index 0000000..b0fc0c6 --- /dev/null +++ b/utils/tests/test_track_utils.py @@ -0,0 +1,40 @@ +import unittest +from utils import tracks_utils + + +class TrackUtilsTestCase(unittest.TestCase): + + def test_1(self): + """ + cmap_1 + 2 8 9 0 + 5 0 0 9 + 7 0 0 8 + 0 7 5 2 + cmap_2 + 9 6 0 0 + 5 1 0 0 + 5 0 1 6 + 0 5 5 9 + """ + cmap_1 = [ + [2, 1, 0, 7], + [3, 1, 0, 5], + [4, 1, 0, 2], + [4, 2, 0, 8], + [3, 2, 0, 0], + [4, 3, 0, 9] + ] + + cmap_2 = [ + [2, 1, 0, 5], + [3, 1, 0, 5], + [4, 1, 0, 9], + [3, 2, 0, 1], + [4, 2, 0, 6], + [4, 3, 0, 0] + ] + + expected = [7, 4, 10, 10] + output = tracks_utils.get_cmap_rmsd(cmap_1, cmap_2, 4) + self.assertListEqual(output, expected) diff --git a/utils/tracks_utils.py b/utils/tracks_utils.py index 2413887..a092494 100644 --- a/utils/tracks_utils.py +++ b/utils/tracks_utils.py @@ -1,18 +1,26 @@ +import numpy as np from loaders import AdditionalDatasetReference, DatasetReference from parsers import DatasetStates -from utils import create_cmap_trace, color_palettes, cache_utils, lookup_data, slice_cmap, math_utils +from utils import create_cmap_trace, color_palettes, cache_utils, lookup_data, cmap_utils, math_utils def calculate_density(cmap, seq_length, factor): - contact_list = slice_cmap(cmap, seq_length, factor) + contact_list = cmap_utils.slice_cmap(cmap, seq_length, factor) return math_utils.get_contact_density(contact_list, seq_length) -def calculate_diff(cmap_1, cmap_2, display_settings): - size = display_settings.seq_length - # TODO Check if cmap_1 AND cmap_2 contain residue distance predicitons. If so, calculate RMSD instead of MCC - cmap_1 = slice_cmap(cmap_1, display_settings.seq_length, display_settings.factor) - cmap_2 = slice_cmap(cmap_2, display_settings.seq_length, display_settings.factor) +DISTANCE_BINS = {0: 0, 1: 5, 2: 7, 3: 9, 4: 11, 5: 13, 6: 15, 7: 17, 8: 19, 9: 20} + + +def get_distance_array(cmap, seq_length): + array = np.full((seq_length, seq_length), 20) + for contact in cmap: + array[seq_length - contact[0], contact[1] - 1] = DISTANCE_BINS[contact[3]] + array[seq_length - contact[1], contact[0] - 1] = DISTANCE_BINS[contact[3]] + return array + + +def get_cmap_mcc(cmap_1, cmap_2, size): cmap_1_set = {resn: {(c[0], c[1]) for c in cmap_1 if resn in (c[0], c[1])} for resn in range(1, size + 1)} cmap_2_set = {resn: {(c[0], c[1]) for c in cmap_2 if resn in (c[0], c[1])} for resn in range(1, size + 1)} diff = [] @@ -24,9 +32,28 @@ def calculate_diff(cmap_1, cmap_2, display_settings): tn = size - sum((tp, fp, fn)) mcc = math_utils.calculate_mcc(tp, fp, tn, fn) diff.append(int(round(mcc, 0))) + return diff +def get_cmap_rmsd(cmap_1, cmap_2, seq_length): + cmap_1_array = get_distance_array(cmap_1, seq_length) + cmap_2_array = get_distance_array(cmap_2, seq_length) + rmsd = math_utils.calculate_rmsd(cmap_1_array, cmap_2_array, seq_length) + return rmsd.astype(int).tolist() + + +def calculate_diff(cmap_1, cmap_2, display_settings): + if cmap_utils.contains_distances(cmap_1) and cmap_utils.contains_distances(cmap_2): + cmap_1 = cmap_utils.slice_cmap(cmap_1, display_settings.seq_length, 0) + cmap_2 = cmap_utils.slice_cmap(cmap_2, display_settings.seq_length, 0) + return get_cmap_rmsd(cmap_1, cmap_2, display_settings.seq_length) + else: + cmap_1 = cmap_utils.slice_cmap(cmap_1, display_settings.seq_length, display_settings.factor) + cmap_2 = cmap_utils.slice_cmap(cmap_2, display_settings.seq_length, display_settings.factor) + return get_cmap_mcc(cmap_1, cmap_2, display_settings.seq_length) + + def get_diff_args(fname, factor): cmap_1 = fname.split('|')[0].rstrip().lstrip() cmap_2 = fname.split('|')[1].rstrip().lstrip() @@ -48,6 +75,7 @@ def retrieve_dataset_prediction(session_id, session, fname, display_settings, ca return DatasetReference.CONTACT_DENSITY.value, density + # TODO: If looking for diff for distance predictions, we also don't care about L factor (using all) if cache_utils.MetadataTags.SEPARATOR.value in fname: cmap_1, cmap_2, cachekey = get_diff_args(fname, display_settings.factor) diff = lookup_data(session, session_id, cachekey, cache) @@ -120,5 +148,3 @@ def get_traces(prediction, dataset, track_idx, track_separation, marker_size, al traces.append(create_cmap_trace(x, y, 'diamond', marker_size=marker_size, color=color, hovertext=hovertext)) return traces - - From 1df088b96f2c0c1e47b4e744e4c06ec041ea7680 Mon Sep 17 00:00:00 2001 From: FilomenoSanchez Date: Mon, 26 Apr 2021 17:22:36 +0100 Subject: [PATCH 18/37] skip conkit density test if running on github actions --- .github/workflows/main.yml | 1 + utils/tests/test_track_utils.py | 40 -------------------------------- utils/tests/test_tracks_utils.py | 37 +++++++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 40 deletions(-) delete mode 100644 utils/tests/test_track_utils.py diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index d701079..65e3378 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -39,6 +39,7 @@ jobs: redis-version: 5 - name: Run tests.py env: + THIS_IS_GH_ACTIONS: 1 KEYDB_URL: $ {{ secrets.KEYDB_URL }} run: | python tests.py diff --git a/utils/tests/test_track_utils.py b/utils/tests/test_track_utils.py deleted file mode 100644 index b0fc0c6..0000000 --- a/utils/tests/test_track_utils.py +++ /dev/null @@ -1,40 +0,0 @@ -import unittest -from utils import tracks_utils - - -class TrackUtilsTestCase(unittest.TestCase): - - def test_1(self): - """ - cmap_1 - 2 8 9 0 - 5 0 0 9 - 7 0 0 8 - 0 7 5 2 - cmap_2 - 9 6 0 0 - 5 1 0 0 - 5 0 1 6 - 0 5 5 9 - """ - cmap_1 = [ - [2, 1, 0, 7], - [3, 1, 0, 5], - [4, 1, 0, 2], - [4, 2, 0, 8], - [3, 2, 0, 0], - [4, 3, 0, 9] - ] - - cmap_2 = [ - [2, 1, 0, 5], - [3, 1, 0, 5], - [4, 1, 0, 9], - [3, 2, 0, 1], - [4, 2, 0, 6], - [4, 3, 0, 0] - ] - - expected = [7, 4, 10, 10] - output = tracks_utils.get_cmap_rmsd(cmap_1, cmap_2, 4) - self.assertListEqual(output, expected) diff --git a/utils/tests/test_tracks_utils.py b/utils/tests/test_tracks_utils.py index faeea31..0b693e1 100644 --- a/utils/tests/test_tracks_utils.py +++ b/utils/tests/test_tracks_utils.py @@ -1,3 +1,4 @@ +import os import unittest from utils import tracks_utils from collections import namedtuple @@ -7,6 +8,7 @@ class TrackUtilsTestCase(unittest.TestCase): + @unittest.skipIf('THIS_IS_GH_ACTIONS' in os.environ, "not implemented in Travis CI") def test_1(self): dummy_cmap = [(52, 50), (53, 51), (145, 143), (142, 140), (150, 148), (53, 50), (147, 145), (141, 139), (143, 141), (148, 146)] @@ -63,3 +65,38 @@ def test_4(self): diff = tracks_utils.calculate_diff(dummy_cmap_1, dummy_cmap_2, dummy_display_settings) self.assertListEqual(diff, expected_diff) + + def test_5(self): + """ + cmap_1 + 2 8 9 0 + 5 0 0 9 + 7 0 0 8 + 0 7 5 2 + cmap_2 + 9 6 0 0 + 5 1 0 0 + 5 0 1 6 + 0 5 5 9 + """ + cmap_1 = [ + [2, 1, 0, 7], + [3, 1, 0, 5], + [4, 1, 0, 2], + [4, 2, 0, 8], + [3, 2, 0, 0], + [4, 3, 0, 9] + ] + + cmap_2 = [ + [2, 1, 0, 5], + [3, 1, 0, 5], + [4, 1, 0, 9], + [3, 2, 0, 1], + [4, 2, 0, 6], + [4, 3, 0, 0] + ] + + expected = [7, 4, 10, 10] + output = tracks_utils.get_cmap_rmsd(cmap_1, cmap_2, 4) + self.assertListEqual(output, expected) From ddf4a94aa22cd422f20175a186610939947018d0 Mon Sep 17 00:00:00 2001 From: FilomenoSanchez Date: Mon, 26 Apr 2021 17:32:05 +0100 Subject: [PATCH 19/37] fix more tests --- utils/cache_utils.py | 14 +++++++------- utils/tests/test_tracks_utils.py | 2 -- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/utils/cache_utils.py b/utils/cache_utils.py index 9f9b660..7a06053 100644 --- a/utils/cache_utils.py +++ b/utils/cache_utils.py @@ -46,13 +46,13 @@ def store_data(session_id, cachekey, data, dataset, cache): def remove_all(session_id, dataset, cache): - density_list = cache.hget(session_id, dataset) - if not density_list: + cachekey_list = cache.hget(session_id, dataset) + if not cachekey_list: return - density_list = decompress_data(density_list) - for density in density_list: - cache.hdel(session_id, density) + cachekey_list = decompress_data(cachekey_list) + for cachekey in cachekey_list: + cache.hdel(session_id, cachekey) cache.hdel(session_id, dataset) @@ -174,8 +174,8 @@ def clear_cache(session_id, cache): remove_datasets(session_id, cache) remove_figure(session_id, cache) remove_sequence(session_id, cache) - remove_all(session_id, cache, CacheKeys.CONTACT_DENSITY.value) - remove_all(session_id, cache, CacheKeys.CONTACT_DIFF.value) + remove_all(session_id, CacheKeys.CONTACT_DENSITY.value, cache) + remove_all(session_id, CacheKeys.CONTACT_DIFF.value, cache) def remove_datasets(session_id, cache): diff --git a/utils/tests/test_tracks_utils.py b/utils/tests/test_tracks_utils.py index 0b693e1..520db31 100644 --- a/utils/tests/test_tracks_utils.py +++ b/utils/tests/test_tracks_utils.py @@ -1,4 +1,3 @@ -import os import unittest from utils import tracks_utils from collections import namedtuple @@ -8,7 +7,6 @@ class TrackUtilsTestCase(unittest.TestCase): - @unittest.skipIf('THIS_IS_GH_ACTIONS' in os.environ, "not implemented in Travis CI") def test_1(self): dummy_cmap = [(52, 50), (53, 51), (145, 143), (142, 140), (150, 148), (53, 50), (147, 145), (141, 139), (143, 141), (148, 146)] From ff91968eb569394b20cc8971c0c05ddd706ddb75 Mon Sep 17 00:00:00 2001 From: FilomenoSanchez Date: Mon, 26 Apr 2021 17:35:20 +0100 Subject: [PATCH 20/37] more broken tests --- utils/tests/test_cache_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/tests/test_cache_utils.py b/utils/tests/test_cache_utils.py index e21c151..89e75c0 100644 --- a/utils/tests/test_cache_utils.py +++ b/utils/tests/test_cache_utils.py @@ -90,7 +90,7 @@ def test_9(self): output = cache_utils.retrieve_data(self.session_id, cachekey_2, self.cache) self.assertListEqual(output, density_2) expected_cache = {b'id': cache_utils.compress_data(self.session_id)} - cache_utils.remove_all(self.session_id, self.cache, cache_utils.CacheKeys.CONTACT_DENSITY.value) + cache_utils.remove_all(self.session_id, cache_utils.CacheKeys.CONTACT_DENSITY.value, self.cache) self.assertDictEqual(expected_cache, self.cache.hgetall(self.session_id)) def test_10(self): From dfd684cf64c27327a29fd585079ca71fa067d14a Mon Sep 17 00:00:00 2001 From: FilomenoSanchez Date: Mon, 26 Apr 2021 17:39:37 +0100 Subject: [PATCH 21/37] final fix --- utils/tests/test_tracks_utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/utils/tests/test_tracks_utils.py b/utils/tests/test_tracks_utils.py index 520db31..7fde65e 100644 --- a/utils/tests/test_tracks_utils.py +++ b/utils/tests/test_tracks_utils.py @@ -1,3 +1,4 @@ +import os import unittest from utils import tracks_utils from collections import namedtuple @@ -7,6 +8,7 @@ class TrackUtilsTestCase(unittest.TestCase): + @unittest.skipIf('THIS_IS_GH_ACTIONS' in os.environ, "not implemented in Github Actions") def test_1(self): dummy_cmap = [(52, 50), (53, 51), (145, 143), (142, 140), (150, 148), (53, 50), (147, 145), (141, 139), (143, 141), (148, 146)] From 085e8273b3c6e6ade93eb8332637e9dbaabae408 Mon Sep 17 00:00:00 2001 From: FilomenoSanchez Date: Tue, 27 Apr 2021 19:02:29 +0100 Subject: [PATCH 22/37] add smoothing functions --- utils/cache_utils.py | 1 - utils/math_utils.py | 11 +++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/utils/cache_utils.py b/utils/cache_utils.py index 7a06053..983fa6e 100644 --- a/utils/cache_utils.py +++ b/utils/cache_utils.py @@ -76,7 +76,6 @@ def remove_diff(session_id, cache, fname): if not diff_list: return diff_list = decompress_data(diff_list) - print(diff_list) for diff in diff_list: if fname in diff: diff --git a/utils/math_utils.py b/utils/math_utils.py index 87dda03..8d72327 100644 --- a/utils/math_utils.py +++ b/utils/math_utils.py @@ -42,6 +42,17 @@ def calculate_rmsd(expected_array, observed_array, seq_length): return rmsd +def convolution_smooth_values(x, window=5): + box = np.ones(window) / window + x_smooth = np.convolve(x, box, mode='same') + return x_smooth + + +def cumsum_smooth(x, window=5): + cumsum_vec = np.cumsum(np.insert(x, 0, 0)) + return (cumsum_vec[window:] - cumsum_vec[:-window]) / window + + def get_contact_density(contact_list, seq_length): """Credits to Felix Simkovic; code taken from GitHub rigdenlab/conkit""" x = np.array([i for c in contact_list for i in np.arange(c[1], c[0] + 1)], dtype=np.int64)[:, np.newaxis] From 3049a09e0064eca91e37f70b92a453899c81681f Mon Sep 17 00:00:00 2001 From: FilomenoSanchez Date: Wed, 28 Apr 2021 09:43:22 +0100 Subject: [PATCH 23/37] smooth diff track --- utils/math_utils.py | 4 +- utils/tests/test_tracks_utils.py | 81 +++++++++++++++++--------------- utils/tracks_utils.py | 19 ++++---- 3 files changed, 56 insertions(+), 48 deletions(-) diff --git a/utils/math_utils.py b/utils/math_utils.py index 8d72327..919871a 100644 --- a/utils/math_utils.py +++ b/utils/math_utils.py @@ -27,7 +27,7 @@ def get_difference(expected, observed): @vectorize('float64(int64, float64)') def populate_rmsd(seq_length, sum_squared_differences): - rmsd = np.round(math.sqrt(sum_squared_differences / seq_length), 0) + rmsd = math.sqrt(sum_squared_differences / seq_length) if rmsd > 10: return 10 return rmsd @@ -45,7 +45,7 @@ def calculate_rmsd(expected_array, observed_array, seq_length): def convolution_smooth_values(x, window=5): box = np.ones(window) / window x_smooth = np.convolve(x, box, mode='same') - return x_smooth + return np.round(x_smooth, 0) def cumsum_smooth(x, window=5): diff --git a/utils/tests/test_tracks_utils.py b/utils/tests/test_tracks_utils.py index 7fde65e..0cfa123 100644 --- a/utils/tests/test_tracks_utils.py +++ b/utils/tests/test_tracks_utils.py @@ -23,50 +23,52 @@ def test_1(self): self.assertListEqual(density, expected_density) def test_2(self): - dummy_cmap_1 = [(52, 50), (53, 51), (145, 143), (142, 140), (150, 148), (53, 50), (147, 145), (141, 139), - (143, 141), (148, 146)] - dummy_cmap_2 = [(52, 50), (53, 51), (145, 143), (142, 140), (150, 148), (53, 50), (147, 145), (141, 139), - (143, 141), (148, 146)] - expected_diff = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1] - dummy_display_settings = DisplayControlSettings(factor=0, seq_length=168) + """ + cmap_1 + 1 1 0 1 + 1 0 1 0 + 0 1 0 1 + 1 0 1 1 + cmap_2 + 0 1 0 1 + 1 1 1 0 + 1 1 1 1 + 1 1 1 0 + """ + dummy_cmap_1 = [(1, 1), (3, 1), (4, 1), (2, 2), (4, 2), (3, 3), (4, 4)] + dummy_cmap_2 = [(1, 1), (3, 1), (2, 1), (2, 2), (4, 2), (3, 2), (3, 3), (4, 4)] + expected_mcc = [10, 1, 4, 4] + expected_mcc_smooth = [2, 3, 4, 4, 2] + dummy_display_settings = DisplayControlSettings(factor=0, seq_length=4) diff = tracks_utils.calculate_diff(dummy_cmap_1, dummy_cmap_2, dummy_display_settings) - self.assertListEqual(diff, expected_diff) + mcc = tracks_utils.get_cmap_mcc(dummy_cmap_1, dummy_cmap_2, dummy_display_settings.seq_length, smooth=False) + mcc_smooth = tracks_utils.get_cmap_mcc(dummy_cmap_1, dummy_cmap_2, dummy_display_settings.seq_length) + self.assertListEqual(mcc, expected_mcc) + self.assertListEqual(mcc_smooth, expected_mcc_smooth) + self.assertListEqual(mcc_smooth, diff) def test_3(self): - dummy_cmap_1 = [(52, 50), (53, 51), (145, 143), (142, 140), (150, 148), (53, 50), (147, 145), (141, 139), - (143, 141), (148, 146)] - dummy_cmap_2 = [(150, 148), (53, 50), (147, 145), (141, 139), (143, 141), (148, 146), (120, 12), (25, 35)] - expected_diff = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 3, 1, 3, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1] - dummy_display_settings = DisplayControlSettings(factor=0, seq_length=168) - diff = tracks_utils.calculate_diff(dummy_cmap_1, dummy_cmap_2, dummy_display_settings) - self.assertListEqual(diff, expected_diff) - - def test_4(self): - dummy_cmap_1 = [(52, 50), (53, 51), (145, 143), (142, 140), (150, 148), (53, 50), (147, 145), (141, 139), - (143, 141), (148, 146), (10, 55), (5, 145)] - dummy_cmap_2 = [(150, 148), (53, 50), (147, 145), (141, 139), (143, 141), (148, 146), (120, 12), (25, 35)] - expected_diff = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 3, 1, 10, 1, 3, 1, 0, 3, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1] - dummy_display_settings = DisplayControlSettings(factor=20, seq_length=168) + """ + cmap_1 + 1 1 0 1 + 1 0 1 0 + 0 1 0 1 + 1 0 1 1 + cmap_2 + 0 1 0 1 + 1 1 1 0 + 1 1 1 1 + 1 1 1 0 + """ + dummy_cmap_1 = [(1, 1), (3, 1), (4, 1), (2, 2), (4, 2), (3, 3), (4, 4)] + dummy_cmap_2 = [(1, 1), (3, 1), (2, 1), (2, 2), (4, 2), (3, 2), (3, 3), (4, 4)] + expected_diff = [3, 3, 3, 3, 1] + dummy_display_settings = DisplayControlSettings(factor=1, seq_length=4) diff = tracks_utils.calculate_diff(dummy_cmap_1, dummy_cmap_2, dummy_display_settings) self.assertListEqual(diff, expected_diff) - def test_5(self): + def test_4(self): """ cmap_1 2 8 9 0 @@ -98,5 +100,8 @@ def test_5(self): ] expected = [7, 4, 10, 10] - output = tracks_utils.get_cmap_rmsd(cmap_1, cmap_2, 4) + expected_smooth = [2, 4, 6, 6, 5] + output = tracks_utils.get_cmap_rmsd(cmap_1, cmap_2, 4, smooth=False) + output_smooth = tracks_utils.get_cmap_rmsd(cmap_1, cmap_2, 4, smooth=True) self.assertListEqual(output, expected) + self.assertListEqual(output_smooth, expected_smooth) diff --git a/utils/tracks_utils.py b/utils/tracks_utils.py index a092494..affb733 100644 --- a/utils/tracks_utils.py +++ b/utils/tracks_utils.py @@ -20,7 +20,7 @@ def get_distance_array(cmap, seq_length): return array -def get_cmap_mcc(cmap_1, cmap_2, size): +def get_cmap_mcc(cmap_1, cmap_2, size, smooth=True): cmap_1_set = {resn: {(c[0], c[1]) for c in cmap_1 if resn in (c[0], c[1])} for resn in range(1, size + 1)} cmap_2_set = {resn: {(c[0], c[1]) for c in cmap_2 if resn in (c[0], c[1])} for resn in range(1, size + 1)} diff = [] @@ -31,23 +31,26 @@ def get_cmap_mcc(cmap_1, cmap_2, size): fn = len(cmap_1_set[resn] - cmap_2_set[resn]) tn = size - sum((tp, fp, fn)) mcc = math_utils.calculate_mcc(tp, fp, tn, fn) - diff.append(int(round(mcc, 0))) + diff.append(mcc) - return diff + if smooth: + return math_utils.convolution_smooth_values(diff).astype(int).tolist() + return [int(round(mcc, 0)) for mcc in diff] -def get_cmap_rmsd(cmap_1, cmap_2, seq_length): + +def get_cmap_rmsd(cmap_1, cmap_2, seq_length, smooth=True): cmap_1_array = get_distance_array(cmap_1, seq_length) cmap_2_array = get_distance_array(cmap_2, seq_length) rmsd = math_utils.calculate_rmsd(cmap_1_array, cmap_2_array, seq_length) - return rmsd.astype(int).tolist() + if smooth: + return math_utils.convolution_smooth_values(rmsd).astype(int).tolist() + return np.round(rmsd, 0).astype(int).tolist() def calculate_diff(cmap_1, cmap_2, display_settings): if cmap_utils.contains_distances(cmap_1) and cmap_utils.contains_distances(cmap_2): - cmap_1 = cmap_utils.slice_cmap(cmap_1, display_settings.seq_length, 0) - cmap_2 = cmap_utils.slice_cmap(cmap_2, display_settings.seq_length, 0) - return get_cmap_rmsd(cmap_1, cmap_2, display_settings.seq_length) + return get_cmap_rmsd(cmap_1[1:], cmap_2[1:], display_settings.seq_length) else: cmap_1 = cmap_utils.slice_cmap(cmap_1, display_settings.seq_length, display_settings.factor) cmap_2 = cmap_utils.slice_cmap(cmap_2, display_settings.seq_length, display_settings.factor) From 6cc8e20ca45efecb6c8d23b026b4e8caadb7978f Mon Sep 17 00:00:00 2001 From: FilomenoSanchez Date: Wed, 28 Apr 2021 09:48:16 +0100 Subject: [PATCH 24/37] update version number --- CHANGELOG.rst | 13 +++++++++++++ utils/__init__.py | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 9913c32..dd1997d 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,6 +2,19 @@ Changelog ========= +0.4.1 +----- + +Added +~~~~~ +- Added contact diff track with smoothed values (MCC for contacts and RMSD for distograms) + +Changed +~~~~~ +- Increased contrast in sequence hydrophobicity color palettes +- Use AMISE to estimate bandwidth use to calculate contact density + + 0.4 ----- diff --git a/utils/__init__.py b/utils/__init__.py index 7ccd701..47a0c7f 100644 --- a/utils/__init__.py +++ b/utils/__init__.py @@ -5,7 +5,7 @@ def conplot_version(): - return 'v0.4' + return 'v0.4.1' def get_base_url(): From 38334f63dd07a9518f884afc45a00070528ed420 Mon Sep 17 00:00:00 2001 From: FilomenoSanchez Date: Thu, 29 Apr 2021 10:34:08 +0100 Subject: [PATCH 25/37] fix typo --- CHANGELOG.rst | 2 +- utils/data_utils.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index dd1997d..77dd7eb 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -12,7 +12,7 @@ Added Changed ~~~~~ - Increased contrast in sequence hydrophobicity color palettes -- Use AMISE to estimate bandwidth use to calculate contact density +- Use AMISE to estimate bandwidth required to calculate contact density 0.4 diff --git a/utils/data_utils.py b/utils/data_utils.py index 862cf77..afe50ec 100644 --- a/utils/data_utils.py +++ b/utils/data_utils.py @@ -127,8 +127,8 @@ def remove_dataset(trigger, cache, session_id, logger): cache_utils.remove_fname(cache, session_id, fname, dataset) if dataset == loaders.DatasetReference.SEQUENCE.value: - cache_utils.remove_all(session_id, cache, cache_utils.CacheKeys.CONTACT_DENSITY.value) - cache_utils.remove_all(session_id, cache, cache_utils.CacheKeys.CONTACT_DIFF.value) + cache_utils.remove_all(session_id, cache_utils.CacheKeys.CONTACT_DENSITY.value, cache) + cache_utils.remove_all(session_id, cache_utils.CacheKeys.CONTACT_DIFF.value, cache) elif dataset == loaders.DatasetReference.CONTACT_MAP.value: cache_utils.remove_density(session_id, cache, fname) cache_utils.remove_diff(session_id, cache, fname) From 70cde61f38f808d63cb1061b715c8e280bfdcdc4 Mon Sep 17 00:00:00 2001 From: FilomenoSanchez Date: Thu, 29 Apr 2021 11:51:33 +0100 Subject: [PATCH 26/37] improved plot time performance --- utils/cache_utils.py | 7 +++ utils/plot_utils.py | 91 +++++++++++++++------------------- utils/tests/test_math_utils.py | 2 + utils/tracks_utils.py | 7 ++- 4 files changed, 52 insertions(+), 55 deletions(-) diff --git a/utils/cache_utils.py b/utils/cache_utils.py index 983fa6e..19a213c 100644 --- a/utils/cache_utils.py +++ b/utils/cache_utils.py @@ -210,3 +210,10 @@ def is_redis_available(cache): def get_active_sessions(cache): return cache.dbsize() + + +def get_cachekey(session, fname, factor): + if 'PDB' == session[fname.encode()][0]: + return CacheKeys.CMAP_DENSITY.value.format(fname, fname).encode() + else: + return CacheKeys.CMAP_DENSITY.value.format(fname, factor).encode() diff --git a/utils/plot_utils.py b/utils/plot_utils.py index 2d61aab..5ff8ff5 100644 --- a/utils/plot_utils.py +++ b/utils/plot_utils.py @@ -30,11 +30,10 @@ def create_ConPlot(session_id, cache, trigger, selected_tracks, cmap_selection, contact_marker_size=5, track_marker_size=5, track_separation=2, transparent=True, superimpose=False, heatmap=False, verbose_labels=False): session = cache.hgetall(session_id) - session, display_settings, verbose_labels, error = process_args(session_id, session, trigger, selected_tracks, - cmap_selection, factor, contact_marker_size, - track_separation, transparent, selected_palettes, - superimpose, track_marker_size, heatmap, - verbose_labels, cache) + session, display_settings, error = process_args(session_id, session, trigger, selected_tracks, cmap_selection, + factor, contact_marker_size, track_separation, transparent, + selected_palettes, superimpose, track_marker_size, heatmap, + verbose_labels, cache) if error is not None: return error @@ -42,8 +41,11 @@ def create_ConPlot(session_id, cache, trigger, selected_tracks, cmap_selection, display_card = get_display_control_card(display_settings) figure = create_figure(display_settings.axis_range) - add_contact_trace(session, display_settings, figure, verbose_labels) - add_additional_tracks(session_id, session, display_settings, figure, cache) + verbose_labels, additional_traces = add_additional_tracks(session_id, session, display_settings, figure, cache) + contact_traces = add_contact_trace(session, display_settings, figure, verbose_labels) + + figure.add_traces(contact_traces) + figure.add_traces(additional_traces) figure.update_xaxes(spikemode="across", showspikes=False) figure.update_yaxes(spikemode="across", showspikes=False) @@ -58,45 +60,56 @@ def create_ConPlot(session_id, cache, trigger, selected_tracks, cmap_selection, def add_additional_tracks(session_id, session, display_settings, figure, cache): + prediction_labels = {} + traces = [] for idx, fname in enumerate(display_settings.selected_tracks): if fname == '--- Empty ---': continue - dataset, prediction = tracks_utils.retrieve_dataset_prediction(session_id, session, fname, display_settings, - cache) + dataset, prediction = tracks_utils.get_dataset_prediction(session_id, session, fname, display_settings, cache) + if display_settings.verbose_labels and fname not in prediction_labels: + prediction_labels[fname] = [STATES[dataset][x] for x in prediction] palette_idx = [x.name for x in color_palettes.DatasetColorPalettes].index(dataset) palette = display_settings.selected_palettes[palette_idx] if idx == 4: - traces = tracks_utils.get_diagonal_trace(prediction, dataset, display_settings.track_marker_size, - session[display_settings.seq_fname.encode()], - display_settings.alpha, palette) + traces += tracks_utils.get_diagonal_trace(prediction, dataset, display_settings.track_marker_size, + session[display_settings.seq_fname.encode()], + display_settings.alpha, palette) else: - traces = tracks_utils.get_traces(prediction, dataset, idx, display_settings.track_separation, - display_settings.track_marker_size, display_settings.alpha, palette) + traces += tracks_utils.get_traces(prediction, dataset, idx, display_settings.track_separation, + display_settings.track_marker_size, display_settings.alpha, palette) + + if display_settings.verbose_labels: + verbose_labels = [] + sequence = session[display_settings.seq_fname.encode()] + all_predictions = list(prediction_labels.values()) + label_template = '------
Residue {} ({})' + '
{}' * len(all_predictions) + for idx, residue_info in enumerate(zip(sequence, *all_predictions), 1): + verbose_labels.append(label_template.format(idx, *residue_info)) - for trace in traces: - figure.add_trace(trace) + return verbose_labels, traces + + return None, traces def add_contact_trace(session, display_settings, figure, verbose_labels): if display_settings.superimpose and display_settings.heatmap: heat, hover, colorscale = heatmap_utils.superimpose_heatmaps(session, display_settings, verbose_labels) - figure.add_trace(heatmap_utils.create_heatmap_trace(hovertext=hover, distances=heat, colorscale=colorscale)) + return heatmap_utils.create_heatmap_trace(hovertext=hover, distances=heat, colorscale=colorscale) elif display_settings.heatmap: heat, hover, colorscale = heatmap_utils.create_heatmap(session, display_settings, verbose_labels) - figure.add_trace(heatmap_utils.create_heatmap_trace(hovertext=hover, distances=heat, colorscale=colorscale)) + return heatmap_utils.create_heatmap_trace(hovertext=hover, distances=heat, colorscale=colorscale) elif display_settings.superimpose: reference_cmap = session[display_settings.cmap_selection[0].encode()] predicted_cmap = session[display_settings.cmap_selection[1].encode()] - traces = cmap_utils.create_superimposed_cmap(reference_cmap, predicted_cmap, display_settings, verbose_labels) - for trace in traces: - figure.add_trace(trace) + return cmap_utils.create_superimposed_cmap(reference_cmap, predicted_cmap, display_settings, verbose_labels) else: + traces = [] for idx, fname in enumerate(display_settings.cmap_selection): if fname == '--- Empty ---': continue @@ -104,7 +117,9 @@ def add_contact_trace(session, display_settings, figure, verbose_labels): cmap = session[fname.encode()] size = display_settings.contact_marker_size x, y, hover = cmap_utils.create_cmap(cmap, idx, display_settings, verbose_labels) - figure.add_trace(cmap_utils.create_cmap_trace(x, y, 'circle', size, 'black', hover)) + traces.append(cmap_utils.create_cmap_trace(x, y, 'circle', size, 'black', hover)) + + return traces def get_display_control_card(display_settings): @@ -153,14 +168,14 @@ def lookup_input_errors(session_id, session, cmap_selection, superimpose, heatma error = components.PlotPlaceHolder(), \ components.MissingInputModal(*[missing.name for missing in missing_data]), \ components.DisplayControlCard(), True - return None, None, None, error + return None, None, error if superimpose and heatmap: reference_cmap = session[cmap_selection[0].encode()] predicted_cmap = session[cmap_selection[1].encode()] error = no_update, components.InvalidSuperposeHeatmapModal(), no_update, no_update if not isinstance(reference_cmap[0], str) or not isinstance(predicted_cmap[0], str): - return None, None, None, error + return None, None, error return None @@ -210,13 +225,7 @@ def process_args(session_id, session, trigger, selected_tracks, cmap_selection, cmap_selection=cmap_selection, available_cmaps=available_cmaps, heatmap=heatmap, verbose_labels=verbose_labels) - if verbose_labels: - fnames = [fname for fname in selected_tracks if fname != '--- Empty ---'] - verbose_labels = get_verbose_labels(session_id, session, fnames, display_settings, cache) - else: - verbose_labels = None - - return session, display_settings, verbose_labels, None + return session, display_settings, None def separate_pdb_cmaps(session, cmap_fname_list): @@ -337,23 +346,3 @@ def create_figure(axis_range): plot_bgcolor='rgba(0,0,0,0)' ) ) - - -def get_verbose_labels(session_id, session, fnames, display_settings, cache): - sequence = session[display_settings.seq_fname.encode()] - all_predictions = [] - for fname in set(fnames): - dataset, prediction = tracks_utils.retrieve_dataset_prediction(session_id, session, fname, - display_settings, cache) - dataset_dict = STATES[dataset] - prediction = [dataset_dict[x] for x in prediction] - all_predictions.append(prediction) - - labels = [] - for idx, residue in enumerate(sequence, 1): - current_label = '------
Residue {} ({})'.format(idx, residue) - for prediction in all_predictions: - current_label += '
{}'.format(prediction[idx - 1]) - labels.append(current_label) - - return labels diff --git a/utils/tests/test_math_utils.py b/utils/tests/test_math_utils.py index a8d6c3c..66c12fa 100644 --- a/utils/tests/test_math_utils.py +++ b/utils/tests/test_math_utils.py @@ -1,9 +1,11 @@ +import os import unittest from utils import math_utils class MathUtilsTestCase(unittest.TestCase): + @unittest.skipIf('THIS_IS_GH_ACTIONS' in os.environ, "not implemented in Github Actions") def test_1(self): dummy_cmap = [(52, 50), (53, 51), (145, 143), (142, 140), (150, 148), (53, 50), (147, 145), (141, 139), (143, 141), (148, 146)] diff --git a/utils/tracks_utils.py b/utils/tracks_utils.py index affb733..77242b2 100644 --- a/utils/tracks_utils.py +++ b/utils/tracks_utils.py @@ -60,17 +60,17 @@ def calculate_diff(cmap_1, cmap_2, display_settings): def get_diff_args(fname, factor): cmap_1 = fname.split('|')[0].rstrip().lstrip() cmap_2 = fname.split('|')[1].rstrip().lstrip() + # TODO: If looking for diff for distance predictions, we also don't care about L factor (using all) cachekey = cache_utils.CacheKeys.CMAP_DIFF.value.format(cmap_1, cmap_2, factor).encode() return cmap_1, cmap_2, cachekey -def retrieve_dataset_prediction(session_id, session, fname, display_settings, cache): +def get_dataset_prediction(session_id, session, fname, display_settings, cache): if fname == session[DatasetReference.SEQUENCE.value.encode()]: return DatasetReference.HYDROPHOBICITY.value, session[DatasetReference.HYDROPHOBICITY.value.encode()] - # TODO: If it is a PDB lookup data should not care about the L factor since it will always be the same if fname in session[DatasetReference.CONTACT_MAP.value.encode()]: - cachekey = cache_utils.CacheKeys.CMAP_DENSITY.value.format(fname, display_settings.factor).encode() + cachekey = cache_utils.get_cachekey(session, fname, display_settings.factor) density = lookup_data(session, session_id, cachekey, cache) if not density: density = calculate_density(session[fname.encode()], display_settings.seq_length, display_settings.factor) @@ -78,7 +78,6 @@ def retrieve_dataset_prediction(session_id, session, fname, display_settings, ca return DatasetReference.CONTACT_DENSITY.value, density - # TODO: If looking for diff for distance predictions, we also don't care about L factor (using all) if cache_utils.MetadataTags.SEPARATOR.value in fname: cmap_1, cmap_2, cachekey = get_diff_args(fname, display_settings.factor) diff = lookup_data(session, session_id, cachekey, cache) From cfb07c395e3727d95c13d7aed70c33a8598c6336 Mon Sep 17 00:00:00 2001 From: FilomenoSanchez Date: Thu, 29 Apr 2021 12:01:26 +0100 Subject: [PATCH 27/37] update README.md --- README.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 9ab07fb..ec7510a 100644 --- a/README.md +++ b/README.md @@ -59,21 +59,21 @@ Once you have installed `redis`, you will need to start the service by running: $ sudo service redis start ``` -You will also need to create a environment variable called `KEYDB_URL` with -the URL to connect to the redis server you just started on your machine: +Now you'll need to clone this repository, install the requirements and setup environment variables. +Please note that ConPlot requires at least `python 3.6`. ```bash -$ KEYDB_URL=redis://localhost:6379 +$ git clone https://github.com/rigdenlab/conplot +$ cd conplot +$ python3.6 -m pip install -r requirements.txt +$ echo "KEYDB_URL=redis://localhost:6379" > .env ``` -After this, all you need to do is clone this repository, install the requirements -and start the Flask development server on your machine. Please note that ConPlot -requires at least `python 3.6`. +With that last command you will also have created an environment variable called `KEYDB_URL` with +the URL to connect to the redis server you previously started on your machine. The only thing +left to do is to start the Flask development server on your machine: ```bash -$ git clone https://github.com/rigdenlab/conplot -$ cd conplot -$ python3.6 -m pip install -r requirements.txt $ python3.6 app.py ``` From aa4dc397d112b182a185b60da386019402b816d3 Mon Sep 17 00:00:00 2001 From: FilomenoSanchez Date: Thu, 29 Apr 2021 14:53:46 +0100 Subject: [PATCH 28/37] update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ec7510a..212a3a0 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,7 @@ Please note that ConPlot requires at least `python 3.6`. $ git clone https://github.com/rigdenlab/conplot $ cd conplot $ python3.6 -m pip install -r requirements.txt -$ echo "KEYDB_URL=redis://localhost:6379" > .env +$ echo "KEYDB_URL=0://localhost:6379" > .env ``` With that last command you will also have created an environment variable called `KEYDB_URL` with From 17351bab6655eb012d5e7e3dd7f8c2093569262c Mon Sep 17 00:00:00 2001 From: FilomenoSanchez Date: Fri, 30 Apr 2021 15:29:33 +0100 Subject: [PATCH 29/37] control session timeout from environment variable --- README.md | 10 +++++++--- app.py | 5 +++-- layouts/help.py | 31 +++++++++++++++++++++---------- requirements.txt | 3 ++- 4 files changed, 33 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 212a3a0..72d7772 100644 --- a/README.md +++ b/README.md @@ -67,11 +67,15 @@ $ git clone https://github.com/rigdenlab/conplot $ cd conplot $ python3.6 -m pip install -r requirements.txt $ echo "KEYDB_URL=0://localhost:6379" > .env +$ echo "KEYDB_TIMEOUT=3600" >> .env ``` -With that last command you will also have created an environment variable called `KEYDB_URL` with -the URL to connect to the redis server you previously started on your machine. The only thing -left to do is to start the Flask development server on your machine: +With the last two commands you will also have created an `.env` file with a variable named +`KEYDB_URL` pointing to the redis server and a `KEYDB_TIMEOUT` variable with the session +timeout value. This is the time at which a session expires after inactivity. By default in +`www.conplot.org` this has a value of 3600 minutes, but if running locally you can set this +time to any other value. The only thing left to do is to start the Flask development +server on your machine: ```bash $ python3.6 app.py diff --git a/app.py b/app.py index f7dbf5f..7270a1a 100644 --- a/app.py +++ b/app.py @@ -24,7 +24,7 @@ def serve_layout(): except (keydb.ConnectionError, TypeError, KeyError) as e: app.logger.error('Redis connection error! {}'.format(e)) return layouts.RedisConnectionError() - session_id = session_utils.initiate_session(cache, app.logger) + session_id = session_utils.initiate_session(cache, app.logger, keydb_timeout) return layouts.Base(session_id) @@ -44,6 +44,7 @@ def serve_layout(): 'requests_pathname_prefix': '/conplot/', }) keydb_pool = keydb_utils.create_pool(os.environ.get('KEYDB_URL')) +keydb_timeout = os.environ.get('KEYDB_TIMEOUT') app.layout = serve_layout @@ -407,7 +408,7 @@ def javascript_exe_button(n_clicks, session_id): elif 'new-session' in trigger['prop_id'] or session_utils.is_expired_session(session_id, cache, app.logger): cache = keydb.KeyDB(connection_pool=keydb_pool) - new_session_id = session_utils.initiate_session(cache, app.logger) + new_session_id = session_utils.initiate_session(cache, app.logger, keydb_timeout) return "location.reload();", no_update, new_session_id else: diff --git a/layouts/help.py b/layouts/help.py index c50c693..f556e2e 100644 --- a/layouts/help.py +++ b/layouts/help.py @@ -24,7 +24,7 @@ def Body(cache): components.TutorialTwoModal(), components.TutorialThreeModal(), components.TutorialFourModal(), - #components.TutorialFiveModal(), + # components.TutorialFiveModal(), components.CustomFormatDescriptionModal(), dbc.Row([ dbc.Col([ @@ -262,19 +262,30 @@ def Body(cache): 'memory server used by ConPlot.']), dbc.Col([ html.Plaintext('$ sudo apt update\n$ sudo apt install redis-server\n$ sudo ' - 'service redis start\n$ KEYDB_URL=redis://localhost:6379') + 'service redis start') ], style={'background-color': '#EAEAEA'}, align='center'), - html.P('With the above commands you will have installed Redis and started the server. You ' - 'will also have created a environment variable called "KEYDB_URL" containing ' - 'the URL to connect to your redis server. ConPlot will need to read this ' - 'environment variable to access the redis database. After this, all you need to do ' - 'is clone ConPlot repository, install the requirements and start the Flask ' - 'development server on your machine. Please note that ConPlot requires at least ' - 'python 3.6 installed:'), + html.P('Once you have installed `redis`, you will need to start the service by running:'), + dbc.Col([ + html.Plaintext('$ sudo service redis start') + ], style={'background-color': '#EAEAEA'}, align='center'), + html.P('Now you will need to clone the repository, install the requirements and ' + 'setup environment variables. Please note that ConPlot requires at least ' + 'python 3.6.'), dbc.Col([ html.Plaintext('$ git clone https://github.com/rigdenlab/conplot\n' '$ cd conplot\n$ python3.6 -m pip install -r requirements\n$ ' - 'python3.6 app.py') + 'echo "KEYDB_URL=0://localhost:6379" > .env\n$ echo "KEYDB_TIME' + 'OUT=3600" >> .env') + ], style={'background-color': '#EAEAEA'}, align='center'), + html.P('With the last two commands you will also have created an .env file with a ' + 'variable named KEYDB_URL pointing to the redis server and a KEYDB_TIMEOUT ' + 'variable with the session timeout value. This is the time at which a session ' + 'expires after inactivity. By default in www.conplot.org this has a value of 3600 ' + 'minutes, but if running locally you can set this time to any other value. ' + 'The only thing left to do is to start the Flask development server on your ' + 'machine:'), + dbc.Col([ + html.Plaintext('$ python3.6 app.py') ], style={'background-color': '#EAEAEA'}, align='center'), html.P(['Now you will be able to access the app on ', html.A(html.U('http://127.0.0.1:8050/home'), diff --git a/requirements.txt b/requirements.txt index 9295cd6..5fffe4b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,4 +23,5 @@ numpy~=1.19.4 fast-enum~=1.3.0 scikit-learn~=0.24.1 numba~=0.53.1 -conkit~=0.12.0 \ No newline at end of file +conkit~=0.12.0 +python-dotenv~=0.17.1 \ No newline at end of file From 8e355647535257dce16adada1e30db3f1812a633 Mon Sep 17 00:00:00 2001 From: FilomenoSanchez Date: Sun, 2 May 2021 20:46:53 +0100 Subject: [PATCH 30/37] minor changes --- components/cards.py | 2 +- utils/plot_utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/components/cards.py b/components/cards.py index 1df39d8..0dc0b0d 100644 --- a/components/cards.py +++ b/components/cards.py @@ -346,7 +346,7 @@ def ColorPaletteSelectionCard(dataset, selected_palette): def HalfSquareSelectionCard(square_idx, selection, available_cmaps): - cmap_options = [{'label': '--- Empty ---', 'value': '---'}] + cmap_options = [{'label': '--- Empty ---', 'value': '--- Empty ---'}] cmap_options += [{'label': fname, 'value': fname} for fname in available_cmaps] return dbc.Card(components.HalfSquareSelector(square_idx, cmap_options, selection), outline=False) diff --git a/utils/plot_utils.py b/utils/plot_utils.py index 5ff8ff5..7ceadfa 100644 --- a/utils/plot_utils.py +++ b/utils/plot_utils.py @@ -173,8 +173,8 @@ def lookup_input_errors(session_id, session, cmap_selection, superimpose, heatma if superimpose and heatmap: reference_cmap = session[cmap_selection[0].encode()] predicted_cmap = session[cmap_selection[1].encode()] - error = no_update, components.InvalidSuperposeHeatmapModal(), no_update, no_update if not isinstance(reference_cmap[0], str) or not isinstance(predicted_cmap[0], str): + error = no_update, components.InvalidSuperposeHeatmapModal(), no_update, no_update return None, None, error return None From 1f66bb2a603bbf16b581a4a2b24654e030079188 Mon Sep 17 00:00:00 2001 From: FilomenoSanchez Date: Mon, 3 May 2021 13:06:03 +0100 Subject: [PATCH 31/37] use numpy when creating heatmap --- utils/heatmap_utils.py | 180 ++++++++++++++++-------------- utils/tests/test_heatmap_utils.py | 12 ++ 2 files changed, 110 insertions(+), 82 deletions(-) create mode 100644 utils/tests/test_heatmap_utils.py diff --git a/utils/heatmap_utils.py b/utils/heatmap_utils.py index 93bb9b3..53d5325 100644 --- a/utils/heatmap_utils.py +++ b/utils/heatmap_utils.py @@ -2,14 +2,24 @@ import numpy as np from utils import color_palettes, DistanceLabels, HoverTemplates +DISTANCE_BINS = {0: 0, 1: 5, 2: 7, 3: 9, 4: 11, 5: 13, 6: 15, 7: 17, 8: 19, 9: 20} + def init_heatmap(seq_length): shape = (seq_length + 1, seq_length + 1) - heat = np.zeros(shape).tolist() - hover = np.full(shape, None).tolist() + heat = np.zeros(shape) + hover = np.full(shape, None) return heat, hover +def get_array(cmap, seq_length): + array = np.full((seq_length + 1, seq_length + 1), 20) + for contact in cmap: + array[contact[0], contact[1]] = DISTANCE_BINS[contact[3]] + array[contact[1], contact[0]] = DISTANCE_BINS[contact[3]] + return array + + def create_heatmap(session, display_settings, verbose_labels): heat, hover = init_heatmap(display_settings.seq_length) for idx, fname in enumerate(display_settings.cmap_selection): @@ -19,23 +29,19 @@ def create_heatmap(session, display_settings, verbose_labels): palette_idx = [x.value for x in color_palettes.PaletteDefaultLayout].index(b'heatmap') colorscale = color_palettes.get_heatmap_colorscale(display_settings.selected_palettes[palette_idx]) - return heat, hover, colorscale + return heat.tolist(), hover.tolist(), colorscale def superimpose_heatmaps(session, display_settings, verbose_labels): - heat, hover = init_heatmap(display_settings.seq_length) - for idx, fname in enumerate(display_settings.cmap_selection): - if fname == '--- Empty ---': - continue - heat, hover = populate_superimposed_heatmap(session[display_settings.cmap_selection[0].encode()], - session[display_settings.cmap_selection[1].encode()], - heat, hover, verbose_labels) + heat, hover = create_superimposed_heatmap(session[display_settings.cmap_selection[0].encode()][1:], + session[display_settings.cmap_selection[1].encode()][1:], + display_settings.seq_length, verbose_labels) palette_idx = [x.value for x in color_palettes.PaletteDefaultLayout].index(b'heatmap') colorscale = color_palettes.get_heatmap_colorscale(display_settings.selected_palettes[palette_idx]) - return heat, hover, colorscale + return heat.tolist(), hover.tolist(), colorscale -def populate_heatmap(cmap, idx, distances, hover, verbose_labels=None): +def populate_heatmap(cmap, idx, heat, hover, verbose_labels=None): if idx == 1: idx_x = 1 idx_y = 0 @@ -43,92 +49,102 @@ def populate_heatmap(cmap, idx, distances, hover, verbose_labels=None): idx_x = 0 idx_y = 1 + hover_labels = [] + if cmap[0] == 'DISTO' or cmap[0] == 'PDB': cmap = cmap[1:] + cmap_array = np.array(cmap) + res_1 = cmap_array[:, idx_x] + res_1 = res_1.astype(int) + res_2 = cmap_array[:, idx_y] + res_2 = res_2.astype(int) + distances = cmap_array[:, 3] + scores = cmap_array[:, 4] + heat[res_1.astype(int), res_2.astype(int)] = 9 - distances if verbose_labels is not None: - for contact in cmap: - distances[contact[idx_x]][contact[idx_y]] = 9 - contact[3] - label = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(contact[3])) - hover_label = HoverTemplates.DISTOGRAM_VERBOSE.format(contact[idx_y], contact[idx_x], label, contact[4], - verbose_labels[contact[idx_y] - 1], - verbose_labels[contact[idx_x] - 1]) - hover[contact[idx_x]][contact[idx_y]] = hover_label + for x, y, distance, score in zip(res_1, res_2, distances.astype(int), scores): + label = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(distance)) + hover_label = HoverTemplates.DISTOGRAM_VERBOSE.format(y, x, label, score, verbose_labels[y - 1], + verbose_labels[x - 1]) + hover_labels.append(hover_label) + else: - for contact in cmap: - distances[contact[idx_x]][contact[idx_y]] = 9 - contact[3] - label = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(contact[3])) - hover_label = HoverTemplates.DISTOGRAM.format(contact[idx_y], contact[idx_x], label, contact[4]) - hover[contact[idx_x]][contact[idx_y]] = hover_label + for x, y, distance, score in zip(res_1, res_2, distances.astype(int), scores): + label = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(distance)) + hover_label = HoverTemplates.DISTOGRAM.format(y, x, label, score) + hover_labels.append(hover_label) + + hover[res_1.astype(int), res_2.astype(int)] = hover_labels - return distances, hover + return heat, hover + + cmap_array = np.array(cmap) + res_1 = cmap_array[:, idx_x] + res_1 = res_1.astype(int) + res_2 = cmap_array[:, idx_y] + res_2 = res_2.astype(int) + scores = cmap_array[:, 2] + heat[res_1, res_2] = scores if verbose_labels is None: - for contact in cmap: - distances[contact[idx_x]][contact[idx_y]] = contact[2] - hover_label = HoverTemplates.CMAP.format(contact[idx_y], contact[idx_x], contact[2]) - hover[contact[idx_x]][contact[idx_y]] = hover_label + for x, y, score in zip(res_1, res_2, scores): + hover_labels.append(HoverTemplates.CMAP.format(y, x, score)) else: - for contact in cmap: - distances[contact[idx_x]][contact[idx_y]] = contact[2] - hover_label = HoverTemplates.CMAP_VERBOSE.format(contact[idx_y], contact[idx_x], contact[2], - verbose_labels[contact[idx_y] - 1], - verbose_labels[contact[idx_x] - 1]) - hover[contact[idx_x]][contact[idx_y]] = hover_label + for x, y, score in zip(res_1, res_2, scores): + hover_label = HoverTemplates.CMAP_VERBOSE.format(y, x, score, verbose_labels[y - 1], verbose_labels[x - 1]) + hover_labels.append(hover_label) - return distances, hover + hover[res_1.astype(int), res_2.astype(int)] = hover_labels + return heat, hover -def populate_superimposed_heatmap(reference_cmap, secondary_cmap, heat, hover, verbose_labels=None): - idx_x = 1 - idx_y = 0 - reference_cmap = reference_cmap[1:] - secondary_cmap = secondary_cmap[1:] - predicted_set = {(x[0], x[1]): x[3] for x in secondary_cmap} + +def create_superimposed_heatmap(reference_cmap, predicted_cmap, seq_length, verbose_labels=None): + hover = np.full((seq_length + 1, seq_length + 1), None) + reference_array = get_array(reference_cmap, seq_length) + predicted_array = get_array(predicted_cmap, seq_length) + difference_heatmap = np.abs(reference_array - predicted_array) + predicted_set = {(x[0], x[1]): x[3] for x in predicted_cmap} + reference_set = {(x[0], x[1]): x[3] for x in reference_cmap} if verbose_labels is not None: - for reference_distance in reference_cmap: - residues = tuple(reference_distance[:2]) - predicted_bin = predicted_set[residues] if residues in predicted_set.keys() else 9 - reference_bin = reference_distance[3] - error = abs((9 - reference_bin) - (9 - predicted_bin)) - resid_y = reference_distance[idx_y] - resid_x = reference_distance[idx_x] - heat[resid_x][resid_y] = error - heat[resid_y][resid_x] = error - map_a_distance = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(reference_bin)) - map_b_distance = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(predicted_bin)) - hover_label_a = HoverTemplates.DISTOGRAM_SUPERIMPOSE_VERBOSE.format(resid_y, resid_x, map_a_distance, - map_b_distance, error, - verbose_labels[resid_y - 1], - verbose_labels[resid_x - 1]) - hover_label_b = HoverTemplates.DISTOGRAM_SUPERIMPOSE_VERBOSE.format(resid_x, resid_y, map_a_distance, - map_b_distance, error, - verbose_labels[resid_x - 1], - verbose_labels[resid_y - 1]) - hover[resid_x][resid_y] = hover_label_a - hover[resid_y][resid_x] = hover_label_b + for x in range(1, seq_length + 1): + for y in range(x + 5, seq_length + 1): + residues = (y, x) + predicted_bin = predicted_set[residues] if residues in predicted_set.keys() else 9 + reference_bin = reference_set[residues] if residues in reference_set.keys() else 9 + error = '{} Å'.format(difference_heatmap[x, y]) + map_a_distance = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(reference_bin)) + map_b_distance = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(predicted_bin)) + hover_label_a = HoverTemplates.DISTOGRAM_SUPERIMPOSE_VERBOSE.format(y, x, map_a_distance, + map_b_distance, error, + verbose_labels[y - 1], + verbose_labels[x - 1]) + hover_label_b = HoverTemplates.DISTOGRAM_SUPERIMPOSE_VERBOSE.format(x, y, map_a_distance, + map_b_distance, error, + verbose_labels[x - 1], + verbose_labels[y - 1]) + hover[x, y] = hover_label_a + hover[y, x] = hover_label_b else: - for reference_distance in reference_cmap: - residues = tuple(reference_distance[:2]) - predicted_bin = predicted_set[residues] if residues in predicted_set.keys() else 9 - reference_bin = reference_distance[3] - error = abs((9 - reference_bin) - (9 - predicted_bin)) - resid_y = reference_distance[idx_y] - resid_x = reference_distance[idx_x] - heat[resid_x][resid_y] = error - heat[resid_y][resid_x] = error - map_a_distance = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(reference_bin)) - map_b_distance = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(predicted_bin)) - hover_label_a = HoverTemplates.DISTOGRAM_SUPERIMPOSE.format(resid_y, resid_x, map_a_distance, - map_b_distance, error) - hover_label_b = HoverTemplates.DISTOGRAM_SUPERIMPOSE.format(resid_x, resid_y, map_a_distance, - map_b_distance, error) - hover[resid_x][resid_y] = hover_label_a - hover[resid_y][resid_x] = hover_label_b - - return heat, hover + for x in range(1, seq_length + 1): + for y in range(x + 5, seq_length + 1): + residues = (y, x) + predicted_bin = predicted_set[residues] if residues in predicted_set.keys() else 9 + reference_bin = reference_set[residues] if residues in reference_set.keys() else 9 + error = '{} Å'.format(difference_heatmap[x, y]) + map_a_distance = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(reference_bin)) + map_b_distance = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(predicted_bin)) + hover_label_a = HoverTemplates.DISTOGRAM_SUPERIMPOSE.format(y, x, map_a_distance, + map_b_distance, error) + hover_label_b = HoverTemplates.DISTOGRAM_SUPERIMPOSE.format(x, y, map_a_distance, + map_b_distance, error) + hover[x, y] = hover_label_a + hover[y, x] = hover_label_b + + return difference_heatmap, hover def create_heatmap_trace(distances, colorscale, hovertext=None): diff --git a/utils/tests/test_heatmap_utils.py b/utils/tests/test_heatmap_utils.py new file mode 100644 index 0000000..8418d1d --- /dev/null +++ b/utils/tests/test_heatmap_utils.py @@ -0,0 +1,12 @@ +import unittest +from utils import heatmap_utils + + +class HeatmapUtilsTestCase(unittest.TestCase): + + def test_1(self): + expected_heat = [[0, 0, 0], [0, 0, 0], [0, 0, 0]] + expected_hover = [[None, None, None], [None, None, None], [None, None, None]] + heat, hover = heatmap_utils.init_heatmap(2) + self.assertListEqual(expected_hover, hover.tolist()) + self.assertListEqual(expected_heat, heat.tolist()) From 2e3f99397e2e659faab6b8f59dfedb267b387070 Mon Sep 17 00:00:00 2001 From: FilomenoSanchez Date: Tue, 4 May 2021 10:46:41 +0100 Subject: [PATCH 32/37] add a3m format to the help page --- components/listgrpoups.py | 9 +++++++++ utils/__init__.py | 1 + 2 files changed, 10 insertions(+) diff --git a/components/listgrpoups.py b/components/listgrpoups.py index cbea080..cbf0fce 100644 --- a/components/listgrpoups.py +++ b/components/listgrpoups.py @@ -326,6 +326,15 @@ def AdditionalFormatsHelpList(): html.A(html.U('here'), href=UrlIndex.CONSURF_CITATION.value), '.'], style={"font-size": "110%", 'text-align': "justify"}), + html.Li(['A3M file. This is a multiple sequence alignment file that should have been obtained using the ' + 'sequence of interest as a query. ConPlot will parse the file and calculate the MSA coverage along ' + 'the query sequence, normalise these values (1-10) and create a track where each residue ' + 'is coloured according to the number of sequences aligned in that particular position These ' + 'files are used in most contact prediction pipelines, and visualising the MSA coverage can help you ' + 'understand the quality of the information used to obtain your predictions. Several alignment tools ' + 'will create MSA files in this format, like for example HHBLITS, which you can use ' + 'online ', html.A(html.U('here'), href=UrlIndex.HHBLITS_URL.value), '.'], + style={"font-size": "110%", 'text-align': "justify"}), html.Li(['CUSTOM file. These files are plain text files that can be created manually ' 'by users to include additional tracks of information to the plot. These ' 'files enable limitless personalisation of the contact map plot, as it ' diff --git a/utils/__init__.py b/utils/__init__.py index 47a0c7f..77b2fd1 100644 --- a/utils/__init__.py +++ b/utils/__init__.py @@ -106,6 +106,7 @@ class UrlIndex(Enum): IUPRED_CITATION = 'https://doi.org/10.1093/nar/gky384' CONSURF_WEB = 'https://consurf.tau.ac.il/' CONSURF_CITATION = 'https://doi.org/10.1093/nar/gkw408' + HHBLITS_URL = 'https://toolkit.tuebingen.mpg.de/tools/hhblits' GDPR_WEBSITE = 'https://gdpr-info.eu' DOCKER_HUB = 'https://hub.docker.com/r/filosanrod/conplot' CONPLOT_DOCKER = 'https://github.com/rigdenlab/conplot-docker' From dacc5919ec04eb6bb34832bbdd1f9486a799e2bd Mon Sep 17 00:00:00 2001 From: FilomenoSanchez Date: Tue, 4 May 2021 10:59:19 +0100 Subject: [PATCH 33/37] minor change to the help page --- components/listgrpoups.py | 66 +++++++++++++++++---------------------- layouts/help.py | 5 +++ 2 files changed, 33 insertions(+), 38 deletions(-) diff --git a/components/listgrpoups.py b/components/listgrpoups.py index cbf0fce..66cd042 100644 --- a/components/listgrpoups.py +++ b/components/listgrpoups.py @@ -191,48 +191,39 @@ def AdjustPlotHelpList(): 'series of input menus:', html.Ul([ html.Li(['L/N selector: Change the values of ', html.I('N'), - ' with this selector to choose how many contacts should be ' - 'included in the plot (L is the number of residues in the ' - 'protein sequence, residues are sorted by their probability ' - 'score). If you set ', html.I('N'), - ' to 0, then all contacts in the file will be displayed. Please ' - 'note that only numerical values between 0 and 10 are recommended.']), - html.Li('Size selector: Change the size of the contact markers in the ' - 'plot. ConPlot will set a default value depending on the size of ' - 'the protein you are working with, but you can still change this ' - 'if you would like to make the markers smaller or bigger. Please ' + ' with this selector to choose how many contacts should be included in the plot (L is ' + 'the number of residues in the protein sequence, residues are sorted by their ' + 'probability score). If you set ', html.I('N'), + ' to 0, then all contacts in the file will be displayed. Please note that only numerical ' + 'values between 0 and 10 are recommended. Additionally, please remember that contact ' + 'data shown for PDB files is unaltered by this selector.']), + html.Li('Size selector: Change the size of the contact markers in the plot. ConPlot will set a ' + 'default value depending on the size of the protein you are working with, but you can ' + 'still change this if you would like to make the markers smaller or bigger. Please ' 'note that only numerical values between 1 and 15 are recommended.'), - html.Li(['Map A and Map B selectors: These two selectors let you choose ' - 'which contact data should be displayed on the plot. By ' - 'default, ', html.I('Map A'), - ' refers to the top half triangle of the map, and ', - html.I('Map B'), ' to the lower one. If the ', - html.I('Superimpose Maps'), - ' switch is activated, then the roles of these two dropdown ' - 'menus change: ', html.I('Map A'), - ' is now used to select the reference map, which will be ' - 'compared with the secondary map selected with the ', - html.I('Map B'), ' selector.']), - html.Li(['Superimpose Maps Switch: As explained above, if this switch ' - 'is activated ', html.I('Map A'), - ' will be used as a reference map to be compared with ', - html.I('Map B'), - '. In this mode, contacts will be coloured according to their ' - 'presence in the reference map and the secondary map. Contacts ' - 'that appear on both the reference and the secondary map will be ' - 'coloured in black -match-, those that only appear in the ' - 'reference in grey -absent-, and those that only appear in the ' - 'secondary map in red -mismatch-. Please note that you can only ' - 'use this mode if you select two different contact map files in ', + html.Li(['Map A and Map B selectors: These two selectors let you choose which contact data should ' + 'be displayed on the plot. By ' 'default, ', html.I('Map A'), + ' refers to the top half triangle of the map, and ', html.I('Map B'), + ' to the lower one. If the ', html.I('Superimpose Maps'), + ' switch is activated, then these roles change: ', html.I('Map A'), + ' is now used to select the reference map, which will be compared with the secondary map ' + 'selected with the ', html.I('Map B'), ' selector.']), + html.Li(['Superimpose Maps Switch: As explained above, if this switch is activated ', + html.I('Map A'), ' will be used as a reference map to be compared with ', html.I('Map B'), + '. In this mode, contacts will be coloured according to their presence in the reference ' + 'map and the secondary map. Contacts that appear on both the reference and the secondary ' + 'map will be coloured in black -match-, those that only appear in the reference in grey ' + '-absent-, and those that only appear in the secondary map in red -mismatch-. Please ' + 'note that you can only use this mode if you select two different contact map files in ', html.I('Map A'), ' and ', html.I('Map B'), ' selectors.']), html.Li(['Create Heatmap Switch: If this switch is activated, a heatmap will be created with the ' 'provided residue contact information. By default, if a contact map is uploaded, the ' 'intensity of the colours in this heatmap will correspond with the confidence of each ' 'contact. Alternatively, if a residue-residue distance prediction file has been uploaded ' '(', html.I('CASPRR_MODE2'), - ' format), the heatmap will correspond with the predicted distances for ' - 'each residue pair oin this file. Please note that when this mode is active, the ', - html.I('L/N'), ' selector and the ', html.I('Size'), + ' format), the heatmap will correspond with the predicted distances for each residue ' + 'pair oin this file. Please note that when this mode is active, the ', html.I('L/N'), + ' selector and the ', html.I('Size'), ' selector will be disabled. You can read more about how to visualise residue-residue ' 'distance predictions at ', html.I('Tutorial 4. Residue-Residue distance predictions'), '.']), @@ -242,9 +233,8 @@ def AdjustPlotHelpList(): 'would normally be displayed.') ])], style={"font-size": "110%", 'text-align': "justify"}), - html.Li(['Section 2: Adjust additional tracks. In this section you will find selectors ' - 'that will let you control aspects about how the additional tracks are being ' - 'displayed in the plot:', + html.Li(['Section 2: Adjust additional tracks. In this section you will find selectors that will let you ' + 'control aspects about how the additional tracks are being displayed in the plot:', html.Ul([ html.Li('Size selector: Change the size of the tiles used to create the ' 'tracks on the diagonal of the plot. By changing this value, ' diff --git a/layouts/help.py b/layouts/help.py index f556e2e..36735e8 100644 --- a/layouts/help.py +++ b/layouts/help.py @@ -168,6 +168,11 @@ def Body(cache): dbc.Alert(['TIPS: ', html.Ul([ html.Br(), + html.Li(['Remember that the ', html.I('L/N'), + ' selector will not affect any data being shown for PDB files. ' + 'Similarly, data will also not be affected if the ', + html.I('Create heatmap'), ' switch is turned on.' + ]), html.Li(['If you have just created a plot with the ', html.I('Generate Plot'), ' button and you can see individual squared tiles in the diagonal ' From 49b5d091d2e8623ad9291caa1937262c4eb187ea Mon Sep 17 00:00:00 2001 From: FilomenoSanchez Date: Thu, 6 May 2021 09:41:10 +0100 Subject: [PATCH 34/37] if looking for diff between distance predictions cache should not care about L factor --- utils/tracks_utils.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/utils/tracks_utils.py b/utils/tracks_utils.py index 77242b2..461ff08 100644 --- a/utils/tracks_utils.py +++ b/utils/tracks_utils.py @@ -57,11 +57,16 @@ def calculate_diff(cmap_1, cmap_2, display_settings): return get_cmap_mcc(cmap_1, cmap_2, display_settings.seq_length) -def get_diff_args(fname, factor): - cmap_1 = fname.split('|')[0].rstrip().lstrip() - cmap_2 = fname.split('|')[1].rstrip().lstrip() - # TODO: If looking for diff for distance predictions, we also don't care about L factor (using all) - cachekey = cache_utils.CacheKeys.CMAP_DIFF.value.format(cmap_1, cmap_2, factor).encode() +def get_diff_args(session, fname, factor): + cmap_1_fname = fname.split('|')[0].rstrip().lstrip() + cmap_1 = session[cmap_1_fname.encode()] + cmap_2_fname = fname.split('|')[1].rstrip().lstrip() + cmap_2 = session[cmap_2_fname.encode()] + if cmap_utils.contains_distances(cmap_1) and cmap_utils.contains_distances(cmap_2): + cachekey = cache_utils.CacheKeys.CMAP_DIFF.value.format(cmap_1_fname, cmap_2_fname, '1').encode() + else: + cachekey = cache_utils.CacheKeys.CMAP_DIFF.value.format(cmap_1_fname, cmap_2_fname, factor).encode() + return cmap_1, cmap_2, cachekey @@ -79,11 +84,9 @@ def get_dataset_prediction(session_id, session, fname, display_settings, cache): return DatasetReference.CONTACT_DENSITY.value, density if cache_utils.MetadataTags.SEPARATOR.value in fname: - cmap_1, cmap_2, cachekey = get_diff_args(fname, display_settings.factor) + cmap_1, cmap_2, cachekey = get_diff_args(session, fname, display_settings.factor) diff = lookup_data(session, session_id, cachekey, cache) if not diff: - cmap_1 = session[cmap_1.encode()] - cmap_2 = session[cmap_2.encode()] diff = calculate_diff(cmap_1, cmap_2, display_settings) cache_utils.store_data(session_id, cachekey, diff, cache_utils.CacheKeys.CONTACT_DIFF.value, cache) return DatasetReference.CONTACT_DIFF.value, diff From b32f4887c76a227c7a6ca4b876da5118818910c2 Mon Sep 17 00:00:00 2001 From: FilomenoSanchez Date: Thu, 6 May 2021 10:01:13 +0100 Subject: [PATCH 35/37] rmsd scale from 0-5 --- utils/math_utils.py | 4 ++-- utils/tests/test_tracks_utils.py | 4 ++-- utils/tracks_utils.py | 7 +++++-- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/utils/math_utils.py b/utils/math_utils.py index 919871a..973bc9e 100644 --- a/utils/math_utils.py +++ b/utils/math_utils.py @@ -28,8 +28,8 @@ def get_difference(expected, observed): @vectorize('float64(int64, float64)') def populate_rmsd(seq_length, sum_squared_differences): rmsd = math.sqrt(sum_squared_differences / seq_length) - if rmsd > 10: - return 10 + if rmsd > 5: + return 5 return rmsd diff --git a/utils/tests/test_tracks_utils.py b/utils/tests/test_tracks_utils.py index 0cfa123..8d12b65 100644 --- a/utils/tests/test_tracks_utils.py +++ b/utils/tests/test_tracks_utils.py @@ -99,8 +99,8 @@ def test_4(self): [4, 3, 0, 0] ] - expected = [7, 4, 10, 10] - expected_smooth = [2, 4, 6, 6, 5] + expected = [5, 4, 5, 5] + expected_smooth = [4, 6, 8, 8, 6] output = tracks_utils.get_cmap_rmsd(cmap_1, cmap_2, 4, smooth=False) output_smooth = tracks_utils.get_cmap_rmsd(cmap_1, cmap_2, 4, smooth=True) self.assertListEqual(output, expected) diff --git a/utils/tracks_utils.py b/utils/tracks_utils.py index 461ff08..a5ad32f 100644 --- a/utils/tracks_utils.py +++ b/utils/tracks_utils.py @@ -44,8 +44,11 @@ def get_cmap_rmsd(cmap_1, cmap_2, seq_length, smooth=True): cmap_2_array = get_distance_array(cmap_2, seq_length) rmsd = math_utils.calculate_rmsd(cmap_1_array, cmap_2_array, seq_length) if smooth: - return math_utils.convolution_smooth_values(rmsd).astype(int).tolist() - return np.round(rmsd, 0).astype(int).tolist() + rmsd = math_utils.convolution_smooth_values(rmsd) * 2 + return rmsd.astype(int).tolist() + else: + rmsd = np.round(rmsd, 0) * 2 + return rmsd.astype(int).tolist() def calculate_diff(cmap_1, cmap_2, display_settings): From 4462713f9ac3c28cc68a618d13cec63cfa405553 Mon Sep 17 00:00:00 2001 From: FilomenoSanchez Date: Thu, 6 May 2021 10:23:13 +0100 Subject: [PATCH 36/37] increase contrast hydrophobicity track --- CHANGELOG.rst | 1 + utils/color_palettes.py | 71 +++++++++++++++++++------------- utils/tests/test_tracks_utils.py | 2 +- 3 files changed, 45 insertions(+), 29 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 77dd7eb..c18258b 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -13,6 +13,7 @@ Changed ~~~~~ - Increased contrast in sequence hydrophobicity color palettes - Use AMISE to estimate bandwidth required to calculate contact density +- Updated track selector layout 0.4 diff --git a/utils/color_palettes.py b/utils/color_palettes.py index ca211fe..b189738 100644 --- a/utils/color_palettes.py +++ b/utils/color_palettes.py @@ -461,37 +461,52 @@ class Diff_ColorPalettes(Enum): PALETTE_5 = Diff_Hot -class Hydrophobicity_BlueGreyColorPalette(Enum): - HYDROPATHY_10 = 'rgba(66,138,245,{})' - HYDROPATHY_9 = 'rgba(72,137,234,{})' - HYDROPATHY_8 = 'rgba(79,136,222,{})' - HYDROPATHY_7 = 'rgba(85,136,211,{})' - HYDROPATHY_6 = 'rgba(92,135,199,{})' - HYDROPATHY_5 = 'rgba(98,134,188,{})' - HYDROPATHY_4 = 'rgba(104,133,176,{})' - HYDROPATHY_3 = 'rgba(111,132,165,{})' - HYDROPATHY_2 = 'rgba(117,132,153,{})' - HYDROPATHY_1 = 'rgba(124,131,142,{})' - HYDROPATHY_0 = 'rgba(130,130,130,{})' - - -class Hydrophobicity_GreenGreyColorPalette(Enum): - HYDROPATHY_10 = 'rgba(59,237,74,{})' - HYDROPATHY_9 = 'rgba(66,226,80,{})' - HYDROPATHY_8 = 'rgba(73,216,85,{})' - HYDROPATHY_7 = 'rgba(80,205,91,{})' - HYDROPATHY_6 = 'rgba(87,194,96,{})' - HYDROPATHY_5 = 'rgba(95,184,102,{})' - HYDROPATHY_4 = 'rgba(102,173,108,{})' - HYDROPATHY_3 = 'rgba(109,162,113,{})' - HYDROPATHY_2 = 'rgba(116,151,119,{})' - HYDROPATHY_1 = 'rgba(123,141,124,{})' - HYDROPATHY_0 = 'rgba(130,130,130,{})' +class Hydrophobicity_BlueColorPalette(Enum): + HYDROPATHY_10 = sequential.ice[1].replace(')', ', {})').replace('rgb', 'rgba') + HYDROPATHY_9 = sequential.ice[1].replace(')', ', {})').replace('rgb', 'rgba') + HYDROPATHY_8 = sequential.ice[2].replace(')', ', {})').replace('rgb', 'rgba') + HYDROPATHY_7 = sequential.ice[3].replace(')', ', {})').replace('rgb', 'rgba') + HYDROPATHY_6 = sequential.ice[4].replace(')', ', {})').replace('rgb', 'rgba') + HYDROPATHY_5 = sequential.ice[5].replace(')', ', {})').replace('rgb', 'rgba') + HYDROPATHY_4 = sequential.ice[6].replace(')', ', {})').replace('rgb', 'rgba') + HYDROPATHY_3 = sequential.ice[7].replace(')', ', {})').replace('rgb', 'rgba') + HYDROPATHY_2 = sequential.ice[8].replace(')', ', {})').replace('rgb', 'rgba') + HYDROPATHY_1 = sequential.ice[9].replace(')', ', {})').replace('rgb', 'rgba') + HYDROPATHY_0 = sequential.ice[9].replace(')', ', {})').replace('rgb', 'rgba') + + +class Hydrophobicity_GreenColorPalette(Enum): + HYDROPATHY_10 = 'rgba(8, 28, 21,{})' + HYDROPATHY_9 = 'rgba(8, 28, 21,{})' + HYDROPATHY_8 = 'rgba(27, 67, 50,{})' + HYDROPATHY_7 = 'rgba(45, 106, 79,{})' + HYDROPATHY_6 = 'rgba(64, 145, 108,{})' + HYDROPATHY_5 = 'rgba(82, 183, 136,{})' + HYDROPATHY_4 = 'rgba(116, 198, 157,{})' + HYDROPATHY_3 = 'rgba(149, 213, 178,{})' + HYDROPATHY_2 = 'rgba(183, 228, 199,{})' + HYDROPATHY_1 = 'rgba(216, 243, 220,{})' + HYDROPATHY_0 = 'rgba(216, 243, 220,{})' + + +class Hydrophobicity_RedColorPalette(Enum): + HYDROPATHY_10 = sequential.amp[9].replace(')', ', {})').replace('rgb', 'rgba') + HYDROPATHY_9 = sequential.amp[9].replace(')', ', {})').replace('rgb', 'rgba') + HYDROPATHY_8 = sequential.amp[8].replace(')', ', {})').replace('rgb', 'rgba') + HYDROPATHY_7 = sequential.amp[7].replace(')', ', {})').replace('rgb', 'rgba') + HYDROPATHY_6 = sequential.amp[6].replace(')', ', {})').replace('rgb', 'rgba') + HYDROPATHY_5 = sequential.amp[5].replace(')', ', {})').replace('rgb', 'rgba') + HYDROPATHY_4 = sequential.amp[4].replace(')', ', {})').replace('rgb', 'rgba') + HYDROPATHY_3 = sequential.amp[3].replace(')', ', {})').replace('rgb', 'rgba') + HYDROPATHY_2 = sequential.amp[2].replace(')', ', {})').replace('rgb', 'rgba') + HYDROPATHY_1 = sequential.amp[1].replace(')', ', {})').replace('rgb', 'rgba') + HYDROPATHY_0 = sequential.amp[1].replace(')', ', {})').replace('rgb', 'rgba') class HydrophobicityColorPalettes(Enum): - PALETTE_1 = Hydrophobicity_BlueGreyColorPalette - PALETTE_2 = Hydrophobicity_GreenGreyColorPalette + PALETTE_1 = Hydrophobicity_BlueColorPalette + PALETTE_2 = Hydrophobicity_RedColorPalette + PALETTE_3 = Hydrophobicity_GreenColorPalette class DatasetColorPalettes(Enum): diff --git a/utils/tests/test_tracks_utils.py b/utils/tests/test_tracks_utils.py index 8d12b65..c14cdca 100644 --- a/utils/tests/test_tracks_utils.py +++ b/utils/tests/test_tracks_utils.py @@ -99,7 +99,7 @@ def test_4(self): [4, 3, 0, 0] ] - expected = [5, 4, 5, 5] + expected = [10, 8, 10, 10] expected_smooth = [4, 6, 8, 8, 6] output = tracks_utils.get_cmap_rmsd(cmap_1, cmap_2, 4, smooth=False) output_smooth = tracks_utils.get_cmap_rmsd(cmap_1, cmap_2, 4, smooth=True) From a3637256a3c8015f6b3fc1c810f3aa3271b1f72f Mon Sep 17 00:00:00 2001 From: FilomenoSanchez Date: Thu, 6 May 2021 10:27:50 +0100 Subject: [PATCH 37/37] small change to help page --- components/listgrpoups.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/components/listgrpoups.py b/components/listgrpoups.py index 66cd042..853b0d8 100644 --- a/components/listgrpoups.py +++ b/components/listgrpoups.py @@ -219,9 +219,8 @@ def AdjustPlotHelpList(): html.Li(['Create Heatmap Switch: If this switch is activated, a heatmap will be created with the ' 'provided residue contact information. By default, if a contact map is uploaded, the ' 'intensity of the colours in this heatmap will correspond with the confidence of each ' - 'contact. Alternatively, if a residue-residue distance prediction file has been uploaded ' - '(', html.I('CASPRR_MODE2'), - ' format), the heatmap will correspond with the predicted distances for each residue ' + 'contact. Alternatively, if a residue-residue distance prediction file has been ' + 'uploaded, the heatmap will correspond with the predicted distances for each residue ' 'pair oin this file. Please note that when this mode is active, the ', html.I('L/N'), ' selector and the ', html.I('Size'), ' selector will be disabled. You can read more about how to visualise residue-residue '