diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index d701079..65e3378 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -39,6 +39,7 @@ jobs: redis-version: 5 - name: Run tests.py env: + THIS_IS_GH_ACTIONS: 1 KEYDB_URL: ${{ secrets.KEYDB_URL }} run: | python tests.py diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 9913c32..c18258b 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,6 +2,20 @@ Changelog ========= +0.4.1 +----- + +Added +~~~~~ +- Added contact diff track with smoothed values (MCC for contacts and RMSD for distograms) + +Changed +~~~~~~~ +- Increased contrast in sequence hydrophobicity color palettes +- Use AMISE to estimate bandwidth required to calculate contact density +- Updated track selector layout + + 0.4 ----- diff --git a/README.md b/README.md index 9ab07fb..72d7772 100644 --- a/README.md +++ b/README.md @@ -59,21 +59,25 @@ Once you have installed `redis`, you will need to start the service by running: $ sudo service redis start ``` -You will also need to create a environment variable called `KEYDB_URL` with -the URL to connect to the redis server you just started on your machine: +Now you'll need to clone this repository, install the requirements and setup environment variables. +Please note that ConPlot requires at least `python 3.6`. ```bash -$ KEYDB_URL=redis://localhost:6379 +$ git clone https://github.com/rigdenlab/conplot +$ cd conplot +$ python3.6 -m pip install -r requirements.txt +$ echo "KEYDB_URL=redis://localhost:6379" > .env +$ echo "KEYDB_TIMEOUT=3600" >> .env ``` -After this, all you need to do is clone this repository, install the requirements -and start the Flask development server on your machine. Please note that ConPlot -requires at least `python 3.6`. +With the last two commands you will also have created an `.env` file with a variable named +`KEYDB_URL` pointing to the redis server and a `KEYDB_TIMEOUT` variable with the session +timeout value. 
This is the time at which a session expires after inactivity. By default in +`www.conplot.org` this has a value of 3600 minutes, but if running locally you can set this +time to any other value. The only thing left to do is to start the Flask development +server on your machine: ```bash -$ git clone https://github.com/rigdenlab/conplot -$ cd conplot -$ python3.6 -m pip install -r requirements.txt $ python3.6 app.py ``` diff --git a/app.py b/app.py index 88af642..7270a1a 100644 --- a/app.py +++ b/app.py @@ -24,7 +24,7 @@ def serve_layout(): except (keydb.ConnectionError, TypeError, KeyError) as e: app.logger.error('Redis connection error! {}'.format(e)) return layouts.RedisConnectionError() - session_id = session_utils.initiate_session(cache, app.logger) + session_id = session_utils.initiate_session(cache, app.logger, keydb_timeout) return layouts.Base(session_id) @@ -44,6 +44,7 @@ def serve_layout(): 'requests_pathname_prefix': '/conplot/', }) keydb_pool = keydb_utils.create_pool(os.environ.get('KEYDB_URL')) +keydb_timeout = os.environ.get('KEYDB_TIMEOUT') app.layout = serve_layout @@ -407,7 +408,7 @@ def javascript_exe_button(n_clicks, session_id): elif 'new-session' in trigger['prop_id'] or session_utils.is_expired_session(session_id, cache, app.logger): cache = keydb.KeyDB(connection_pool=keydb_pool) - new_session_id = session_utils.initiate_session(cache, app.logger) + new_session_id = session_utils.initiate_session(cache, app.logger, keydb_timeout) return "location.reload();", no_update, new_session_id else: @@ -454,7 +455,7 @@ def create_ConPlot(plot_click, refresh_click, factor, contact_marker_size, track if any([True for x in (factor, contact_marker_size, track_marker_size, track_separation) if x is None or x < 0]): app.logger.info('Session {} invalid display control value detected'.format(session_id)) return no_update, components.InvalidInputModal(), no_update, no_update - elif superimpose and ('---' in cmap_selection or len(set(cmap_selection)) == 1): + 
elif superimpose and ('--- Empty ---' in cmap_selection or len(set(cmap_selection)) == 1): return no_update, components.InvalidMapSelectionModal(), no_update, no_update app.logger.info('Session {} creating conplot'.format(session_id)) diff --git a/components/__init__.py b/components/__init__.py index ef69f27..b4c52cf 100644 --- a/components/__init__.py +++ b/components/__init__.py @@ -11,6 +11,7 @@ class UserReadableTrackNames(Enum): heatmap = 'Heatmap' hydrophobicity = 'Hydrophobicity' density = 'Contact density' + diff = 'Contact diff' class EmailIssueReference(Enum): diff --git a/components/cards.py b/components/cards.py index 0e7d141..0dc0b0d 100644 --- a/components/cards.py +++ b/components/cards.py @@ -1,3 +1,4 @@ +from utils import cache_utils import components import dash_core_components as dcc import dash_bootstrap_components as dbc @@ -286,23 +287,23 @@ def DisplayControlCard(available_tracks=None, selected_tracks=None, selected_cma html.H5("Active tracks", className="card-text", style={'text-align': "center"}), html.Hr(), html.Br(), - TrackSelectionCard('-4', selected_tracks[0], available_tracks=available_tracks), + dbc.Card(components.TrackLayoutSelector('-4', available_tracks, selected_tracks[0]), outline=False), html.Br(), - TrackSelectionCard('-3', selected_tracks[1], available_tracks=available_tracks), + dbc.Card(components.TrackLayoutSelector('-3', available_tracks, selected_tracks[1]), outline=False), html.Br(), - TrackSelectionCard('-2', selected_tracks[2], available_tracks=available_tracks), + dbc.Card(components.TrackLayoutSelector('-2', available_tracks, selected_tracks[2]), outline=False), html.Br(), - TrackSelectionCard('-1', selected_tracks[3], available_tracks=available_tracks), + dbc.Card(components.TrackLayoutSelector('-1', available_tracks, selected_tracks[3]), outline=False), html.Br(), - TrackSelectionCard(' 0', selected_tracks[4], available_tracks=available_tracks), + dbc.Card(components.TrackLayoutSelector('0', available_tracks, 
selected_tracks[4]), outline=False), html.Br(), - TrackSelectionCard('+1', selected_tracks[5], available_tracks=available_tracks), + dbc.Card(components.TrackLayoutSelector('+1', available_tracks, selected_tracks[5]), outline=False), html.Br(), - TrackSelectionCard('+2', selected_tracks[6], available_tracks=available_tracks), + dbc.Card(components.TrackLayoutSelector('+2', available_tracks, selected_tracks[6]), outline=False), html.Br(), - TrackSelectionCard('+3', selected_tracks[7], available_tracks=available_tracks), + dbc.Card(components.TrackLayoutSelector('+3', available_tracks, selected_tracks[7]), outline=False), html.Br(), - TrackSelectionCard('+4', selected_tracks[8], available_tracks=available_tracks), + dbc.Card(components.TrackLayoutSelector('+4', available_tracks, selected_tracks[8]), outline=False), html.Br(), html.Br(), html.H5("Colour palettes", className="card-text", style={'text-align': "center"}), @@ -310,21 +311,23 @@ def DisplayControlCard(available_tracks=None, selected_tracks=None, selected_cma html.Br(), ColorPaletteSelectionCard('density', selected_palettes[0]), html.Br(), - ColorPaletteSelectionCard('custom', selected_palettes[1]), + ColorPaletteSelectionCard('diff', selected_palettes[1]), html.Br(), - ColorPaletteSelectionCard('heatmap', selected_palettes[2]), + ColorPaletteSelectionCard('custom', selected_palettes[2]), html.Br(), - ColorPaletteSelectionCard('hydrophobicity', selected_palettes[3]), + ColorPaletteSelectionCard('heatmap', selected_palettes[3]), html.Br(), - ColorPaletteSelectionCard('membranetopology', selected_palettes[4]), + ColorPaletteSelectionCard('hydrophobicity', selected_palettes[4]), html.Br(), - ColorPaletteSelectionCard('msa', selected_palettes[5]), + ColorPaletteSelectionCard('membranetopology', selected_palettes[5]), html.Br(), - ColorPaletteSelectionCard('conservation', selected_palettes[6]), + ColorPaletteSelectionCard('msa', selected_palettes[6]), html.Br(), - ColorPaletteSelectionCard('disorder', 
selected_palettes[7]), + ColorPaletteSelectionCard('conservation', selected_palettes[7]), html.Br(), - ColorPaletteSelectionCard('secondarystructure', selected_palettes[8]), + ColorPaletteSelectionCard('disorder', selected_palettes[8]), + html.Br(), + ColorPaletteSelectionCard('secondarystructure', selected_palettes[9]), html.Br(), ]) ] @@ -334,14 +337,6 @@ def DisplayControlCard(available_tracks=None, selected_tracks=None, selected_cma else: raise ValueError('This should not occur! Please report.') - -def TrackSelectionCard(track_idx, track_value, available_tracks): - track_options = [{'label': '---', 'value': '---'}] - track_options += [{'label': fname, 'value': fname} for fname in available_tracks] - - return dbc.Card(components.TrackLayoutSelector(track_idx, track_options, track_value), outline=False) - - def ColorPaletteSelectionCard(dataset, selected_palette): available_palettes = [] for palette in color_palettes.DatasetColorPalettes.__getattr__(dataset).value: @@ -351,7 +346,7 @@ def ColorPaletteSelectionCard(dataset, selected_palette): def HalfSquareSelectionCard(square_idx, selection, available_cmaps): - cmap_options = [{'label': '---', 'value': '---'}] + cmap_options = [{'label': '--- Empty ---', 'value': '--- Empty ---'}] cmap_options += [{'label': fname, 'value': fname} for fname in available_cmaps] return dbc.Card(components.HalfSquareSelector(square_idx, cmap_options, selection), outline=False) diff --git a/components/listgrpoups.py b/components/listgrpoups.py index cbea080..853b0d8 100644 --- a/components/listgrpoups.py +++ b/components/listgrpoups.py @@ -191,48 +191,38 @@ def AdjustPlotHelpList(): 'series of input menus:', html.Ul([ html.Li(['L/N selector: Change the values of ', html.I('N'), - ' with this selector to choose how many contacts should be ' - 'included in the plot (L is the number of residues in the ' - 'protein sequence, residues are sorted by their probability ' - 'score). 
If you set ', html.I('N'), - ' to 0, then all contacts in the file will be displayed. Please ' - 'note that only numerical values between 0 and 10 are recommended.']), - html.Li('Size selector: Change the size of the contact markers in the ' - 'plot. ConPlot will set a default value depending on the size of ' - 'the protein you are working with, but you can still change this ' - 'if you would like to make the markers smaller or bigger. Please ' + ' with this selector to choose how many contacts should be included in the plot (L is ' + 'the number of residues in the protein sequence, residues are sorted by their ' + 'probability score). If you set ', html.I('N'), + ' to 0, then all contacts in the file will be displayed. Please note that only numerical ' + 'values between 0 and 10 are recommended. Additionally, please remember that contact ' + 'data shown for PDB files is unaltered by this selector.']), + html.Li('Size selector: Change the size of the contact markers in the plot. ConPlot will set a ' + 'default value depending on the size of the protein you are working with, but you can ' + 'still change this if you would like to make the markers smaller or bigger. Please ' 'note that only numerical values between 1 and 15 are recommended.'), - html.Li(['Map A and Map B selectors: These two selectors let you choose ' - 'which contact data should be displayed on the plot. By ' - 'default, ', html.I('Map A'), - ' refers to the top half triangle of the map, and ', - html.I('Map B'), ' to the lower one. 
If the ', - html.I('Superimpose Maps'), - ' switch is activated, then the roles of these two dropdown ' - 'menus change: ', html.I('Map A'), - ' is now used to select the reference map, which will be ' - 'compared with the secondary map selected with the ', - html.I('Map B'), ' selector.']), - html.Li(['Superimpose Maps Switch: As explained above, if this switch ' - 'is activated ', html.I('Map A'), - ' will be used as a reference map to be compared with ', - html.I('Map B'), - '. In this mode, contacts will be coloured according to their ' - 'presence in the reference map and the secondary map. Contacts ' - 'that appear on both the reference and the secondary map will be ' - 'coloured in black -match-, those that only appear in the ' - 'reference in grey -absent-, and those that only appear in the ' - 'secondary map in red -mismatch-. Please note that you can only ' - 'use this mode if you select two different contact map files in ', + html.Li(['Map A and Map B selectors: These two selectors let you choose which contact data should ' + 'be displayed on the plot. By ' 'default, ', html.I('Map A'), + ' refers to the top half triangle of the map, and ', html.I('Map B'), + ' to the lower one. If the ', html.I('Superimpose Maps'), + ' switch is activated, then these roles change: ', html.I('Map A'), + ' is now used to select the reference map, which will be compared with the secondary map ' + 'selected with the ', html.I('Map B'), ' selector.']), + html.Li(['Superimpose Maps Switch: As explained above, if this switch is activated ', + html.I('Map A'), ' will be used as a reference map to be compared with ', html.I('Map B'), + '. In this mode, contacts will be coloured according to their presence in the reference ' + 'map and the secondary map. 
Contacts that appear on both the reference and the secondary ' + 'map will be coloured in black -match-, those that only appear in the reference in grey ' + '-absent-, and those that only appear in the secondary map in red -mismatch-. Please ' + 'note that you can only use this mode if you select two different contact map files in ', html.I('Map A'), ' and ', html.I('Map B'), ' selectors.']), html.Li(['Create Heatmap Switch: If this switch is activated, a heatmap will be created with the ' 'provided residue contact information. By default, if a contact map is uploaded, the ' 'intensity of the colours in this heatmap will correspond with the confidence of each ' - 'contact. Alternatively, if a residue-residue distance prediction file has been uploaded ' - '(', html.I('CASPRR_MODE2'), - ' format), the heatmap will correspond with the predicted distances for ' - 'each residue pair oin this file. Please note that when this mode is active, the ', - html.I('L/N'), ' selector and the ', html.I('Size'), + 'contact. Alternatively, if a residue-residue distance prediction file has been ' + 'uploaded, the heatmap will correspond with the predicted distances for each residue ' + 'pair in this file. Please note that when this mode is active, the ', html.I('L/N'), + ' selector and the ', html.I('Size'), ' selector will be disabled. You can read more about how to visualise residue-residue ' 'distance predictions at ', html.I('Tutorial 4. Residue-Residue distance predictions'), '.']), @@ -242,9 +232,8 @@ def AdjustPlotHelpList(): 'would normally be displayed.') ])], style={"font-size": "110%", 'text-align': "justify"}), - html.Li(['Section 2: Adjust additional tracks. In this section you will find selectors ' - 'that will let you control aspects about how the additional tracks are being ' - 'displayed in the plot:', + html.Li(['Section 2: Adjust additional tracks. 
In this section you will find selectors that will let you ' + 'control aspects about how the additional tracks are being displayed in the plot:', html.Ul([ html.Li('Size selector: Change the size of the tiles used to create the ' 'tracks on the diagonal of the plot. By changing this value, ' @@ -326,6 +315,15 @@ def AdditionalFormatsHelpList(): html.A(html.U('here'), href=UrlIndex.CONSURF_CITATION.value), '.'], style={"font-size": "110%", 'text-align': "justify"}), + html.Li(['A3M file. This is a multiple sequence alignment file that should have been obtained using the ' + 'sequence of interest as a query. ConPlot will parse the file and calculate the MSA coverage along ' + 'the query sequence, normalise these values (1-10) and create a track where each residue ' + 'is coloured according to the number of sequences aligned in that particular position. These ' + 'files are used in most contact prediction pipelines, and visualising the MSA coverage can help you ' + 'understand the quality of the information used to obtain your predictions. Several alignment tools ' + 'will create MSA files in this format, like for example HHBLITS, which you can use ' + 'online ', html.A(html.U('here'), href=UrlIndex.HHBLITS_URL.value), '.'], + style={"font-size": "110%", 'text-align': "justify"}), html.Li(['CUSTOM file. These files are plain text files that can be created manually ' 'by users to include additional tracks of information to the plot. 
These ' 'files enable limitless personalisation of the contact map plot, as it ' diff --git a/layouts/help.py b/layouts/help.py index c50c693..36735e8 100644 --- a/layouts/help.py +++ b/layouts/help.py @@ -24,7 +24,7 @@ def Body(cache): components.TutorialTwoModal(), components.TutorialThreeModal(), components.TutorialFourModal(), - #components.TutorialFiveModal(), + # components.TutorialFiveModal(), components.CustomFormatDescriptionModal(), dbc.Row([ dbc.Col([ @@ -168,6 +168,11 @@ def Body(cache): dbc.Alert(['TIPS: ', html.Ul([ html.Br(), + html.Li(['Remember that the ', html.I('L/N'), + ' selector will not affect any data being shown for PDB files. ' + 'Similarly, data will also not be affected if the ', + html.I('Create heatmap'), ' switch is turned on.' + ]), html.Li(['If you have just created a plot with the ', html.I('Generate Plot'), ' button and you can see individual squared tiles in the diagonal ' @@ -262,19 +267,30 @@ def Body(cache): 'memory server used by ConPlot.']), dbc.Col([ html.Plaintext('$ sudo apt update\n$ sudo apt install redis-server\n$ sudo ' - 'service redis start\n$ KEYDB_URL=redis://localhost:6379') + 'service redis start') ], style={'background-color': '#EAEAEA'}, align='center'), - html.P('With the above commands you will have installed Redis and started the server. You ' - 'will also have created a environment variable called "KEYDB_URL" containing ' - 'the URL to connect to your redis server. ConPlot will need to read this ' - 'environment variable to access the redis database. After this, all you need to do ' - 'is clone ConPlot repository, install the requirements and start the Flask ' - 'development server on your machine. 
Please note that ConPlot requires at least ' - 'python 3.6 installed:'), + html.P('Once you have installed `redis`, you will need to start the service by running:'), + dbc.Col([ + html.Plaintext('$ sudo service redis start') + ], style={'background-color': '#EAEAEA'}, align='center'), + html.P('Now you will need to clone the repository, install the requirements and ' + 'setup environment variables. Please note that ConPlot requires at least ' + 'python 3.6.'), dbc.Col([ html.Plaintext('$ git clone https://github.com/rigdenlab/conplot\n' '$ cd conplot\n$ python3.6 -m pip install -r requirements\n$ ' - 'python3.6 app.py') + 'echo "KEYDB_URL=redis://localhost:6379" > .env\n$ echo "KEYDB_TIME' + 'OUT=3600" >> .env') + ], style={'background-color': '#EAEAEA'}, align='center'), + html.P('With the last two commands you will also have created an .env file with a ' + 'variable named KEYDB_URL pointing to the redis server and a KEYDB_TIMEOUT ' + 'variable with the session timeout value. This is the time at which a session ' + 'expires after inactivity. By default in www.conplot.org this has a value of 3600 ' + 'minutes, but if running locally you can set this time to any other value. 
' + 'The only thing left to do is to start the Flask development server on your ' + 'machine:'), + dbc.Col([ + html.Plaintext('$ python3.6 app.py') ], style={'background-color': '#EAEAEA'}, align='center'), html.P(['Now you will be able to access the app on ', html.A(html.U('http://127.0.0.1:8050/home'), diff --git a/loaders/__init__.py b/loaders/__init__.py index a6075a9..7d56d76 100644 --- a/loaders/__init__.py +++ b/loaders/__init__.py @@ -1,6 +1,5 @@ from enum import Enum import base64 -from parsers import HydrophobicityStates class DatasetReference(Enum): @@ -8,6 +7,7 @@ class DatasetReference(Enum): HYDROPHOBICITY = 'hydrophobicity' CONTACT_MAP = 'contact' CONTACT_DENSITY = 'density' + CONTACT_DIFF = 'diff' MEMBRANE_TOPOLOGY = 'membranetopology' SECONDARY_STRUCTURE = 'secondarystructure' CONSERVATION = 'conservation' @@ -130,6 +130,19 @@ def SequenceLoader(*args, **kwargs): 9: 'CONTACT_DENSITY_9', 10: 'CONTACT_DENSITY_10', }, + DatasetReference.CONTACT_DIFF.value:{ + 0: 'CONTACT_DIFF_0', + 1: 'CONTACT_DIFF_1', + 2: 'CONTACT_DIFF_2', + 3: 'CONTACT_DIFF_3', + 4: 'CONTACT_DIFF_4', + 5: 'CONTACT_DIFF_5', + 6: 'CONTACT_DIFF_6', + 7: 'CONTACT_DIFF_7', + 8: 'CONTACT_DIFF_8', + 9: 'CONTACT_DIFF_9', + 10: 'CONTACT_DIFF_10', + }, DatasetReference.MSA.value: { 0: 'MSA_COVERAGE_0', 1: 'MSA_COVERAGE_1', diff --git a/parsers/__init__.py b/parsers/__init__.py index c16e1e0..75b09d0 100644 --- a/parsers/__init__.py +++ b/parsers/__init__.py @@ -1,4 +1,5 @@ from enum import Enum +from operator import itemgetter def ConsurfParser(*args, **kwargs): @@ -257,6 +258,20 @@ class DensityStates(Enum): CONTACT_DENSITY_10 = 10 +class DiffStates(Enum): + CONTACT_DIFF_0 = 0 + CONTACT_DIFF_1 = 1 + CONTACT_DIFF_2 = 2 + CONTACT_DIFF_3 = 3 + CONTACT_DIFF_4 = 4 + CONTACT_DIFF_5 = 5 + CONTACT_DIFF_6 = 6 + CONTACT_DIFF_7 = 7 + CONTACT_DIFF_8 = 8 + CONTACT_DIFF_9 = 9 + CONTACT_DIFF_10 = 10 + + class MsaStates(Enum): MSA_COVERAGE_0 = 0 MSA_COVERAGE_1 = 1 @@ -279,4 +294,13 @@ class 
DatasetStates(Enum): custom = CustomStates hydrophobicity = HydrophobicityStates density = DensityStates + diff = DiffStates msa = MsaStates + + +def get_unique_distances(elements): + key = itemgetter(0) + unique_contacts = list({key(el): el for el in elements}.values()) + output = ['DISTO'] + output += sorted([(*contact[0], *contact[1:]) for contact in unique_contacts], key=itemgetter(2), reverse=True) + return output diff --git a/parsers/casprr2parser.py b/parsers/casprr2parser.py index d600018..cbb9e2f 100644 --- a/parsers/casprr2parser.py +++ b/parsers/casprr2parser.py @@ -1,4 +1,4 @@ -from utils import get_unique_distances +from parsers import get_unique_distances from utils.exceptions import InvalidFormat diff --git a/parsers/mappred.py b/parsers/mappred.py index 8616471..e6e03ac 100644 --- a/parsers/mappred.py +++ b/parsers/mappred.py @@ -1,4 +1,4 @@ -from utils import get_unique_distances +from parsers import get_unique_distances from utils.exceptions import InvalidFormat diff --git a/parsers/npzparser.py b/parsers/npzparser.py index 6287828..375afe4 100644 --- a/parsers/npzparser.py +++ b/parsers/npzparser.py @@ -2,7 +2,7 @@ import base64 import numpy as np from utils.exceptions import InvalidFormat -from utils import get_unique_distances +from parsers import get_unique_distances def parse_array(array): diff --git a/parsers/pdbparser.py b/parsers/pdbparser.py index 9b82634..3f3602f 100644 --- a/parsers/pdbparser.py +++ b/parsers/pdbparser.py @@ -61,6 +61,6 @@ def PDBParser(input, input_format=None): if not contacts: raise InvalidFormat('Unable to parse contacts') - output = sorted(contacts, key=itemgetter(2), reverse=True) - output.append("PDB") + output = ["PDB"] + output += sorted(contacts, key=itemgetter(2), reverse=True) return output diff --git a/parsers/tests/test_casprr2parser.py b/parsers/tests/test_casprr2parser.py index ed8e08f..d913415 100644 --- a/parsers/tests/test_casprr2parser.py +++ b/parsers/tests/test_casprr2parser.py @@ -35,7 +35,7 @@ 
def test_1(self): output = CASPRR2Parser(dummy_prediction) - self.assertEqual('DISTO', output.pop(-1)) + self.assertEqual('DISTO', output.pop(0)) self.assertEqual(12, len(output)) self.assertListEqual(expected_res1, [contact[0] for contact in output]) self.assertListEqual(expected_res2, [contact[1] for contact in output]) diff --git a/parsers/tests/test_mappred.py b/parsers/tests/test_mappred.py index f6fa43b..7672893 100644 --- a/parsers/tests/test_mappred.py +++ b/parsers/tests/test_mappred.py @@ -24,7 +24,7 @@ def test_1(self): output = MappredParser(dummy_prediction) - self.assertEqual('DISTO', output.pop(-1)) + self.assertEqual('DISTO', output.pop(0)) self.assertEqual(4, len(output)) self.assertListEqual(expected_res1, [contact[0] for contact in output]) self.assertListEqual(expected_res2, [contact[1] for contact in output]) diff --git a/parsers/tests/test_pdbparser.py b/parsers/tests/test_pdbparser.py index d48a300..782bc50 100644 --- a/parsers/tests/test_pdbparser.py +++ b/parsers/tests/test_pdbparser.py @@ -33,7 +33,7 @@ def test_1(self): output = PDBParser(dummy_prediction) self.assertEqual(7, len(output)) - self.assertEqual('PDB', output.pop(-1)) + self.assertEqual('PDB', output.pop(0)) self.assertListEqual(expected_res1, [contact[0] for contact in output]) self.assertListEqual(expected_res2, [contact[1] for contact in output]) self.assertListEqual(expected_score, [contact[2] for contact in output]) diff --git a/requirements.txt b/requirements.txt index a633f1b..5fffe4b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,4 +21,7 @@ keyring~=22.0.1 keyrings.cryptfile~=1.3.6 numpy~=1.19.4 fast-enum~=1.3.0 -scikit-learn~=0.24.1 \ No newline at end of file +scikit-learn~=0.24.1 +numba~=0.53.1 +conkit~=0.12.0 +python-dotenv~=0.17.1 \ No newline at end of file diff --git a/utils/__init__.py b/utils/__init__.py index 8449257..77b2fd1 100644 --- a/utils/__init__.py +++ b/utils/__init__.py @@ -5,7 +5,7 @@ def conplot_version(): - return 'v0.4' + return 
'v0.4.1' def get_base_url(): @@ -106,6 +106,7 @@ class UrlIndex(Enum): IUPRED_CITATION = 'https://doi.org/10.1093/nar/gky384' CONSURF_WEB = 'https://consurf.tau.ac.il/' CONSURF_CITATION = 'https://doi.org/10.1093/nar/gkw408' + HHBLITS_URL = 'https://toolkit.tuebingen.mpg.de/tools/hhblits' GDPR_WEBSITE = 'https://gdpr-info.eu' DOCKER_HUB = 'https://hub.docker.com/r/filosanrod/conplot' CONPLOT_DOCKER = 'https://github.com/rigdenlab/conplot-docker' @@ -134,6 +135,12 @@ def retrieve_sequence_fname(*args, **kwargs): return retrieve_sequence_fname(*args, **kwargs) +def contains_distances(*args, **kwargs): + from utils.cmap_utils import contains_distances + + return contains_distances(*args, **kwargs) + + def CacheKeys(*args, **kwargs): from utils.cache_utils import CacheKeys @@ -170,6 +177,24 @@ def get_active_sessions(*args, **kwargs): return get_active_sessions(*args, **kwargs) +def lookup_data(*args, **kwargs): + from utils.data_utils import lookup_data + + return lookup_data(*args, **kwargs) + + +def create_cmap_sets(*args, **kwargs): + from utils.cmap_utils import create_cmap_sets + + return create_cmap_sets(*args, **kwargs) + + +def slice_cmap(*args, **kwargs): + from utils.cmap_utils import slice_cmap + + return slice_cmap(*args, **kwargs) + + def load_figure_json(*args, **kwargs): from utils.plot_utils import load_figure_json @@ -255,12 +280,3 @@ def get_unique_contacts(elements): output = [(*contact[0], contact[1]) for contact in unique] output = sorted(output, key=itemgetter(2), reverse=True) return output - - -def get_unique_distances(elements): - key = itemgetter(0) - unique_contacts = list({key(el): el for el in elements}.values()) - output = [(*contact[0], *contact[1:]) for contact in unique_contacts] - output = sorted(output, key=itemgetter(2), reverse=True) - output.append('DISTO') - return output diff --git a/utils/cache_utils.py b/utils/cache_utils.py index 58f1f02..19a213c 100644 --- a/utils/cache_utils.py +++ b/utils/cache_utils.py @@ -13,6 +13,7 @@ 
class CacheKeys(Enum): DISPLAY_CONTROL_JSON = 'display_control_json' CONTACT_MAP = loaders.DatasetReference.CONTACT_MAP.value CONTACT_DENSITY = loaders.DatasetReference.CONTACT_DENSITY.value + CONTACT_DIFF = loaders.DatasetReference.CONTACT_DIFF.value CUSTOM = loaders.DatasetReference.CUSTOM.value SEQUENCE = loaders.DatasetReference.SEQUENCE.value SEQUENCE_HYDROPHOBICITY = loaders.DatasetReference.HYDROPHOBICITY.value @@ -20,29 +21,40 @@ class CacheKeys(Enum): SECONDARY_STRUCTURE = loaders.DatasetReference.SECONDARY_STRUCTURE.value CONSERVATION = loaders.DatasetReference.CONSERVATION.value DISORDER = loaders.DatasetReference.DISORDER.value - METADATA_TAG = 'CONPLOT-INTERNAL-USE-ONLY-METADATA-PROTECTED-TAG' + CMAP_DENSITY = '{}_CONPLOT-INTERNAL-USE-ONLY-METADATA-DENSITY-TAG_{}' + CMAP_DIFF = '{}_{}_CONPLOT-INTERNAL-USE-ONLY-METADATA-DIFF-TAG_{}' + PROTECETED_TAG = 'CONPLOT-INTERNAL-USE-ONLY-METADATA' -def retrieve_density(session_id, density_cachekey, cache): - density = cache.hget(session_id, density_cachekey) +class MetadataTags(Enum): + DENSITY = ' - density' + HYDROPHOBICITY = ' - hydrophobicity' + DIFF = ' - diff' + SEPARATOR = '|' + HYPHEN = '---' + TAG = 'CONPLOT-INTERNAL-USE-ONLY-METADATA' + + +def retrieve_data(session_id, cachekey, cache): + density = cache.hget(session_id, cachekey) return decompress_data(density) -def store_density(session_id, density_cachekey, density, cache): - cache.hset(session_id, density_cachekey, compress_data(density)) - store_fname(cache, session_id, density_cachekey.decode(), CacheKeys.CONTACT_DENSITY.value) +def store_data(session_id, cachekey, data, dataset, cache): + cache.hset(session_id, cachekey, compress_data(data)) + store_fname(cache, session_id, cachekey.decode(), dataset) -def remove_all_density(session_id, cache): - density_list = cache.hget(session_id, CacheKeys.CONTACT_DENSITY.value) - if not density_list: +def remove_all(session_id, dataset, cache): + cachekey_list = cache.hget(session_id, dataset) + if not 
cachekey_list: return - density_list = decompress_data(density_list) - for density in density_list: - cache.hdel(session_id, density) + cachekey_list = decompress_data(cachekey_list) + for cachekey in cachekey_list: + cache.hdel(session_id, cachekey) - cache.hdel(session_id, CacheKeys.CONTACT_DENSITY.value) + cache.hdel(session_id, dataset) def remove_density(session_id, cache, fname): @@ -51,7 +63,7 @@ def remove_density(session_id, cache, fname): return density_list = decompress_data(density_list) - density_cachekey = '{}_{}'.format(fname, CacheKeys.METADATA_TAG.value) + density_cachekey = '{}_{}'.format(fname, CacheKeys.PROTECETED_TAG.value) for density in density_list: if density_cachekey in density: cache.hdel(session_id, density) @@ -59,8 +71,21 @@ def remove_density(session_id, cache, fname): cache.hset(session_id, CacheKeys.CONTACT_DENSITY.value, compress_data(density_list)) +def remove_diff(session_id, cache, fname): + diff_list = cache.hget(session_id, CacheKeys.CONTACT_DIFF.value) + if not diff_list: + return + diff_list = decompress_data(diff_list) + + for diff in diff_list: + if fname in diff: + cache.hdel(session_id, diff) + diff_list = [diff for diff in diff_list if fname not in diff] + cache.hset(session_id, CacheKeys.CONTACT_DIFF.value, compress_data(diff_list)) + + def is_valid_fname(fname): - if CacheKeys.METADATA_TAG.value in fname or fname in [x.value for x in CacheKeys]: + if any([x for x in CacheKeys if x.value == fname]) or any([tag for tag in MetadataTags if tag.value in fname]): return False return True @@ -148,7 +173,8 @@ def clear_cache(session_id, cache): remove_datasets(session_id, cache) remove_figure(session_id, cache) remove_sequence(session_id, cache) - remove_all_density(session_id, cache) + remove_all(session_id, CacheKeys.CONTACT_DENSITY.value, cache) + remove_all(session_id, CacheKeys.CONTACT_DIFF.value, cache) def remove_datasets(session_id, cache): @@ -184,3 +210,10 @@ def is_redis_available(cache): def 
get_active_sessions(cache): return cache.dbsize() + + +def get_cachekey(session, fname, factor): + if 'PDB' == session[fname.encode()][0]: + return CacheKeys.CMAP_DENSITY.value.format(fname, fname).encode() + else: + return CacheKeys.CMAP_DENSITY.value.format(fname, factor).encode() diff --git a/utils/cmap_utils.py b/utils/cmap_utils.py index 91056f3..c14304c 100644 --- a/utils/cmap_utils.py +++ b/utils/cmap_utils.py @@ -18,11 +18,7 @@ def create_cmap_trace(x, y, symbol, marker_size, color, hovertext=None): def create_cmap(cmap, idx, display_settings, verbose_labels=None): - if cmap[-1] == 'PDB' or cmap[-1] == 'DISTO': - del cmap[-1] - - if display_settings.factor != 0: - cmap = cmap[:int(round(display_settings.seq_length / display_settings.factor, 0))] + cmap = slice_cmap(cmap, display_settings.seq_length, display_settings.factor) if idx == 1: idx_x = 0 @@ -39,10 +35,9 @@ def create_cmap(cmap, idx, display_settings, verbose_labels=None): for contact in cmap: res1_list.append(contact[idx_x]) res2_list.append(contact[idx_y]) - res_x_label = verbose_labels[contact[idx_x] - 1] - res_y_label = verbose_labels[contact[idx_y] - 1] - hover.append(HoverTemplates.CMAP_VERBOSE.format(contact[idx_x], contact[idx_y], contact[2], res_x_label, - res_y_label)) + xlabel = verbose_labels[contact[idx_x] - 1] + ylabel = verbose_labels[contact[idx_y] - 1] + hover.append(HoverTemplates.CMAP_VERBOSE.format(contact[idx_x], contact[idx_y], contact[2], xlabel, ylabel)) else: for contact in cmap: res1_list.append(contact[idx_x]) @@ -52,45 +47,39 @@ def create_cmap(cmap, idx, display_settings, verbose_labels=None): return res1_list, res2_list, hover -def superimpose_cmaps(reference_cmap, predicted_cmap, display_settings): - if display_settings.factor != 0: - predicted_cmap = predicted_cmap[:int(round(display_settings.seq_length / display_settings.factor, 0))] - if reference_cmap[-1] == 'PDB': - del reference_cmap[-1] - reference_cmap = [contact for contact in reference_cmap if contact[2] > 0] 
- elif reference_cmap[-1] == 'DISTO': - del reference_cmap[-1] - reference_cmap = reference_cmap[:int(round(display_settings.seq_length / display_settings.factor, 0))] - else: - reference_cmap = reference_cmap[:int(round(display_settings.seq_length / display_settings.factor, 0))] - elif reference_cmap[-1] == 'PDB' or reference_cmap[-1] == 'DISTO': - del reference_cmap[-1] +def contains_distances(cmap): + if len(cmap[-1]) > 3: + return True + return False + + +def slice_cmap(cmap, seq_length, factor): + if cmap[0] == 'PDB': + return [contact for contact in cmap[1:] if contact[2] > 0] + elif cmap[0] == 'DISTO': + cmap = cmap[1:] - reference_contacts = [contact[:2] for contact in reference_cmap] - predicted_contacts = [contact[:2] for contact in predicted_cmap] + if factor != 0: + cmap = cmap[:int(round(seq_length / factor, 0))] - matched = [] - mismatched = [] - reference = [] + return cmap - for contact in reference_cmap: - if contact[:2] in predicted_contacts: - matched.append(contact) - else: - reference.append(contact) - for contact in predicted_cmap: - if contact[:2] not in reference_contacts: - mismatched.append(contact) +def create_cmap_sets(reference_cmap, predicted_cmap, display_settings): + reference_cmap = slice_cmap(reference_cmap, display_settings.seq_length, display_settings.factor) + predicted_cmap = slice_cmap(predicted_cmap, display_settings.seq_length, display_settings.factor) + predicted_set = {(x[0], x[1]): x[2] for x in predicted_cmap} + reference_set = {(x[0], x[1]): x[2] for x in reference_cmap} - return reference, matched, mismatched + return reference_set, predicted_set def create_superimposed_cmap(reference_cmap, predicted_cmap, display_settings, verbose_labels): traces = [] - ref, match, mismatch = superimpose_cmaps(reference_cmap, predicted_cmap, display_settings) - predicted_set = {(x[0], x[1]): x[2] for x in predicted_cmap} - reference_set = {(x[0], x[1]): x[2] for x in reference_cmap} + reference_set, predicted_set = 
create_cmap_sets(reference_cmap, predicted_cmap, display_settings) + ref = reference_set.keys() - predicted_set.keys() + mismatch = predicted_set.keys() - reference_set.keys() + match = reference_set.keys() & predicted_set.keys() x, y, hover = process_superimposed_cmap(ref, reference_set, predicted_set, verbose_labels) traces.append(create_cmap_trace(x, y, 'circle', display_settings.contact_marker_size, 'grey', hover)) @@ -112,16 +101,8 @@ def process_superimposed_cmap(contacts, reference_set, predicted_set, verbose_la if verbose_labels is not None: for contact in contacts: - - if tuple(contact[:2]) in predicted_set.keys(): - pred_confidence = predicted_set[tuple(contact[:2])] - else: - pred_confidence = 0 - if tuple(contact[:2]) in reference_set.keys(): - ref_confidence = reference_set[tuple(contact[:2])] - else: - ref_confidence = 0 - + pred_confidence = predicted_set[contact] if contact in predicted_set.keys() else 0 + ref_confidence = reference_set[contact] if contact in reference_set.keys() else 0 res1_list.append(contact[0]) res2_list.append(contact[1]) res_1_label = verbose_labels[contact[0] - 1] @@ -132,15 +113,8 @@ def process_superimposed_cmap(contacts, reference_set, predicted_set, verbose_la hover_2.append(HoverTemplates.CMAP_SUPERIMPOSE_VERBOSE.format(*label)) else: for contact in contacts: - if tuple(contact[:2]) in predicted_set.keys(): - pred_confidence = predicted_set[tuple(contact[:2])] - else: - pred_confidence = 0 - if tuple(contact[:2]) in reference_set.keys(): - ref_confidence = reference_set[tuple(contact[:2])] - else: - ref_confidence = 0 - + pred_confidence = predicted_set[contact] if contact in predicted_set.keys() else 0 + ref_confidence = reference_set[contact] if contact in reference_set.keys() else 0 res1_list.append(contact[0]) res2_list.append(contact[1]) label = (contact[0], contact[1], ref_confidence, pred_confidence) diff --git a/utils/color_palettes.py b/utils/color_palettes.py index 899d7dd..b189738 100644 --- 
a/utils/color_palettes.py +++ b/utils/color_palettes.py @@ -177,6 +177,20 @@ class Density_GreyColorPalette(Enum): CONTACT_DENSITY_10 = 'rgb(0,0,0,{})' +class Diff_GreyColorPalette(Enum): + CONTACT_DIFF_0 = 'rgba(255,255,255,{})' + CONTACT_DIFF_1 = 'rgba(229,229,229,{})' + CONTACT_DIFF_2 = 'rgba(204,204,204,{})' + CONTACT_DIFF_3 = 'rgba(179,179,179,{})' + CONTACT_DIFF_4 = 'rgba(153,153,153,{})' + CONTACT_DIFF_5 = 'rgba(127,127,127,{})' + CONTACT_DIFF_6 = 'rgba(102,102,102,{})' + CONTACT_DIFF_7 = 'rgba(77,77,77,{})' + CONTACT_DIFF_8 = 'rgba(51,51,51,{})' + CONTACT_DIFF_9 = 'rgba(25,25,25,{})' + CONTACT_DIFF_10 = 'rgb(0,0,0,{})' + + class Coverage_GreyColorPalette(Enum): MSA_COVERAGE_0 = 'rgba(255,255,255,{})' MSA_COVERAGE_1 = 'rgba(229,229,229,{})' @@ -219,6 +233,20 @@ class Density_Viridis(Enum): CONTACT_DENSITY_10 = sequential.Viridis[9] +class Diff_Viridis(Enum): + CONTACT_DIFF_0 = sequential.Viridis[0] + CONTACT_DIFF_1 = sequential.Viridis[0] + CONTACT_DIFF_2 = sequential.Viridis[1] + CONTACT_DIFF_3 = sequential.Viridis[2] + CONTACT_DIFF_4 = sequential.Viridis[3] + CONTACT_DIFF_5 = sequential.Viridis[4] + CONTACT_DIFF_6 = sequential.Viridis[5] + CONTACT_DIFF_7 = sequential.Viridis[6] + CONTACT_DIFF_8 = sequential.Viridis[7] + CONTACT_DIFF_9 = sequential.Viridis[8] + CONTACT_DIFF_10 = sequential.Viridis[9] + + class Coverage_Viridis(Enum): MSA_COVERAGE_0 = sequential.Viridis[0] MSA_COVERAGE_1 = sequential.Viridis[0] @@ -261,6 +289,20 @@ class Density_BuRd(Enum): CONTACT_DENSITY_10 = diverging.RdYlBu[1] +class Diff_BuRd(Enum): + CONTACT_DIFF_0 = diverging.RdYlBu[10] + CONTACT_DIFF_1 = diverging.RdYlBu[10] + CONTACT_DIFF_2 = diverging.RdYlBu[9] + CONTACT_DIFF_3 = diverging.RdYlBu[8] + CONTACT_DIFF_4 = diverging.RdYlBu[7] + CONTACT_DIFF_5 = diverging.RdYlBu[6] + CONTACT_DIFF_6 = diverging.RdYlBu[5] + CONTACT_DIFF_7 = diverging.RdYlBu[4] + CONTACT_DIFF_8 = diverging.RdYlBu[3] + CONTACT_DIFF_9 = diverging.RdYlBu[2] + CONTACT_DIFF_10 = diverging.RdYlBu[1] + + class 
Coverage_BuRd(Enum): MSA_COVERAGE_0 = diverging.RdYlBu[10] MSA_COVERAGE_1 = diverging.RdYlBu[10] @@ -303,6 +345,20 @@ class Density_Inferno(Enum): CONTACT_DENSITY_10 = sequential.Inferno[9] +class Diff_Inferno(Enum): + CONTACT_DIFF_0 = sequential.Inferno[0] + CONTACT_DIFF_1 = sequential.Inferno[0] + CONTACT_DIFF_2 = sequential.Inferno[1] + CONTACT_DIFF_3 = sequential.Inferno[2] + CONTACT_DIFF_4 = sequential.Inferno[3] + CONTACT_DIFF_5 = sequential.Inferno[4] + CONTACT_DIFF_6 = sequential.Inferno[5] + CONTACT_DIFF_7 = sequential.Inferno[6] + CONTACT_DIFF_8 = sequential.Inferno[7] + CONTACT_DIFF_9 = sequential.Inferno[8] + CONTACT_DIFF_10 = sequential.Inferno[9] + + class Coverage_Inferno(Enum): MSA_COVERAGE_0 = sequential.Inferno[0] MSA_COVERAGE_1 = sequential.Inferno[0] @@ -331,6 +387,20 @@ class Heatmap_Hot(Enum): BIN_10 = 'rgb(255.0, 255.0, 255.0)' +class Diff_Hot(Enum): + CONTACT_DIFF_10 = 'rgb(10.607999999999999, 0.0, 0.0)' + CONTACT_DIFF_9 = 'rgb(76.23763084702213, 0.0, 0.0)' + CONTACT_DIFF_8 = 'rgb(144.4924469279252, 0.0, 0.0)' + CONTACT_DIFF_7 = 'rgb(210.12207777494734, 0.0, 0.0)' + CONTACT_DIFF_6 = 'rgb(255.0, 23.37520639028961, 0.0)' + CONTACT_DIFF_5 = 'rgb(255.0, 91.62509548421984, 0.0)' + CONTACT_DIFF_4 = 'rgb(255.0, 157.24998884376814, 0.0)' + CONTACT_DIFF_3 = 'rgb(255.0, 225.49987793769836, 0.0)' + CONTACT_DIFF_2 = 'rgb(255.0, 255.0, 54.18729918729921)' + CONTACT_DIFF_1 = 'rgb(255.0, 255.0, 156.56240156240156)' + CONTACT_DIFF_0 = 'rgb(255.0, 255.0, 255.0)' + + class Density_Hot(Enum): CONTACT_DENSITY_10 = 'rgb(10.607999999999999, 0.0, 0.0)' CONTACT_DENSITY_9 = 'rgb(76.23763084702213, 0.0, 0.0)' @@ -383,41 +453,65 @@ class MsaCoverage_ColorPalettes(Enum): PALETTE_5 = Coverage_Hot -class Hydrophobicity_BlueGreyColorPalette(Enum): - HYDROPATHY_10 = 'rgba(66,138,245,{})' - HYDROPATHY_9 = 'rgba(72,137,234,{})' - HYDROPATHY_8 = 'rgba(79,136,222,{})' - HYDROPATHY_7 = 'rgba(85,136,211,{})' - HYDROPATHY_6 = 'rgba(92,135,199,{})' - HYDROPATHY_5 = 
'rgba(98,134,188,{})' - HYDROPATHY_4 = 'rgba(104,133,176,{})' - HYDROPATHY_3 = 'rgba(111,132,165,{})' - HYDROPATHY_2 = 'rgba(117,132,153,{})' - HYDROPATHY_1 = 'rgba(124,131,142,{})' - HYDROPATHY_0 = 'rgba(130,130,130,{})' - - -class Hydrophobicity_GreenGreyColorPalette(Enum): - HYDROPATHY_10 = 'rgba(59,237,74,{})' - HYDROPATHY_9 = 'rgba(66,226,80,{})' - HYDROPATHY_8 = 'rgba(73,216,85,{})' - HYDROPATHY_7 = 'rgba(80,205,91,{})' - HYDROPATHY_6 = 'rgba(87,194,96,{})' - HYDROPATHY_5 = 'rgba(95,184,102,{})' - HYDROPATHY_4 = 'rgba(102,173,108,{})' - HYDROPATHY_3 = 'rgba(109,162,113,{})' - HYDROPATHY_2 = 'rgba(116,151,119,{})' - HYDROPATHY_1 = 'rgba(123,141,124,{})' - HYDROPATHY_0 = 'rgba(130,130,130,{})' +class Diff_ColorPalettes(Enum): + PALETTE_1 = Diff_GreyColorPalette + PALETTE_2 = Diff_Viridis + PALETTE_3 = Diff_BuRd + PALETTE_4 = Diff_Inferno + PALETTE_5 = Diff_Hot + + +class Hydrophobicity_BlueColorPalette(Enum): + HYDROPATHY_10 = sequential.ice[1].replace(')', ', {})').replace('rgb', 'rgba') + HYDROPATHY_9 = sequential.ice[1].replace(')', ', {})').replace('rgb', 'rgba') + HYDROPATHY_8 = sequential.ice[2].replace(')', ', {})').replace('rgb', 'rgba') + HYDROPATHY_7 = sequential.ice[3].replace(')', ', {})').replace('rgb', 'rgba') + HYDROPATHY_6 = sequential.ice[4].replace(')', ', {})').replace('rgb', 'rgba') + HYDROPATHY_5 = sequential.ice[5].replace(')', ', {})').replace('rgb', 'rgba') + HYDROPATHY_4 = sequential.ice[6].replace(')', ', {})').replace('rgb', 'rgba') + HYDROPATHY_3 = sequential.ice[7].replace(')', ', {})').replace('rgb', 'rgba') + HYDROPATHY_2 = sequential.ice[8].replace(')', ', {})').replace('rgb', 'rgba') + HYDROPATHY_1 = sequential.ice[9].replace(')', ', {})').replace('rgb', 'rgba') + HYDROPATHY_0 = sequential.ice[9].replace(')', ', {})').replace('rgb', 'rgba') + + +class Hydrophobicity_GreenColorPalette(Enum): + HYDROPATHY_10 = 'rgba(8, 28, 21,{})' + HYDROPATHY_9 = 'rgba(8, 28, 21,{})' + HYDROPATHY_8 = 'rgba(27, 67, 50,{})' + HYDROPATHY_7 = 
'rgba(45, 106, 79,{})' + HYDROPATHY_6 = 'rgba(64, 145, 108,{})' + HYDROPATHY_5 = 'rgba(82, 183, 136,{})' + HYDROPATHY_4 = 'rgba(116, 198, 157,{})' + HYDROPATHY_3 = 'rgba(149, 213, 178,{})' + HYDROPATHY_2 = 'rgba(183, 228, 199,{})' + HYDROPATHY_1 = 'rgba(216, 243, 220,{})' + HYDROPATHY_0 = 'rgba(216, 243, 220,{})' + + +class Hydrophobicity_RedColorPalette(Enum): + HYDROPATHY_10 = sequential.amp[9].replace(')', ', {})').replace('rgb', 'rgba') + HYDROPATHY_9 = sequential.amp[9].replace(')', ', {})').replace('rgb', 'rgba') + HYDROPATHY_8 = sequential.amp[8].replace(')', ', {})').replace('rgb', 'rgba') + HYDROPATHY_7 = sequential.amp[7].replace(')', ', {})').replace('rgb', 'rgba') + HYDROPATHY_6 = sequential.amp[6].replace(')', ', {})').replace('rgb', 'rgba') + HYDROPATHY_5 = sequential.amp[5].replace(')', ', {})').replace('rgb', 'rgba') + HYDROPATHY_4 = sequential.amp[4].replace(')', ', {})').replace('rgb', 'rgba') + HYDROPATHY_3 = sequential.amp[3].replace(')', ', {})').replace('rgb', 'rgba') + HYDROPATHY_2 = sequential.amp[2].replace(')', ', {})').replace('rgb', 'rgba') + HYDROPATHY_1 = sequential.amp[1].replace(')', ', {})').replace('rgb', 'rgba') + HYDROPATHY_0 = sequential.amp[1].replace(')', ', {})').replace('rgb', 'rgba') class HydrophobicityColorPalettes(Enum): - PALETTE_1 = Hydrophobicity_BlueGreyColorPalette - PALETTE_2 = Hydrophobicity_GreenGreyColorPalette + PALETTE_1 = Hydrophobicity_BlueColorPalette + PALETTE_2 = Hydrophobicity_RedColorPalette + PALETTE_3 = Hydrophobicity_GreenColorPalette class DatasetColorPalettes(Enum): density = Density_ColorPalettes + diff = Diff_ColorPalettes custom = Custom_ColorPalettes heatmap = Heatmap_ColorPalettes hydrophobicity = HydrophobicityColorPalettes @@ -430,6 +524,7 @@ class DatasetColorPalettes(Enum): class PaletteDefaultLayout(Enum): CONTACT_DENSITY = DatasetReference.CONTACT_DENSITY.value.encode() + CONTACT_DIFF = DatasetReference.CONTACT_DIFF.value.encode() CUSTOM = DatasetReference.CUSTOM.value.encode() HEATMAP = 
b'heatmap' HYDROPHOBICITY = DatasetReference.HYDROPHOBICITY.value.encode() diff --git a/utils/data_utils.py b/utils/data_utils.py index 3bea872..afe50ec 100644 --- a/utils/data_utils.py +++ b/utils/data_utils.py @@ -12,8 +12,8 @@ def check_sequence_mismatch(session_id, cache, seq_length): cmap_fnames = decompress_data(cache.hget(session_id, cache_utils.CacheKeys.CONTACT_MAP.value)) for cmap_fname in cmap_fnames: cmap_data = decompress_data(cache.hget(session_id, cmap_fname)) - if cmap_data[-1] == 'PDB' or cmap_data[-1] == 'DISTO': - cmap_data.pop() + if cmap_data[0] == 'PDB' or cmap_data[0] == 'DISTO': + cmap_data = cmap_data[1:] cmap_max_register = max((max(cmap_data, key=itemgetter(0))[0], max(cmap_data, key=itemgetter(1))[0])) if cmap_max_register > seq_length: mismatched.append(cmap_fname) @@ -42,8 +42,8 @@ def check_dataset_mismatch(session_id, cache, data, dataset): return seq_fname else: return False - elif data[-1] == 'PDB' or data[-1] == 'DISTO': - max_register = max((max(data[:-1], key=itemgetter(0))[0], max(data[:-1], key=itemgetter(1))[0])) + elif data[0] == 'PDB' or data[0] == 'DISTO': + max_register = max((max(data[1:], key=itemgetter(0))[0], max(data[1:], key=itemgetter(1))[0])) else: max_register = max((max(data, key=itemgetter(0))[0], max(data, key=itemgetter(1))[0])) @@ -127,6 +127,18 @@ def remove_dataset(trigger, cache, session_id, logger): cache_utils.remove_fname(cache, session_id, fname, dataset) if dataset == loaders.DatasetReference.SEQUENCE.value: - cache_utils.remove_all_density(session_id, cache) + cache_utils.remove_all(session_id, cache_utils.CacheKeys.CONTACT_DENSITY.value, cache) + cache_utils.remove_all(session_id, cache_utils.CacheKeys.CONTACT_DIFF.value, cache) elif dataset == loaders.DatasetReference.CONTACT_MAP.value: cache_utils.remove_density(session_id, cache, fname) + cache_utils.remove_diff(session_id, cache, fname) + + +def lookup_data(session, session_id, cachekey, cache): + if cachekey in session.keys(): + data = 
session[cachekey] + elif cache.hexists(session_id, cachekey): + data = cache_utils.retrieve_data(session_id, cachekey, cache) + else: + return None + return data diff --git a/utils/heatmap_utils.py b/utils/heatmap_utils.py index df13597..53d5325 100644 --- a/utils/heatmap_utils.py +++ b/utils/heatmap_utils.py @@ -1,39 +1,47 @@ import plotly.graph_objects as go +import numpy as np from utils import color_palettes, DistanceLabels, HoverTemplates +DISTANCE_BINS = {0: 0, 1: 5, 2: 7, 3: 9, 4: 11, 5: 13, 6: 15, 7: 17, 8: 19, 9: 20} + def init_heatmap(seq_length): - heat = [[0 for x in range(seq_length + 1)] for y in range(seq_length + 1)] - hover = [[None for x in range(seq_length + 1)] for y in range(seq_length + 1)] + shape = (seq_length + 1, seq_length + 1) + heat = np.zeros(shape) + hover = np.full(shape, None) return heat, hover +def get_array(cmap, seq_length): + array = np.full((seq_length + 1, seq_length + 1), 20) + for contact in cmap: + array[contact[0], contact[1]] = DISTANCE_BINS[contact[3]] + array[contact[1], contact[0]] = DISTANCE_BINS[contact[3]] + return array + + def create_heatmap(session, display_settings, verbose_labels): heat, hover = init_heatmap(display_settings.seq_length) for idx, fname in enumerate(display_settings.cmap_selection): - if fname == '---': + if fname == '--- Empty ---': continue heat, hover = populate_heatmap(session[fname.encode()], idx, heat, hover, verbose_labels) palette_idx = [x.value for x in color_palettes.PaletteDefaultLayout].index(b'heatmap') colorscale = color_palettes.get_heatmap_colorscale(display_settings.selected_palettes[palette_idx]) - return heat, hover, colorscale + return heat.tolist(), hover.tolist(), colorscale def superimpose_heatmaps(session, display_settings, verbose_labels): - heat, hover = init_heatmap(display_settings.seq_length) - for idx, fname in enumerate(display_settings.cmap_selection): - if fname == '---': - continue - heat, hover = 
populate_superimposed_heatmap(session[display_settings.cmap_selection[0].encode()], - session[display_settings.cmap_selection[1].encode()], - heat, hover, verbose_labels) + heat, hover = create_superimposed_heatmap(session[display_settings.cmap_selection[0].encode()][1:], + session[display_settings.cmap_selection[1].encode()][1:], + display_settings.seq_length, verbose_labels) palette_idx = [x.value for x in color_palettes.PaletteDefaultLayout].index(b'heatmap') colorscale = color_palettes.get_heatmap_colorscale(display_settings.selected_palettes[palette_idx]) - return heat, hover, colorscale + return heat.tolist(), hover.tolist(), colorscale -def populate_heatmap(cmap, idx, distances, hover, verbose_labels=None): +def populate_heatmap(cmap, idx, heat, hover, verbose_labels=None): if idx == 1: idx_x = 1 idx_y = 0 @@ -41,92 +49,102 @@ def populate_heatmap(cmap, idx, distances, hover, verbose_labels=None): idx_x = 0 idx_y = 1 - if cmap[-1] == 'DISTO' or cmap[-1] == 'PDB': - cmap = cmap[:-1] + hover_labels = [] + + if cmap[0] == 'DISTO' or cmap[0] == 'PDB': + cmap = cmap[1:] + cmap_array = np.array(cmap) + res_1 = cmap_array[:, idx_x] + res_1 = res_1.astype(int) + res_2 = cmap_array[:, idx_y] + res_2 = res_2.astype(int) + distances = cmap_array[:, 3] + scores = cmap_array[:, 4] + heat[res_1.astype(int), res_2.astype(int)] = 9 - distances if verbose_labels is not None: - for contact in cmap: - distances[contact[idx_x]][contact[idx_y]] = 9 - contact[3] - label = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(contact[3])) - hover_label = HoverTemplates.DISTOGRAM_VERBOSE.format(contact[idx_y], contact[idx_x], label, contact[4], - verbose_labels[contact[idx_y] - 1], - verbose_labels[contact[idx_x] - 1]) - hover[contact[idx_x]][contact[idx_y]] = hover_label + for x, y, distance, score in zip(res_1, res_2, distances.astype(int), scores): + label = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(distance)) + hover_label = 
HoverTemplates.DISTOGRAM_VERBOSE.format(y, x, label, score, verbose_labels[y - 1], + verbose_labels[x - 1]) + hover_labels.append(hover_label) + else: - for contact in cmap: - distances[contact[idx_x]][contact[idx_y]] = 9 - contact[3] - label = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(contact[3])) - hover_label = HoverTemplates.DISTOGRAM.format(contact[idx_y], contact[idx_x], label, contact[4]) - hover[contact[idx_x]][contact[idx_y]] = hover_label + for x, y, distance, score in zip(res_1, res_2, distances.astype(int), scores): + label = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(distance)) + hover_label = HoverTemplates.DISTOGRAM.format(y, x, label, score) + hover_labels.append(hover_label) + + hover[res_1.astype(int), res_2.astype(int)] = hover_labels - return distances, hover + return heat, hover + + cmap_array = np.array(cmap) + res_1 = cmap_array[:, idx_x] + res_1 = res_1.astype(int) + res_2 = cmap_array[:, idx_y] + res_2 = res_2.astype(int) + scores = cmap_array[:, 2] + heat[res_1, res_2] = scores if verbose_labels is None: - for contact in cmap: - distances[contact[idx_x]][contact[idx_y]] = contact[2] - hover_label = HoverTemplates.CMAP.format(contact[idx_y], contact[idx_x], contact[2]) - hover[contact[idx_x]][contact[idx_y]] = hover_label + for x, y, score in zip(res_1, res_2, scores): + hover_labels.append(HoverTemplates.CMAP.format(y, x, score)) else: - for contact in cmap: - distances[contact[idx_x]][contact[idx_y]] = contact[2] - hover_label = HoverTemplates.CMAP_VERBOSE.format(contact[idx_y], contact[idx_x], contact[2], - verbose_labels[contact[idx_y] - 1], - verbose_labels[contact[idx_x] - 1]) - hover[contact[idx_x]][contact[idx_y]] = hover_label + for x, y, score in zip(res_1, res_2, scores): + hover_label = HoverTemplates.CMAP_VERBOSE.format(y, x, score, verbose_labels[y - 1], verbose_labels[x - 1]) + hover_labels.append(hover_label) - return distances, hover + hover[res_1.astype(int), res_2.astype(int)] = 
hover_labels + return heat, hover -def populate_superimposed_heatmap(reference_cmap, secondary_cmap, heat, hover, verbose_labels=None): - idx_x = 1 - idx_y = 0 - reference_ftype = reference_cmap.pop(-1) - secondary_ftype = secondary_cmap.pop(-1) - predicted_set = {(x[0], x[1]): x[3] for x in secondary_cmap} + +def create_superimposed_heatmap(reference_cmap, predicted_cmap, seq_length, verbose_labels=None): + hover = np.full((seq_length + 1, seq_length + 1), None) + reference_array = get_array(reference_cmap, seq_length) + predicted_array = get_array(predicted_cmap, seq_length) + difference_heatmap = np.abs(reference_array - predicted_array) + predicted_set = {(x[0], x[1]): x[3] for x in predicted_cmap} + reference_set = {(x[0], x[1]): x[3] for x in reference_cmap} if verbose_labels is not None: - for reference_distance in reference_cmap: - if tuple(reference_distance[:2]) in predicted_set.keys(): - predicted_distance = predicted_set[tuple(reference_distance[:2])] - else: - predicted_distance = 9 - error = abs((9 - reference_distance[3]) - (9 - predicted_distance)) - heat[reference_distance[idx_x]][reference_distance[idx_y]] = error - heat[reference_distance[idx_y]][reference_distance[idx_x]] = error - map_a_distance = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(reference_distance[3])) - map_b_distance = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(predicted_distance)) - hover_label_a = HoverTemplates.DISTOGRAM_SUPERIMPOSE_VERBOSE. \ - format(reference_distance[idx_y], reference_distance[idx_x], map_a_distance, map_b_distance, - error, verbose_labels[reference_distance[idx_y] - 1], - verbose_labels[reference_distance[idx_x] - 1]) - hover_label_b = HoverTemplates.DISTOGRAM_SUPERIMPOSE_VERBOSE. 
\ - format(reference_distance[idx_x], reference_distance[idx_y], map_a_distance, map_b_distance, - error, verbose_labels[reference_distance[idx_x] - 1], - verbose_labels[reference_distance[idx_y] - 1]) - hover[reference_distance[idx_x]][reference_distance[idx_y]] = hover_label_a - hover[reference_distance[idx_y]][reference_distance[idx_x]] = hover_label_b + for x in range(1, seq_length + 1): + for y in range(x + 5, seq_length + 1): + residues = (y, x) + predicted_bin = predicted_set[residues] if residues in predicted_set.keys() else 9 + reference_bin = reference_set[residues] if residues in reference_set.keys() else 9 + error = '{} Å'.format(difference_heatmap[x, y]) + map_a_distance = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(reference_bin)) + map_b_distance = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(predicted_bin)) + hover_label_a = HoverTemplates.DISTOGRAM_SUPERIMPOSE_VERBOSE.format(y, x, map_a_distance, + map_b_distance, error, + verbose_labels[y - 1], + verbose_labels[x - 1]) + hover_label_b = HoverTemplates.DISTOGRAM_SUPERIMPOSE_VERBOSE.format(x, y, map_a_distance, + map_b_distance, error, + verbose_labels[x - 1], + verbose_labels[y - 1]) + hover[x, y] = hover_label_a + hover[y, x] = hover_label_b else: - for reference_distance in reference_cmap: - if tuple(reference_distance[:2]) in predicted_set.keys(): - predicted_distance = predicted_set[tuple(reference_distance[:2])] - else: - predicted_distance = 9 - error = abs((9 - reference_distance[3]) - (9 - predicted_distance)) - heat[reference_distance[idx_x]][reference_distance[idx_y]] = error - heat[reference_distance[idx_y]][reference_distance[idx_x]] = error - map_a_distance = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(reference_distance[3])) - map_b_distance = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(predicted_distance)) - hover_label_a = HoverTemplates.DISTOGRAM_SUPERIMPOSE.format(reference_distance[idx_y], - reference_distance[idx_x], 
map_a_distance, - map_b_distance, error) - hover_label_b = HoverTemplates.DISTOGRAM_SUPERIMPOSE.format(reference_distance[idx_x], - reference_distance[idx_y], map_a_distance, - map_b_distance, error) - hover[reference_distance[idx_x]][reference_distance[idx_y]] = hover_label_a - hover[reference_distance[idx_y]][reference_distance[idx_x]] = hover_label_b - - return heat, hover + for x in range(1, seq_length + 1): + for y in range(x + 5, seq_length + 1): + residues = (y, x) + predicted_bin = predicted_set[residues] if residues in predicted_set.keys() else 9 + reference_bin = reference_set[residues] if residues in reference_set.keys() else 9 + error = '{} Å'.format(difference_heatmap[x, y]) + map_a_distance = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(reference_bin)) + map_b_distance = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(predicted_bin)) + hover_label_a = HoverTemplates.DISTOGRAM_SUPERIMPOSE.format(y, x, map_a_distance, + map_b_distance, error) + hover_label_b = HoverTemplates.DISTOGRAM_SUPERIMPOSE.format(x, y, map_a_distance, + map_b_distance, error) + hover[x, y] = hover_label_a + hover[y, x] = hover_label_b + + return difference_heatmap, hover def create_heatmap_trace(distances, colorscale, hovertext=None): diff --git a/utils/math_utils.py b/utils/math_utils.py new file mode 100644 index 0000000..973bc9e --- /dev/null +++ b/utils/math_utils.py @@ -0,0 +1,65 @@ +from conkit.misc.bandwidth import bandwidth_factory +import math +from numba import njit, vectorize +import numpy as np +from sklearn.neighbors import KernelDensity + + +@njit() +def calculate_mcc(tp, fp, tn, fn): + denominator = (tp + fp) * (tp + fn) * (tn + fp) * (tn + fn) + denominator = math.sqrt(denominator) + if denominator == 0: + return 1 + numerator = (tp * tn - fp * fn) * 10 + if numerator < 0: + return 10 + mcc = 10 - (numerator / denominator) + return mcc + + +@vectorize('float64(int64, int64)') +def get_difference(expected, observed): + difference = expected 
- observed + difference_squared = difference ** 2 + return difference_squared + + +@vectorize('float64(int64, float64)') +def populate_rmsd(seq_length, sum_squared_differences): + rmsd = math.sqrt(sum_squared_differences / seq_length) + if rmsd > 5: + return 5 + return rmsd + + +@njit() +def calculate_rmsd(expected_array, observed_array, seq_length): + squared_differences = get_difference(expected_array, observed_array) + seq_length_array = np.full(seq_length, seq_length) + sum_squared_differences = np.sum(squared_differences, axis=0) + rmsd = populate_rmsd(seq_length_array, sum_squared_differences) + return rmsd + + +def convolution_smooth_values(x, window=5): + box = np.ones(window) / window + x_smooth = np.convolve(x, box, mode='same') + return np.round(x_smooth, 0) + + +def cumsum_smooth(x, window=5): + cumsum_vec = np.cumsum(np.insert(x, 0, 0)) + return (cumsum_vec[window:] - cumsum_vec[:-window]) / window + + +def get_contact_density(contact_list, seq_length): + """Credits to Felix Simkovic; code taken from GitHub rigdenlab/conkit""" + x = np.array([i for c in contact_list for i in np.arange(c[1], c[0] + 1)], dtype=np.int64)[:, np.newaxis] + bw = bandwidth_factory('amise')(x).bw + kde = KernelDensity(bandwidth=bw).fit(x) + x_fit = np.arange(1, seq_length + 1)[:, np.newaxis] + density = np.exp(kde.score_samples(x_fit)).tolist() + density_max = max(density) + density = [int(round(float(i) / density_max, 1) * 10) for i in density] + return density diff --git a/utils/plot_utils.py b/utils/plot_utils.py index dddc2b6..7ceadfa 100644 --- a/utils/plot_utils.py +++ b/utils/plot_utils.py @@ -3,6 +3,7 @@ from dash.dash import no_update import dash_core_components as dcc from enum import Enum +import itertools import json from loaders import DatasetReference, AdditionalDatasetReference, STATES from layouts import ContextReference @@ -29,11 +30,10 @@ def create_ConPlot(session_id, cache, trigger, selected_tracks, cmap_selection, contact_marker_size=5, 
track_marker_size=5, track_separation=2, transparent=True, superimpose=False, heatmap=False, verbose_labels=False): session = cache.hgetall(session_id) - session, display_settings, verbose_labels, error = process_args(session_id, session, trigger, selected_tracks, - cmap_selection, factor, contact_marker_size, - track_separation, transparent, selected_palettes, - superimpose, track_marker_size, heatmap, - verbose_labels, cache) + session, display_settings, error = process_args(session_id, session, trigger, selected_tracks, cmap_selection, + factor, contact_marker_size, track_separation, transparent, + selected_palettes, superimpose, track_marker_size, heatmap, + verbose_labels, cache) if error is not None: return error @@ -41,8 +41,11 @@ def create_ConPlot(session_id, cache, trigger, selected_tracks, cmap_selection, display_card = get_display_control_card(display_settings) figure = create_figure(display_settings.axis_range) - add_contact_trace(session, display_settings, figure, verbose_labels) - add_additional_tracks(session_id, session, display_settings, figure, cache) + verbose_labels, additional_traces = add_additional_tracks(session_id, session, display_settings, figure, cache) + contact_traces = add_contact_trace(session, display_settings, figure, verbose_labels) + + figure.add_traces(contact_traces) + figure.add_traces(additional_traces) figure.update_xaxes(spikemode="across", showspikes=False) figure.update_yaxes(spikemode="across", showspikes=False) @@ -57,53 +60,66 @@ def create_ConPlot(session_id, cache, trigger, selected_tracks, cmap_selection, def add_additional_tracks(session_id, session, display_settings, figure, cache): + prediction_labels = {} + traces = [] for idx, fname in enumerate(display_settings.selected_tracks): - if fname == '---': + if fname == '--- Empty ---': continue - dataset, prediction = tracks_utils.retrieve_dataset_prediction(session_id, session, fname, display_settings, - cache) + dataset, prediction = 
tracks_utils.get_dataset_prediction(session_id, session, fname, display_settings, cache) + if display_settings.verbose_labels and fname not in prediction_labels: + prediction_labels[fname] = [STATES[dataset][x] for x in prediction] palette_idx = [x.name for x in color_palettes.DatasetColorPalettes].index(dataset) palette = display_settings.selected_palettes[palette_idx] if idx == 4: - traces = tracks_utils.get_diagonal_trace(prediction, dataset, display_settings.track_marker_size, - session[display_settings.seq_fname.encode()], - display_settings.alpha, palette) + traces += tracks_utils.get_diagonal_trace(prediction, dataset, display_settings.track_marker_size, + session[display_settings.seq_fname.encode()], + display_settings.alpha, palette) else: - traces = tracks_utils.get_traces(prediction, dataset, idx, display_settings.track_separation, - display_settings.track_marker_size, display_settings.alpha, palette) + traces += tracks_utils.get_traces(prediction, dataset, idx, display_settings.track_separation, + display_settings.track_marker_size, display_settings.alpha, palette) + + if display_settings.verbose_labels: + verbose_labels = [] + sequence = session[display_settings.seq_fname.encode()] + all_predictions = list(prediction_labels.values()) + label_template = '------
Residue {} ({})' + '
{}' * len(all_predictions) + for idx, residue_info in enumerate(zip(sequence, *all_predictions), 1): + verbose_labels.append(label_template.format(idx, *residue_info)) + + return verbose_labels, traces - for trace in traces: - figure.add_trace(trace) + return None, traces def add_contact_trace(session, display_settings, figure, verbose_labels): if display_settings.superimpose and display_settings.heatmap: heat, hover, colorscale = heatmap_utils.superimpose_heatmaps(session, display_settings, verbose_labels) - figure.add_trace(heatmap_utils.create_heatmap_trace(hovertext=hover, distances=heat, colorscale=colorscale)) + return heatmap_utils.create_heatmap_trace(hovertext=hover, distances=heat, colorscale=colorscale) elif display_settings.heatmap: heat, hover, colorscale = heatmap_utils.create_heatmap(session, display_settings, verbose_labels) - figure.add_trace(heatmap_utils.create_heatmap_trace(hovertext=hover, distances=heat, colorscale=colorscale)) + return heatmap_utils.create_heatmap_trace(hovertext=hover, distances=heat, colorscale=colorscale) elif display_settings.superimpose: reference_cmap = session[display_settings.cmap_selection[0].encode()] predicted_cmap = session[display_settings.cmap_selection[1].encode()] - traces = cmap_utils.create_superimposed_cmap(reference_cmap, predicted_cmap, display_settings, verbose_labels) - for trace in traces: - figure.add_trace(trace) + return cmap_utils.create_superimposed_cmap(reference_cmap, predicted_cmap, display_settings, verbose_labels) else: + traces = [] for idx, fname in enumerate(display_settings.cmap_selection): - if fname == '---': + if fname == '--- Empty ---': continue cmap = session[fname.encode()] size = display_settings.contact_marker_size x, y, hover = cmap_utils.create_cmap(cmap, idx, display_settings, verbose_labels) - figure.add_trace(cmap_utils.create_cmap_trace(x, y, 'circle', size, 'black', hover)) + traces.append(cmap_utils.create_cmap_trace(x, y, 'circle', size, 'black', hover)) + + return 
traces def get_display_control_card(display_settings): @@ -152,14 +168,14 @@ def lookup_input_errors(session_id, session, cmap_selection, superimpose, heatma error = components.PlotPlaceHolder(), \ components.MissingInputModal(*[missing.name for missing in missing_data]), \ components.DisplayControlCard(), True - return None, None, None, error + return None, None, error if superimpose and heatmap: reference_cmap = session[cmap_selection[0].encode()] predicted_cmap = session[cmap_selection[1].encode()] - error = no_update, components.InvalidSuperposeHeatmapModal(), no_update, no_update - if not isinstance(reference_cmap[-1], str) or not isinstance(predicted_cmap[-1], str): - return None, None, None, error + if not isinstance(reference_cmap[0], str) or not isinstance(predicted_cmap[0], str): + error = no_update, components.InvalidSuperposeHeatmapModal(), no_update, no_update + return None, None, error return None @@ -209,41 +225,93 @@ def process_args(session_id, session, trigger, selected_tracks, cmap_selection, cmap_selection=cmap_selection, available_cmaps=available_cmaps, heatmap=heatmap, verbose_labels=verbose_labels) - if verbose_labels: - fnames = [fname for fname in selected_tracks if fname != '---'] - verbose_labels = get_verbose_labels(session_id, session, fnames, display_settings, cache) - else: - verbose_labels = None + return session, display_settings, None + - return session, display_settings, verbose_labels, None +def separate_pdb_cmaps(session, cmap_fname_list): + non_pdb_fnames = [] + pdb_fnames = [] + + for fname in cmap_fname_list: + cmap = session[fname.encode()] + if cmap[0] == 'PDB': + pdb_fnames.append(fname) + else: + non_pdb_fnames.append(fname) + + return pdb_fnames, non_pdb_fnames def get_available_data(session): - available_tracks = [] - for dataset in AdditionalDatasetReference: - if dataset.value.encode() in session.keys() and session[dataset.value.encode()]: - available_tracks += session[dataset.value.encode()] + available_tracks = 
[{'label': '--- Empty ---', 'value': 'Empty_1'}, + {'label': '--- Seq. Hydrophobicity ---', 'value': 'Hydrophobicity_Header', 'disabled': True}, + {'label': session[DatasetReference.SEQUENCE.value.encode()], + 'value': session[DatasetReference.SEQUENCE.value.encode()]}, + {'label': '--- Contact Density ---', 'value': 'Density_Header', 'disabled': True}] + + available_cmaps, cmap_fname_list, cmap_density = get_cmap_density_tracks(session) + + if not cmap_fname_list: + available_tracks.append({'label': '--- Empty ---', 'value': 'Empty_2'}) + available_tracks.append({'label': '--- Contact Diff ---', 'value': 'Diff_Header', 'disabled': True}) + available_tracks.append({'label': '--- Empty ---', 'value': 'Empty_3'}) + else: + available_tracks += sorted(cmap_density, key=lambda k: k['label']) + available_tracks.append({'label': '--- Contact Diff ---', 'value': 'Diff_Header', 'disabled': True}) + cmap_diff = get_cmap_diff_tracks(cmap_fname_list) + if not cmap_diff: + available_tracks.append({'label': '--- Empty ---', 'value': 'Empty_3'}) + else: + available_tracks += sorted(cmap_diff, key=lambda k: k['label']) + + available_tracks.append({'label': '--- Other Tracks ---', 'value': 'AdditionalTracks_Header', 'disabled': True}) + other_tracks = get_other_tracks(session) + if not other_tracks: + available_tracks.append({'label': '--- Empty ---', 'value': 'Empty_4'}) + else: + available_tracks += sorted(other_tracks, key=lambda k: k['label']) + return available_tracks, sorted(available_cmaps) + + +def get_cmap_density_tracks(session): + cmap_density = [] available_cmaps = [] - for cmap_fname in session[DatasetReference.CONTACT_MAP.value.encode()]: + cmap_fname_list = session[DatasetReference.CONTACT_MAP.value.encode()] + for cmap_fname in cmap_fname_list: available_cmaps.append(cmap_fname) - available_tracks.append(cmap_fname) + cmap_density.append({'label': cmap_fname, 'value': cmap_fname}) + return available_cmaps, cmap_fname_list, cmap_density - 
available_tracks.append(session[DatasetReference.SEQUENCE.value.encode()]) - return available_tracks, available_cmaps +def get_cmap_diff_tracks(cmap_fname_list): + cmap_diff = [] + for combination in itertools.combinations(cmap_fname_list, 2): + label = '{} | {}'.format(*combination) + cmap_diff.append({'label': label, 'value': label}) + return cmap_diff + + +def get_other_tracks(session): + other_tracks = [] + for dataset in AdditionalDatasetReference: + if dataset.value.encode() in session.keys() and session[dataset.value.encode()]: + for fname in session[dataset.value.encode()]: + other_tracks.append({'label': fname, 'value': fname}) + return other_tracks def get_user_selection(cmap_selection, available_cmaps, track_selection, available_tracks): if len(cmap_selection) == 0: - cmap_selection = ['---'] * 2 + cmap_selection = ['--- Empty ---'] * 2 else: - cmap_selection = [fname if fname in available_cmaps else '---' for fname in cmap_selection] + cmap_selection = [fname if fname in available_cmaps else '--- Empty ---' for fname in cmap_selection] if len(track_selection) == 0: - track_selection = ['---'] * 9 + track_selection = ['--- Empty ---'] * 9 else: - track_selection = [track if track in available_tracks else '---' for track in track_selection] + available_track_labels = [track['label'] for track in available_tracks] + track_selection = [track if track in available_track_labels else '--- Empty ---' for track in track_selection] return track_selection, cmap_selection @@ -258,9 +326,9 @@ def get_default_layout(session): tracks.append(session[dataset.value][0]) if not any(tracks): - return ['---'] * 9, (cmap_fname, cmap_fname), selected_palettes + return ['--- Empty ---'] * 9, (cmap_fname, cmap_fname), selected_palettes else: - missing_tracks = ['---' for missing in range(0, 5 - len(tracks))] + missing_tracks = ['--- Empty ---' for missing in range(0, 5 - len(tracks))] tracks += missing_tracks return tracks[1:][::-1] + tracks, (cmap_fname, cmap_fname), 
selected_palettes @@ -278,23 +346,3 @@ def create_figure(axis_range): plot_bgcolor='rgba(0,0,0,0)' ) ) - - -def get_verbose_labels(session_id, session, fnames, display_settings, cache): - sequence = session[display_settings.seq_fname.encode()] - all_predictions = [] - for fname in set(fnames): - dataset, prediction = tracks_utils.retrieve_dataset_prediction(session_id, session, fname, - display_settings, cache) - dataset_dict = STATES[dataset] - prediction = [dataset_dict[x] for x in prediction] - all_predictions.append(prediction) - - labels = [] - for idx, residue in enumerate(sequence, 1): - current_label = '------
Residue {} ({})'.format(idx, residue) - for prediction in all_predictions: - current_label += '
{}'.format(prediction[idx - 1]) - labels.append(current_label) - - return labels diff --git a/utils/tests/test_cache_utils.py b/utils/tests/test_cache_utils.py index 626a04c..89e75c0 100644 --- a/utils/tests/test_cache_utils.py +++ b/utils/tests/test_cache_utils.py @@ -78,15 +78,27 @@ def test_8(self): self.assertDictEqual(expected, self.cache.hgetall(self.session_id)) def test_9(self): - cachekey_1 = 'fname_1_{}_2'.format(cache_utils.CacheKeys.METADATA_TAG.value).encode() + cachekey_1 = cache_utils.CacheKeys.CMAP_DENSITY.value.format('fname_1', '2').encode() density_1 = [1, 2, 3, 3, 4, 5] - cachekey_2 = 'fname_2_{}_2'.format(cache_utils.CacheKeys.METADATA_TAG.value).encode() + cachekey_2 = cache_utils.CacheKeys.CMAP_DENSITY.value.format('fname_2', '2').encode() density_2 = [5, 6, 7, 8, 9, 0] - cache_utils.store_density(self.session_id, cachekey_1, density_1, self.cache) - cache_utils.store_density(self.session_id, cachekey_2, density_2, self.cache) - output = cache_utils.retrieve_density(self.session_id, cachekey_2, self.cache) + cache_utils.store_data(self.session_id, cachekey_1, density_1, + cache_utils.CacheKeys.CONTACT_DENSITY.value, self.cache) + cache_utils.store_data(self.session_id, cachekey_2, density_2, + cache_utils.CacheKeys.CONTACT_DENSITY.value, self.cache) + output = cache_utils.retrieve_data(self.session_id, cachekey_2, self.cache) self.assertListEqual(output, density_2) expected_cache = {b'id': cache_utils.compress_data(self.session_id)} - cache_utils.remove_all_density(self.session_id, self.cache) + cache_utils.remove_all(self.session_id, cache_utils.CacheKeys.CONTACT_DENSITY.value, self.cache) self.assertDictEqual(expected_cache, self.cache.hgetall(self.session_id)) + + def test_10(self): + self.assertTrue(cache_utils.is_valid_fname('fname_1')) + self.assertTrue(cache_utils.is_valid_fname('fname_1-METADATA-DENSITY')) + self.assertFalse(cache_utils.is_valid_fname('fname_CONPLOT-INTERNAL-USE-ONLY-METADATA_1')) + 
self.assertFalse(cache_utils.is_valid_fname('{}_CONPLOT-INTERNAL-USE-ONLY-METADATA_{}')) + self.assertFalse(cache_utils.is_valid_fname('fname - density')) + self.assertFalse(cache_utils.is_valid_fname('seq - hydrophobicity')) + self.assertFalse(cache_utils.is_valid_fname(cache_utils.CacheKeys.CMAP_DENSITY.value.format('fname_1', '2'))) + diff --git a/utils/tests/test_heatmap_utils.py b/utils/tests/test_heatmap_utils.py new file mode 100644 index 0000000..8418d1d --- /dev/null +++ b/utils/tests/test_heatmap_utils.py @@ -0,0 +1,12 @@ +import unittest +from utils import heatmap_utils + + +class HeatmapUtilsTestCase(unittest.TestCase): + + def test_1(self): + expected_heat = [[0, 0, 0], [0, 0, 0], [0, 0, 0]] + expected_hover = [[None, None, None], [None, None, None], [None, None, None]] + heat, hover = heatmap_utils.init_heatmap(2) + self.assertListEqual(expected_hover, hover.tolist()) + self.assertListEqual(expected_heat, heat.tolist()) diff --git a/utils/tests/test_math_utils.py b/utils/tests/test_math_utils.py new file mode 100644 index 0000000..66c12fa --- /dev/null +++ b/utils/tests/test_math_utils.py @@ -0,0 +1,35 @@ +import os +import unittest +from utils import math_utils + + +class MathUtilsTestCase(unittest.TestCase): + + @unittest.skipIf('THIS_IS_GH_ACTIONS' in os.environ, "not implemented in Github Actions") + def test_1(self): + dummy_cmap = [(52, 50), (53, 51), (145, 143), (142, 140), (150, 148), (53, 50), (147, 145), (141, 139), + (143, 141), (148, 146)] + expected_density = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 7, 10, 10, 7, 4, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 5, 8, 10, 9, 8, 8, 8, 8, 8, 7, 6, 4, 2, 1, 0, + 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + + density = math_utils.get_contact_density(dummy_cmap, 168) + self.assertListEqual(density, expected_density) + + def test_2(self): + expected_output = 3.0210772833723656 + output = math_utils.calculate_mcc(5, 2, 120, 2) + self.assertEqual(output, expected_output) + + def test_3(self): + expected_output = 1 + output = math_utils.calculate_mcc(0, 0, 120, 2) + self.assertEqual(output, expected_output) + + def test_4(self): + expected_output = 10 + output = math_utils.calculate_mcc(12, 1, 0, 2) + self.assertEqual(output, expected_output) diff --git a/utils/tests/test_tracks_utils.py b/utils/tests/test_tracks_utils.py index c03f479..c14cdca 100644 --- a/utils/tests/test_tracks_utils.py +++ b/utils/tests/test_tracks_utils.py @@ -1,18 +1,107 @@ +import os import unittest from utils import tracks_utils +from collections import namedtuple +DisplayControlSettings = namedtuple('DisplayControlSettings', ('factor', 'seq_length')) -class SessionUtilsTestCase(unittest.TestCase): +class TrackUtilsTestCase(unittest.TestCase): + + @unittest.skipIf('THIS_IS_GH_ACTIONS' in os.environ, "not implemented in Github Actions") def test_1(self): dummy_cmap = [(52, 50), (53, 51), (145, 143), (142, 140), (150, 148), (53, 50), (147, 145), (141, 139), (143, 141), (148, 146)] expected_density = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 5, 7, 8, 8, 7, 5, 3, 2, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 7, 10, 10, 7, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 3, 4, 6, 8, 9, 10, 10, 10, 10, 9, 9, 8, 6, 5, 3, 2, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 4, 6, 7, 6, 5, 6, 6, 5, 5, 
4, 4, 3, 2, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - density = tracks_utils.get_contact_density(dummy_cmap, 168) + density = tracks_utils.calculate_density(dummy_cmap, 168, 20) self.assertListEqual(density, expected_density) + + def test_2(self): + """ + cmap_1 + 1 1 0 1 + 1 0 1 0 + 0 1 0 1 + 1 0 1 1 + cmap_2 + 0 1 0 1 + 1 1 1 0 + 1 1 1 1 + 1 1 1 0 + """ + dummy_cmap_1 = [(1, 1), (3, 1), (4, 1), (2, 2), (4, 2), (3, 3), (4, 4)] + dummy_cmap_2 = [(1, 1), (3, 1), (2, 1), (2, 2), (4, 2), (3, 2), (3, 3), (4, 4)] + expected_mcc = [10, 1, 4, 4] + expected_mcc_smooth = [2, 3, 4, 4, 2] + dummy_display_settings = DisplayControlSettings(factor=0, seq_length=4) + diff = tracks_utils.calculate_diff(dummy_cmap_1, dummy_cmap_2, dummy_display_settings) + mcc = tracks_utils.get_cmap_mcc(dummy_cmap_1, dummy_cmap_2, dummy_display_settings.seq_length, smooth=False) + mcc_smooth = tracks_utils.get_cmap_mcc(dummy_cmap_1, dummy_cmap_2, dummy_display_settings.seq_length) + self.assertListEqual(mcc, expected_mcc) + self.assertListEqual(mcc_smooth, expected_mcc_smooth) + self.assertListEqual(mcc_smooth, diff) + + def test_3(self): + """ + cmap_1 + 1 1 0 1 + 1 0 1 0 + 0 1 0 1 + 1 0 1 1 + cmap_2 + 0 1 0 1 + 1 1 1 0 + 1 1 1 1 + 1 1 1 0 + """ + dummy_cmap_1 = [(1, 1), (3, 1), (4, 1), (2, 2), (4, 2), (3, 3), (4, 4)] + dummy_cmap_2 = [(1, 1), (3, 1), (2, 1), (2, 2), (4, 2), (3, 2), (3, 3), (4, 4)] + expected_diff = [3, 3, 3, 3, 1] + dummy_display_settings = DisplayControlSettings(factor=1, seq_length=4) + + diff = tracks_utils.calculate_diff(dummy_cmap_1, dummy_cmap_2, dummy_display_settings) + self.assertListEqual(diff, expected_diff) + + def test_4(self): + """ + cmap_1 + 2 8 9 0 + 5 0 0 9 + 7 0 0 8 + 0 7 5 2 + cmap_2 + 9 6 0 0 + 5 1 0 0 + 5 0 1 6 + 0 5 5 9 + """ + cmap_1 = [ + [2, 1, 0, 7], + [3, 1, 0, 5], + [4, 1, 0, 2], + [4, 2, 0, 8], + [3, 2, 0, 0], + [4, 3, 0, 9] + ] + + cmap_2 = [ + [2, 1, 0, 5], + [3, 1, 0, 5], + [4, 1, 0, 9], + [3, 2, 0, 1], + [4, 2, 0, 6], + [4, 3, 0, 0] 
+ ] + + expected = [10, 8, 10, 10] + expected_smooth = [4, 6, 8, 8, 6] + output = tracks_utils.get_cmap_rmsd(cmap_1, cmap_2, 4, smooth=False) + output_smooth = tracks_utils.get_cmap_rmsd(cmap_1, cmap_2, 4, smooth=True) + self.assertListEqual(output, expected) + self.assertListEqual(output_smooth, expected_smooth) diff --git a/utils/tracks_utils.py b/utils/tracks_utils.py index 7036036..a5ad32f 100644 --- a/utils/tracks_utils.py +++ b/utils/tracks_utils.py @@ -1,59 +1,123 @@ -from parsers import DatasetStates -from loaders import AdditionalDatasetReference, DatasetReference -from utils import create_cmap_trace, color_palettes, cache_utils -from sklearn.cluster import estimate_bandwidth -from sklearn.neighbors import KernelDensity import numpy as np +from loaders import AdditionalDatasetReference, DatasetReference +from parsers import DatasetStates +from utils import create_cmap_trace, color_palettes, cache_utils, lookup_data, cmap_utils, math_utils def calculate_density(cmap, seq_length, factor): - if cmap[-1] == 'PDB' or cmap[-1] == 'DISTO': - cmap.pop(-1) - contact_list = cmap[:int(round(seq_length / factor, 0))] - return get_contact_density(contact_list, seq_length) + contact_list = cmap_utils.slice_cmap(cmap, seq_length, factor) + return math_utils.get_contact_density(contact_list, seq_length) + + +DISTANCE_BINS = {0: 0, 1: 5, 2: 7, 3: 9, 4: 11, 5: 13, 6: 15, 7: 17, 8: 19, 9: 20} + + +def get_distance_array(cmap, seq_length): + array = np.full((seq_length, seq_length), 20) + for contact in cmap: + array[seq_length - contact[0], contact[1] - 1] = DISTANCE_BINS[contact[3]] + array[seq_length - contact[1], contact[0] - 1] = DISTANCE_BINS[contact[3]] + return array + + +def get_cmap_mcc(cmap_1, cmap_2, size, smooth=True): + cmap_1_set = {resn: {(c[0], c[1]) for c in cmap_1 if resn in (c[0], c[1])} for resn in range(1, size + 1)} + cmap_2_set = {resn: {(c[0], c[1]) for c in cmap_2 if resn in (c[0], c[1])} for resn in range(1, size + 1)} + diff = [] + for resn in 
cmap_1_set.keys(): + tp = len(cmap_1_set[resn] & cmap_2_set[resn]) + fp = len(cmap_2_set[resn] - cmap_1_set[resn]) + fn = len(cmap_1_set[resn] - cmap_2_set[resn]) + tn = size - sum((tp, fp, fn)) + mcc = math_utils.calculate_mcc(tp, fp, tn, fn) + diff.append(mcc) + + if smooth: + return math_utils.convolution_smooth_values(diff).astype(int).tolist() + + return [int(round(mcc, 0)) for mcc in diff] + + +def get_cmap_rmsd(cmap_1, cmap_2, seq_length, smooth=True): + cmap_1_array = get_distance_array(cmap_1, seq_length) + cmap_2_array = get_distance_array(cmap_2, seq_length) + rmsd = math_utils.calculate_rmsd(cmap_1_array, cmap_2_array, seq_length) + if smooth: + rmsd = math_utils.convolution_smooth_values(rmsd) * 2 + return rmsd.astype(int).tolist() + else: + rmsd = np.round(rmsd, 0) * 2 + return rmsd.astype(int).tolist() + + +def calculate_diff(cmap_1, cmap_2, display_settings): + if cmap_utils.contains_distances(cmap_1) and cmap_utils.contains_distances(cmap_2): + return get_cmap_rmsd(cmap_1[1:], cmap_2[1:], display_settings.seq_length) + else: + cmap_1 = cmap_utils.slice_cmap(cmap_1, display_settings.seq_length, display_settings.factor) + cmap_2 = cmap_utils.slice_cmap(cmap_2, display_settings.seq_length, display_settings.factor) + return get_cmap_mcc(cmap_1, cmap_2, display_settings.seq_length) + + +def get_diff_args(session, fname, factor): + cmap_1_fname = fname.split('|')[0].rstrip().lstrip() + cmap_1 = session[cmap_1_fname.encode()] + cmap_2_fname = fname.split('|')[1].rstrip().lstrip() + cmap_2 = session[cmap_2_fname.encode()] + if cmap_utils.contains_distances(cmap_1) and cmap_utils.contains_distances(cmap_2): + cachekey = cache_utils.CacheKeys.CMAP_DIFF.value.format(cmap_1_fname, cmap_2_fname, '1').encode() + else: + cachekey = cache_utils.CacheKeys.CMAP_DIFF.value.format(cmap_1_fname, cmap_2_fname, factor).encode() -def retrieve_dataset_prediction(session_id, session, fname, display_settings, cache): + return cmap_1, cmap_2, cachekey + + +def 
get_dataset_prediction(session_id, session, fname, display_settings, cache): if fname == session[DatasetReference.SEQUENCE.value.encode()]: return DatasetReference.HYDROPHOBICITY.value, session[DatasetReference.HYDROPHOBICITY.value.encode()] if fname in session[DatasetReference.CONTACT_MAP.value.encode()]: - cachekey = '{}_{}_{}'.format(fname, cache_utils.CacheKeys.METADATA_TAG.value, display_settings.factor).encode() - if cachekey in session.keys(): - density = session[cachekey] - elif cache.hexists(session_id, cachekey): - density = cache_utils.retrieve_density(session_id, cachekey, cache) - else: + cachekey = cache_utils.get_cachekey(session, fname, display_settings.factor) + density = lookup_data(session, session_id, cachekey, cache) + if not density: density = calculate_density(session[fname.encode()], display_settings.seq_length, display_settings.factor) - cache_utils.store_density(session_id, cachekey, density, cache) + cache_utils.store_data(session_id, cachekey, density, cache_utils.CacheKeys.CONTACT_DENSITY.value, cache) + return DatasetReference.CONTACT_DENSITY.value, density + if cache_utils.MetadataTags.SEPARATOR.value in fname: + cmap_1, cmap_2, cachekey = get_diff_args(session, fname, display_settings.factor) + diff = lookup_data(session, session_id, cachekey, cache) + if not diff: + diff = calculate_diff(cmap_1, cmap_2, display_settings) + cache_utils.store_data(session_id, cachekey, diff, cache_utils.CacheKeys.CONTACT_DIFF.value, cache) + return DatasetReference.CONTACT_DIFF.value, diff + for dataset in AdditionalDatasetReference: if dataset.value.encode() in session.keys() and fname in session[dataset.value.encode()]: return dataset.value, session[fname.encode()] -def transform_coords_diagonal_axis(coord, distance, low_bound=False, ratio=1, y_axis=True): - if coord is None: - return None +def transform_coords_diagonal_xaxis(indices, distance, track_idx, ratio=1): + factor = distance / (1 + ratio ** 2) + if track_idx < 4: + factor = factor * -1 + 
return [idx + factor for idx in indices] - if y_axis: - factor = ratio * (distance / (1 + ratio ** 2)) - if low_bound: - factor = factor * -1 - else: - factor = distance / (1 + ratio ** 2) - if not low_bound: - factor = factor * -1 - return coord + factor +def transform_coords_diagonal_yaxis(prediction, state, distance, track_idx, ratio=1): + factor = ratio * (distance / (1 + ratio ** 2)) + if track_idx > 4: + factor = factor * -1 + return [idx + factor if residue == state else None for idx, residue in enumerate(prediction, 1)] def get_diagonal_trace(prediction, dataset, marker_size, sequence, alpha, color_palette): if prediction is None: return None - x_diagonal = [idx for idx in range(1, len(prediction) + 1)] + x = [idx for idx in range(1, len(prediction) + 1)] states = DatasetStates.__getattr__(dataset).value palette = color_palettes.DatasetColorPalettes.__getattr__(dataset).value.__getattr__(color_palette).value traces = [] @@ -62,14 +126,9 @@ def get_diagonal_trace(prediction, dataset, marker_size, sequence, alpha, color_ y = [idx if residue == state.value else None for idx, residue in enumerate(prediction, 1)] if not any(y): continue - - hovertext = ['Residue: {} ({}) | {}'.format(sequence[idx - 1], idx, state.name) for idx in x_diagonal] - color = palette.__getattr__(state.name).value - color = color.format(alpha) - - traces.append( - create_cmap_trace(x_diagonal, y, 'diamond', marker_size=marker_size, color=color, hovertext=hovertext) - ) + hovertext = ['Residue: {} ({}) | {}'.format(resid, idx, state.name) for idx, resid in enumerate(sequence, 1)] + color = palette.__getattr__(state.name).value.format(alpha) + traces.append(create_cmap_trace(x, y, 'diamond', marker_size=marker_size, color=color, hovertext=hovertext)) return traces @@ -84,34 +143,16 @@ def get_traces(prediction, dataset, track_idx, track_separation, marker_size, al palette = color_palettes.DatasetColorPalettes.__getattr__(dataset).value.__getattr__(color_palette).value track_origin = abs(4 
- track_idx) track_distance = track_separation * track_origin - if track_idx > 4: - low_bound = True - else: - low_bound = False + + x = transform_coords_diagonal_xaxis(x_diagonal, track_distance, track_idx) for state in states: - y_diagonal = [idx if residue == state.value else None for idx, residue in enumerate(prediction, 1)] - if not any(y_diagonal): + y = transform_coords_diagonal_yaxis(prediction, state.value, track_distance, track_idx) + if not any(y): continue - - y = [transform_coords_diagonal_axis(y, track_distance, low_bound=low_bound) for y in y_diagonal] - x = [transform_coords_diagonal_axis(x, track_distance, low_bound=low_bound, y_axis=False) for x in x_diagonal] - hovertext = ['%s' % state.name for idx in enumerate(x)] - color = palette.__getattr__(state.name).value - color = color.format(alpha) + hovertext = ['%s' % state.name for i in x] + color = palette.__getattr__(state.name).value.format(alpha) traces.append(create_cmap_trace(x, y, 'diamond', marker_size=marker_size, color=color, hovertext=hovertext)) return traces - - -def get_contact_density(contact_list, seq_length): - """Credits to Felix Simkovic; code taken from GitHub rigdenlab/conkit/core/contactmap.py""" - x = np.array([i for c in contact_list for i in np.arange(c[1], c[0] + 1)], dtype=np.int64)[:, np.newaxis] - bw = estimate_bandwidth(x) - kde = KernelDensity(bandwidth=bw).fit(x) - x_fit = np.arange(1, seq_length + 1)[:, np.newaxis] - density = np.exp(kde.score_samples(x_fit)).tolist() - density_max = max(density) - density = [int(round(float(i) / density_max, 1) * 10) for i in density] - return density