diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index d701079..65e3378 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -39,6 +39,7 @@ jobs:
redis-version: 5
- name: Run tests.py
env:
+ THIS_IS_GH_ACTIONS: 1
KEYDB_URL: ${{ secrets.KEYDB_URL }}
run: |
python tests.py
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 9913c32..c18258b 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -2,6 +2,20 @@ Changelog
=========
+0.4.1
+-----
+
+Added
+~~~~~
+- Added contact diff track with smoothed values (MCC for contacts and RMSD for distograms)
+
+Changed
+~~~~~~~
+- Increased contrast in sequence hydrophobicity color palettes
+- Use AMISE to estimate bandwidth required to calculate contact density
+- Updated track selector layout
+
+
0.4
-----
diff --git a/README.md b/README.md
index 9ab07fb..72d7772 100644
--- a/README.md
+++ b/README.md
@@ -59,21 +59,25 @@ Once you have installed `redis`, you will need to start the service by running:
$ sudo service redis start
```
-You will also need to create a environment variable called `KEYDB_URL` with
-the URL to connect to the redis server you just started on your machine:
+Now you'll need to clone this repository, install the requirements and set up environment variables.
+Please note that ConPlot requires at least `python 3.6`.
```bash
-$ KEYDB_URL=redis://localhost:6379
+$ git clone https://github.com/rigdenlab/conplot
+$ cd conplot
+$ python3.6 -m pip install -r requirements.txt
+$ echo "KEYDB_URL=0://localhost:6379" > .env
+$ echo "KEYDB_TIMEOUT=3600" >> .env
```
-After this, all you need to do is clone this repository, install the requirements
-and start the Flask development server on your machine. Please note that ConPlot
-requires at least `python 3.6`.
+With the last two commands you will also have created an `.env` file with a variable named
+`KEYDB_URL` pointing to the redis server and a `KEYDB_TIMEOUT` variable with the session
+timeout value. This is the period of inactivity after which a session expires. By default in
+`www.conplot.org` this has a value of 3600 minutes, but if running locally you can set this
+time to any other value. The only thing left to do is to start the Flask development
+server on your machine:
```bash
-$ git clone https://github.com/rigdenlab/conplot
-$ cd conplot
-$ python3.6 -m pip install -r requirements.txt
$ python3.6 app.py
```
diff --git a/app.py b/app.py
index 88af642..7270a1a 100644
--- a/app.py
+++ b/app.py
@@ -24,7 +24,7 @@ def serve_layout():
except (keydb.ConnectionError, TypeError, KeyError) as e:
app.logger.error('Redis connection error! {}'.format(e))
return layouts.RedisConnectionError()
- session_id = session_utils.initiate_session(cache, app.logger)
+ session_id = session_utils.initiate_session(cache, app.logger, keydb_timeout)
return layouts.Base(session_id)
@@ -44,6 +44,7 @@ def serve_layout():
'requests_pathname_prefix': '/conplot/',
})
keydb_pool = keydb_utils.create_pool(os.environ.get('KEYDB_URL'))
+keydb_timeout = os.environ.get('KEYDB_TIMEOUT')
app.layout = serve_layout
@@ -407,7 +408,7 @@ def javascript_exe_button(n_clicks, session_id):
elif 'new-session' in trigger['prop_id'] or session_utils.is_expired_session(session_id, cache, app.logger):
cache = keydb.KeyDB(connection_pool=keydb_pool)
- new_session_id = session_utils.initiate_session(cache, app.logger)
+ new_session_id = session_utils.initiate_session(cache, app.logger, keydb_timeout)
return "location.reload();", no_update, new_session_id
else:
@@ -454,7 +455,7 @@ def create_ConPlot(plot_click, refresh_click, factor, contact_marker_size, track
if any([True for x in (factor, contact_marker_size, track_marker_size, track_separation) if x is None or x < 0]):
app.logger.info('Session {} invalid display control value detected'.format(session_id))
return no_update, components.InvalidInputModal(), no_update, no_update
- elif superimpose and ('---' in cmap_selection or len(set(cmap_selection)) == 1):
+ elif superimpose and ('--- Empty ---' in cmap_selection or len(set(cmap_selection)) == 1):
return no_update, components.InvalidMapSelectionModal(), no_update, no_update
app.logger.info('Session {} creating conplot'.format(session_id))
diff --git a/components/__init__.py b/components/__init__.py
index ef69f27..b4c52cf 100644
--- a/components/__init__.py
+++ b/components/__init__.py
@@ -11,6 +11,7 @@ class UserReadableTrackNames(Enum):
heatmap = 'Heatmap'
hydrophobicity = 'Hydrophobicity'
density = 'Contact density'
+ diff = 'Contact diff'
class EmailIssueReference(Enum):
diff --git a/components/cards.py b/components/cards.py
index 0e7d141..0dc0b0d 100644
--- a/components/cards.py
+++ b/components/cards.py
@@ -1,3 +1,4 @@
+from utils import cache_utils
import components
import dash_core_components as dcc
import dash_bootstrap_components as dbc
@@ -286,23 +287,23 @@ def DisplayControlCard(available_tracks=None, selected_tracks=None, selected_cma
html.H5("Active tracks", className="card-text", style={'text-align': "center"}),
html.Hr(),
html.Br(),
- TrackSelectionCard('-4', selected_tracks[0], available_tracks=available_tracks),
+ dbc.Card(components.TrackLayoutSelector('-4', available_tracks, selected_tracks[0]), outline=False),
html.Br(),
- TrackSelectionCard('-3', selected_tracks[1], available_tracks=available_tracks),
+ dbc.Card(components.TrackLayoutSelector('-3', available_tracks, selected_tracks[1]), outline=False),
html.Br(),
- TrackSelectionCard('-2', selected_tracks[2], available_tracks=available_tracks),
+ dbc.Card(components.TrackLayoutSelector('-2', available_tracks, selected_tracks[2]), outline=False),
html.Br(),
- TrackSelectionCard('-1', selected_tracks[3], available_tracks=available_tracks),
+ dbc.Card(components.TrackLayoutSelector('-1', available_tracks, selected_tracks[3]), outline=False),
html.Br(),
- TrackSelectionCard(' 0', selected_tracks[4], available_tracks=available_tracks),
+ dbc.Card(components.TrackLayoutSelector('0', available_tracks, selected_tracks[4]), outline=False),
html.Br(),
- TrackSelectionCard('+1', selected_tracks[5], available_tracks=available_tracks),
+ dbc.Card(components.TrackLayoutSelector('+1', available_tracks, selected_tracks[5]), outline=False),
html.Br(),
- TrackSelectionCard('+2', selected_tracks[6], available_tracks=available_tracks),
+ dbc.Card(components.TrackLayoutSelector('+2', available_tracks, selected_tracks[6]), outline=False),
html.Br(),
- TrackSelectionCard('+3', selected_tracks[7], available_tracks=available_tracks),
+ dbc.Card(components.TrackLayoutSelector('+3', available_tracks, selected_tracks[7]), outline=False),
html.Br(),
- TrackSelectionCard('+4', selected_tracks[8], available_tracks=available_tracks),
+ dbc.Card(components.TrackLayoutSelector('+4', available_tracks, selected_tracks[8]), outline=False),
html.Br(),
html.Br(),
html.H5("Colour palettes", className="card-text", style={'text-align': "center"}),
@@ -310,21 +311,23 @@ def DisplayControlCard(available_tracks=None, selected_tracks=None, selected_cma
html.Br(),
ColorPaletteSelectionCard('density', selected_palettes[0]),
html.Br(),
- ColorPaletteSelectionCard('custom', selected_palettes[1]),
+ ColorPaletteSelectionCard('diff', selected_palettes[1]),
html.Br(),
- ColorPaletteSelectionCard('heatmap', selected_palettes[2]),
+ ColorPaletteSelectionCard('custom', selected_palettes[2]),
html.Br(),
- ColorPaletteSelectionCard('hydrophobicity', selected_palettes[3]),
+ ColorPaletteSelectionCard('heatmap', selected_palettes[3]),
html.Br(),
- ColorPaletteSelectionCard('membranetopology', selected_palettes[4]),
+ ColorPaletteSelectionCard('hydrophobicity', selected_palettes[4]),
html.Br(),
- ColorPaletteSelectionCard('msa', selected_palettes[5]),
+ ColorPaletteSelectionCard('membranetopology', selected_palettes[5]),
html.Br(),
- ColorPaletteSelectionCard('conservation', selected_palettes[6]),
+ ColorPaletteSelectionCard('msa', selected_palettes[6]),
html.Br(),
- ColorPaletteSelectionCard('disorder', selected_palettes[7]),
+ ColorPaletteSelectionCard('conservation', selected_palettes[7]),
html.Br(),
- ColorPaletteSelectionCard('secondarystructure', selected_palettes[8]),
+ ColorPaletteSelectionCard('disorder', selected_palettes[8]),
+ html.Br(),
+ ColorPaletteSelectionCard('secondarystructure', selected_palettes[9]),
html.Br(),
])
]
@@ -334,14 +337,6 @@ def DisplayControlCard(available_tracks=None, selected_tracks=None, selected_cma
else:
raise ValueError('This should not occur! Please report.')
-
-def TrackSelectionCard(track_idx, track_value, available_tracks):
- track_options = [{'label': '---', 'value': '---'}]
- track_options += [{'label': fname, 'value': fname} for fname in available_tracks]
-
- return dbc.Card(components.TrackLayoutSelector(track_idx, track_options, track_value), outline=False)
-
-
def ColorPaletteSelectionCard(dataset, selected_palette):
available_palettes = []
for palette in color_palettes.DatasetColorPalettes.__getattr__(dataset).value:
@@ -351,7 +346,7 @@ def ColorPaletteSelectionCard(dataset, selected_palette):
def HalfSquareSelectionCard(square_idx, selection, available_cmaps):
- cmap_options = [{'label': '---', 'value': '---'}]
+ cmap_options = [{'label': '--- Empty ---', 'value': '--- Empty ---'}]
cmap_options += [{'label': fname, 'value': fname} for fname in available_cmaps]
return dbc.Card(components.HalfSquareSelector(square_idx, cmap_options, selection), outline=False)
diff --git a/components/listgrpoups.py b/components/listgrpoups.py
index cbea080..853b0d8 100644
--- a/components/listgrpoups.py
+++ b/components/listgrpoups.py
@@ -191,48 +191,38 @@ def AdjustPlotHelpList():
'series of input menus:',
html.Ul([
html.Li(['L/N selector: Change the values of ', html.I('N'),
- ' with this selector to choose how many contacts should be '
- 'included in the plot (L is the number of residues in the '
- 'protein sequence, residues are sorted by their probability '
- 'score). If you set ', html.I('N'),
- ' to 0, then all contacts in the file will be displayed. Please '
- 'note that only numerical values between 0 and 10 are recommended.']),
- html.Li('Size selector: Change the size of the contact markers in the '
- 'plot. ConPlot will set a default value depending on the size of '
- 'the protein you are working with, but you can still change this '
- 'if you would like to make the markers smaller or bigger. Please '
+ ' with this selector to choose how many contacts should be included in the plot (L is '
+ 'the number of residues in the protein sequence, residues are sorted by their '
+ 'probability score). If you set ', html.I('N'),
+ ' to 0, then all contacts in the file will be displayed. Please note that only numerical '
+ 'values between 0 and 10 are recommended. Additionally, please remember that contact '
+ 'data shown for PDB files is unaltered by this selector.']),
+ html.Li('Size selector: Change the size of the contact markers in the plot. ConPlot will set a '
+ 'default value depending on the size of the protein you are working with, but you can '
+ 'still change this if you would like to make the markers smaller or bigger. Please '
'note that only numerical values between 1 and 15 are recommended.'),
- html.Li(['Map A and Map B selectors: These two selectors let you choose '
- 'which contact data should be displayed on the plot. By '
- 'default, ', html.I('Map A'),
- ' refers to the top half triangle of the map, and ',
- html.I('Map B'), ' to the lower one. If the ',
- html.I('Superimpose Maps'),
- ' switch is activated, then the roles of these two dropdown '
- 'menus change: ', html.I('Map A'),
- ' is now used to select the reference map, which will be '
- 'compared with the secondary map selected with the ',
- html.I('Map B'), ' selector.']),
- html.Li(['Superimpose Maps Switch: As explained above, if this switch '
- 'is activated ', html.I('Map A'),
- ' will be used as a reference map to be compared with ',
- html.I('Map B'),
- '. In this mode, contacts will be coloured according to their '
- 'presence in the reference map and the secondary map. Contacts '
- 'that appear on both the reference and the secondary map will be '
- 'coloured in black -match-, those that only appear in the '
- 'reference in grey -absent-, and those that only appear in the '
- 'secondary map in red -mismatch-. Please note that you can only '
- 'use this mode if you select two different contact map files in ',
+ html.Li(['Map A and Map B selectors: These two selectors let you choose which contact data should '
+ 'be displayed on the plot. By ' 'default, ', html.I('Map A'),
+ ' refers to the top half triangle of the map, and ', html.I('Map B'),
+ ' to the lower one. If the ', html.I('Superimpose Maps'),
+ ' switch is activated, then these roles change: ', html.I('Map A'),
+ ' is now used to select the reference map, which will be compared with the secondary map '
+ 'selected with the ', html.I('Map B'), ' selector.']),
+ html.Li(['Superimpose Maps Switch: As explained above, if this switch is activated ',
+ html.I('Map A'), ' will be used as a reference map to be compared with ', html.I('Map B'),
+ '. In this mode, contacts will be coloured according to their presence in the reference '
+ 'map and the secondary map. Contacts that appear on both the reference and the secondary '
+ 'map will be coloured in black -match-, those that only appear in the reference in grey '
+ '-absent-, and those that only appear in the secondary map in red -mismatch-. Please '
+ 'note that you can only use this mode if you select two different contact map files in ',
html.I('Map A'), ' and ', html.I('Map B'), ' selectors.']),
html.Li(['Create Heatmap Switch: If this switch is activated, a heatmap will be created with the '
'provided residue contact information. By default, if a contact map is uploaded, the '
'intensity of the colours in this heatmap will correspond with the confidence of each '
- 'contact. Alternatively, if a residue-residue distance prediction file has been uploaded '
- '(', html.I('CASPRR_MODE2'),
- ' format), the heatmap will correspond with the predicted distances for '
- 'each residue pair oin this file. Please note that when this mode is active, the ',
- html.I('L/N'), ' selector and the ', html.I('Size'),
+ 'contact. Alternatively, if a residue-residue distance prediction file has been '
+ 'uploaded, the heatmap will correspond with the predicted distances for each residue '
+ 'pair oin this file. Please note that when this mode is active, the ', html.I('L/N'),
+ ' selector and the ', html.I('Size'),
' selector will be disabled. You can read more about how to visualise residue-residue '
'distance predictions at ',
html.I('Tutorial 4. Residue-Residue distance predictions'), '.']),
@@ -242,9 +232,8 @@ def AdjustPlotHelpList():
'would normally be displayed.')
])],
style={"font-size": "110%", 'text-align': "justify"}),
- html.Li(['Section 2: Adjust additional tracks. In this section you will find selectors '
- 'that will let you control aspects about how the additional tracks are being '
- 'displayed in the plot:',
+ html.Li(['Section 2: Adjust additional tracks. In this section you will find selectors that will let you '
+ 'control aspects about how the additional tracks are being displayed in the plot:',
html.Ul([
html.Li('Size selector: Change the size of the tiles used to create the '
'tracks on the diagonal of the plot. By changing this value, '
@@ -326,6 +315,15 @@ def AdditionalFormatsHelpList():
html.A(html.U('here'), href=UrlIndex.CONSURF_CITATION.value),
'.'],
style={"font-size": "110%", 'text-align': "justify"}),
+ html.Li(['A3M file. This is a multiple sequence alignment file that should have been obtained using the '
+ 'sequence of interest as a query. ConPlot will parse the file and calculate the MSA coverage along '
+ 'the query sequence, normalise these values (1-10) and create a track where each residue '
+ 'is coloured according to the number of sequences aligned in that particular position These '
+ 'files are used in most contact prediction pipelines, and visualising the MSA coverage can help you '
+ 'understand the quality of the information used to obtain your predictions. Several alignment tools '
+ 'will create MSA files in this format, like for example HHBLITS, which you can use '
+ 'online ', html.A(html.U('here'), href=UrlIndex.HHBLITS_URL.value), '.'],
+ style={"font-size": "110%", 'text-align': "justify"}),
html.Li(['CUSTOM file. These files are plain text files that can be created manually '
'by users to include additional tracks of information to the plot. These '
'files enable limitless personalisation of the contact map plot, as it '
diff --git a/layouts/help.py b/layouts/help.py
index c50c693..36735e8 100644
--- a/layouts/help.py
+++ b/layouts/help.py
@@ -24,7 +24,7 @@ def Body(cache):
components.TutorialTwoModal(),
components.TutorialThreeModal(),
components.TutorialFourModal(),
- #components.TutorialFiveModal(),
+ # components.TutorialFiveModal(),
components.CustomFormatDescriptionModal(),
dbc.Row([
dbc.Col([
@@ -168,6 +168,11 @@ def Body(cache):
dbc.Alert(['TIPS: ',
html.Ul([
html.Br(),
+ html.Li(['Remember that the ', html.I('L/N'),
+ ' selector will not affect any data being shown for PDB files. '
+ 'Similarly, data will also not be affected if the ',
+ html.I('Create heatmap'), ' switch is turned on.'
+ ]),
html.Li(['If you have just created a plot with the ',
html.I('Generate Plot'),
' button and you can see individual squared tiles in the diagonal '
@@ -262,19 +267,30 @@ def Body(cache):
'memory server used by ConPlot.']),
dbc.Col([
html.Plaintext('$ sudo apt update\n$ sudo apt install redis-server\n$ sudo '
- 'service redis start\n$ KEYDB_URL=redis://localhost:6379')
+ 'service redis start')
], style={'background-color': '#EAEAEA'}, align='center'),
- html.P('With the above commands you will have installed Redis and started the server. You '
- 'will also have created a environment variable called "KEYDB_URL" containing '
- 'the URL to connect to your redis server. ConPlot will need to read this '
- 'environment variable to access the redis database. After this, all you need to do '
- 'is clone ConPlot repository, install the requirements and start the Flask '
- 'development server on your machine. Please note that ConPlot requires at least '
- 'python 3.6 installed:'),
+ html.P('Once you have installed `redis`, you will need to start the service by running:'),
+ dbc.Col([
+ html.Plaintext('$ sudo service redis start')
+ ], style={'background-color': '#EAEAEA'}, align='center'),
+ html.P('Now you will need to clone the repository, install the requirements and '
+ 'setup environment variables. Please note that ConPlot requires at least '
+ 'python 3.6.'),
dbc.Col([
html.Plaintext('$ git clone https://github.com/rigdenlab/conplot\n'
'$ cd conplot\n$ python3.6 -m pip install -r requirements\n$ '
- 'python3.6 app.py')
+ 'echo "KEYDB_URL=0://localhost:6379" > .env\n$ echo "KEYDB_TIME'
+ 'OUT=3600" >> .env')
+ ], style={'background-color': '#EAEAEA'}, align='center'),
+ html.P('With the last two commands you will also have created an .env file with a '
+ 'variable named KEYDB_URL pointing to the redis server and a KEYDB_TIMEOUT '
+ 'variable with the session timeout value. This is the time at which a session '
+ 'expires after inactivity. By default in www.conplot.org this has a value of 3600 '
+ 'minutes, but if running locally you can set this time to any other value. '
+ 'The only thing left to do is to start the Flask development server on your '
+ 'machine:'),
+ dbc.Col([
+ html.Plaintext('$ python3.6 app.py')
], style={'background-color': '#EAEAEA'}, align='center'),
html.P(['Now you will be able to access the app on ',
html.A(html.U('http://127.0.0.1:8050/home'),
diff --git a/loaders/__init__.py b/loaders/__init__.py
index a6075a9..7d56d76 100644
--- a/loaders/__init__.py
+++ b/loaders/__init__.py
@@ -1,6 +1,5 @@
from enum import Enum
import base64
-from parsers import HydrophobicityStates
class DatasetReference(Enum):
@@ -8,6 +7,7 @@ class DatasetReference(Enum):
HYDROPHOBICITY = 'hydrophobicity'
CONTACT_MAP = 'contact'
CONTACT_DENSITY = 'density'
+ CONTACT_DIFF = 'diff'
MEMBRANE_TOPOLOGY = 'membranetopology'
SECONDARY_STRUCTURE = 'secondarystructure'
CONSERVATION = 'conservation'
@@ -130,6 +130,19 @@ def SequenceLoader(*args, **kwargs):
9: 'CONTACT_DENSITY_9',
10: 'CONTACT_DENSITY_10',
},
+ DatasetReference.CONTACT_DIFF.value:{
+ 0: 'CONTACT_DIFF_0',
+ 1: 'CONTACT_DIFF_1',
+ 2: 'CONTACT_DIFF_2',
+ 3: 'CONTACT_DIFF_3',
+ 4: 'CONTACT_DIFF_4',
+ 5: 'CONTACT_DIFF_5',
+ 6: 'CONTACT_DIFF_6',
+ 7: 'CONTACT_DIFF_7',
+ 8: 'CONTACT_DIFF_8',
+ 9: 'CONTACT_DIFF_9',
+ 10: 'CONTACT_DIFF_10',
+ },
DatasetReference.MSA.value: {
0: 'MSA_COVERAGE_0',
1: 'MSA_COVERAGE_1',
diff --git a/parsers/__init__.py b/parsers/__init__.py
index c16e1e0..75b09d0 100644
--- a/parsers/__init__.py
+++ b/parsers/__init__.py
@@ -1,4 +1,5 @@
from enum import Enum
+from operator import itemgetter
def ConsurfParser(*args, **kwargs):
@@ -257,6 +258,20 @@ class DensityStates(Enum):
CONTACT_DENSITY_10 = 10
+class DiffStates(Enum):
+ CONTACT_DIFF_0 = 0
+ CONTACT_DIFF_1 = 1
+ CONTACT_DIFF_2 = 2
+ CONTACT_DIFF_3 = 3
+ CONTACT_DIFF_4 = 4
+ CONTACT_DIFF_5 = 5
+ CONTACT_DIFF_6 = 6
+ CONTACT_DIFF_7 = 7
+ CONTACT_DIFF_8 = 8
+ CONTACT_DIFF_9 = 9
+ CONTACT_DIFF_10 = 10
+
+
class MsaStates(Enum):
MSA_COVERAGE_0 = 0
MSA_COVERAGE_1 = 1
@@ -279,4 +294,13 @@ class DatasetStates(Enum):
custom = CustomStates
hydrophobicity = HydrophobicityStates
density = DensityStates
+ diff = DiffStates
msa = MsaStates
+
+
+def get_unique_distances(elements):
+ key = itemgetter(0)
+ unique_contacts = list({key(el): el for el in elements}.values())
+ output = ['DISTO']
+ output += sorted([(*contact[0], *contact[1:]) for contact in unique_contacts], key=itemgetter(2), reverse=True)
+ return output
diff --git a/parsers/casprr2parser.py b/parsers/casprr2parser.py
index d600018..cbb9e2f 100644
--- a/parsers/casprr2parser.py
+++ b/parsers/casprr2parser.py
@@ -1,4 +1,4 @@
-from utils import get_unique_distances
+from parsers import get_unique_distances
from utils.exceptions import InvalidFormat
diff --git a/parsers/mappred.py b/parsers/mappred.py
index 8616471..e6e03ac 100644
--- a/parsers/mappred.py
+++ b/parsers/mappred.py
@@ -1,4 +1,4 @@
-from utils import get_unique_distances
+from parsers import get_unique_distances
from utils.exceptions import InvalidFormat
diff --git a/parsers/npzparser.py b/parsers/npzparser.py
index 6287828..375afe4 100644
--- a/parsers/npzparser.py
+++ b/parsers/npzparser.py
@@ -2,7 +2,7 @@
import base64
import numpy as np
from utils.exceptions import InvalidFormat
-from utils import get_unique_distances
+from parsers import get_unique_distances
def parse_array(array):
diff --git a/parsers/pdbparser.py b/parsers/pdbparser.py
index 9b82634..3f3602f 100644
--- a/parsers/pdbparser.py
+++ b/parsers/pdbparser.py
@@ -61,6 +61,6 @@ def PDBParser(input, input_format=None):
if not contacts:
raise InvalidFormat('Unable to parse contacts')
- output = sorted(contacts, key=itemgetter(2), reverse=True)
- output.append("PDB")
+ output = ["PDB"]
+ output += sorted(contacts, key=itemgetter(2), reverse=True)
return output
diff --git a/parsers/tests/test_casprr2parser.py b/parsers/tests/test_casprr2parser.py
index ed8e08f..d913415 100644
--- a/parsers/tests/test_casprr2parser.py
+++ b/parsers/tests/test_casprr2parser.py
@@ -35,7 +35,7 @@ def test_1(self):
output = CASPRR2Parser(dummy_prediction)
- self.assertEqual('DISTO', output.pop(-1))
+ self.assertEqual('DISTO', output.pop(0))
self.assertEqual(12, len(output))
self.assertListEqual(expected_res1, [contact[0] for contact in output])
self.assertListEqual(expected_res2, [contact[1] for contact in output])
diff --git a/parsers/tests/test_mappred.py b/parsers/tests/test_mappred.py
index f6fa43b..7672893 100644
--- a/parsers/tests/test_mappred.py
+++ b/parsers/tests/test_mappred.py
@@ -24,7 +24,7 @@ def test_1(self):
output = MappredParser(dummy_prediction)
- self.assertEqual('DISTO', output.pop(-1))
+ self.assertEqual('DISTO', output.pop(0))
self.assertEqual(4, len(output))
self.assertListEqual(expected_res1, [contact[0] for contact in output])
self.assertListEqual(expected_res2, [contact[1] for contact in output])
diff --git a/parsers/tests/test_pdbparser.py b/parsers/tests/test_pdbparser.py
index d48a300..782bc50 100644
--- a/parsers/tests/test_pdbparser.py
+++ b/parsers/tests/test_pdbparser.py
@@ -33,7 +33,7 @@ def test_1(self):
output = PDBParser(dummy_prediction)
self.assertEqual(7, len(output))
- self.assertEqual('PDB', output.pop(-1))
+ self.assertEqual('PDB', output.pop(0))
self.assertListEqual(expected_res1, [contact[0] for contact in output])
self.assertListEqual(expected_res2, [contact[1] for contact in output])
self.assertListEqual(expected_score, [contact[2] for contact in output])
diff --git a/requirements.txt b/requirements.txt
index a633f1b..5fffe4b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -21,4 +21,7 @@ keyring~=22.0.1
keyrings.cryptfile~=1.3.6
numpy~=1.19.4
fast-enum~=1.3.0
-scikit-learn~=0.24.1
\ No newline at end of file
+scikit-learn~=0.24.1
+numba~=0.53.1
+conkit~=0.12.0
+python-dotenv~=0.17.1
\ No newline at end of file
diff --git a/utils/__init__.py b/utils/__init__.py
index 8449257..77b2fd1 100644
--- a/utils/__init__.py
+++ b/utils/__init__.py
@@ -5,7 +5,7 @@
def conplot_version():
- return 'v0.4'
+ return 'v0.4.1'
def get_base_url():
@@ -106,6 +106,7 @@ class UrlIndex(Enum):
IUPRED_CITATION = 'https://doi.org/10.1093/nar/gky384'
CONSURF_WEB = 'https://consurf.tau.ac.il/'
CONSURF_CITATION = 'https://doi.org/10.1093/nar/gkw408'
+ HHBLITS_URL = 'https://toolkit.tuebingen.mpg.de/tools/hhblits'
GDPR_WEBSITE = 'https://gdpr-info.eu'
DOCKER_HUB = 'https://hub.docker.com/r/filosanrod/conplot'
CONPLOT_DOCKER = 'https://github.com/rigdenlab/conplot-docker'
@@ -134,6 +135,12 @@ def retrieve_sequence_fname(*args, **kwargs):
return retrieve_sequence_fname(*args, **kwargs)
+def contains_distances(*args, **kwargs):
+ from utils.cmap_utils import contains_distances
+
+ return contains_distances(*args, **kwargs)
+
+
def CacheKeys(*args, **kwargs):
from utils.cache_utils import CacheKeys
@@ -170,6 +177,24 @@ def get_active_sessions(*args, **kwargs):
return get_active_sessions(*args, **kwargs)
+def lookup_data(*args, **kwargs):
+ from utils.data_utils import lookup_data
+
+ return lookup_data(*args, **kwargs)
+
+
+def create_cmap_sets(*args, **kwargs):
+ from utils.cmap_utils import create_cmap_sets
+
+ return create_cmap_sets(*args, **kwargs)
+
+
+def slice_cmap(*args, **kwargs):
+ from utils.cmap_utils import slice_cmap
+
+ return slice_cmap(*args, **kwargs)
+
+
def load_figure_json(*args, **kwargs):
from utils.plot_utils import load_figure_json
@@ -255,12 +280,3 @@ def get_unique_contacts(elements):
output = [(*contact[0], contact[1]) for contact in unique]
output = sorted(output, key=itemgetter(2), reverse=True)
return output
-
-
-def get_unique_distances(elements):
- key = itemgetter(0)
- unique_contacts = list({key(el): el for el in elements}.values())
- output = [(*contact[0], *contact[1:]) for contact in unique_contacts]
- output = sorted(output, key=itemgetter(2), reverse=True)
- output.append('DISTO')
- return output
diff --git a/utils/cache_utils.py b/utils/cache_utils.py
index 58f1f02..19a213c 100644
--- a/utils/cache_utils.py
+++ b/utils/cache_utils.py
@@ -13,6 +13,7 @@ class CacheKeys(Enum):
DISPLAY_CONTROL_JSON = 'display_control_json'
CONTACT_MAP = loaders.DatasetReference.CONTACT_MAP.value
CONTACT_DENSITY = loaders.DatasetReference.CONTACT_DENSITY.value
+ CONTACT_DIFF = loaders.DatasetReference.CONTACT_DIFF.value
CUSTOM = loaders.DatasetReference.CUSTOM.value
SEQUENCE = loaders.DatasetReference.SEQUENCE.value
SEQUENCE_HYDROPHOBICITY = loaders.DatasetReference.HYDROPHOBICITY.value
@@ -20,29 +21,40 @@ class CacheKeys(Enum):
SECONDARY_STRUCTURE = loaders.DatasetReference.SECONDARY_STRUCTURE.value
CONSERVATION = loaders.DatasetReference.CONSERVATION.value
DISORDER = loaders.DatasetReference.DISORDER.value
- METADATA_TAG = 'CONPLOT-INTERNAL-USE-ONLY-METADATA-PROTECTED-TAG'
+ CMAP_DENSITY = '{}_CONPLOT-INTERNAL-USE-ONLY-METADATA-DENSITY-TAG_{}'
+ CMAP_DIFF = '{}_{}_CONPLOT-INTERNAL-USE-ONLY-METADATA-DIFF-TAG_{}'
+ PROTECETED_TAG = 'CONPLOT-INTERNAL-USE-ONLY-METADATA'
-def retrieve_density(session_id, density_cachekey, cache):
- density = cache.hget(session_id, density_cachekey)
+class MetadataTags(Enum):
+ DENSITY = ' - density'
+ HYDROPHOBICITY = ' - hydrophobicity'
+ DIFF = ' - diff'
+ SEPARATOR = '|'
+ HYPHEN = '---'
+ TAG = 'CONPLOT-INTERNAL-USE-ONLY-METADATA'
+
+
+def retrieve_data(session_id, cachekey, cache):
+ density = cache.hget(session_id, cachekey)
return decompress_data(density)
-def store_density(session_id, density_cachekey, density, cache):
- cache.hset(session_id, density_cachekey, compress_data(density))
- store_fname(cache, session_id, density_cachekey.decode(), CacheKeys.CONTACT_DENSITY.value)
+def store_data(session_id, cachekey, data, dataset, cache):
+ cache.hset(session_id, cachekey, compress_data(data))
+ store_fname(cache, session_id, cachekey.decode(), dataset)
-def remove_all_density(session_id, cache):
- density_list = cache.hget(session_id, CacheKeys.CONTACT_DENSITY.value)
- if not density_list:
+def remove_all(session_id, dataset, cache):
+ cachekey_list = cache.hget(session_id, dataset)
+ if not cachekey_list:
return
- density_list = decompress_data(density_list)
- for density in density_list:
- cache.hdel(session_id, density)
+ cachekey_list = decompress_data(cachekey_list)
+ for cachekey in cachekey_list:
+ cache.hdel(session_id, cachekey)
- cache.hdel(session_id, CacheKeys.CONTACT_DENSITY.value)
+ cache.hdel(session_id, dataset)
def remove_density(session_id, cache, fname):
@@ -51,7 +63,7 @@ def remove_density(session_id, cache, fname):
return
density_list = decompress_data(density_list)
- density_cachekey = '{}_{}'.format(fname, CacheKeys.METADATA_TAG.value)
+ density_cachekey = '{}_{}'.format(fname, CacheKeys.PROTECETED_TAG.value)
for density in density_list:
if density_cachekey in density:
cache.hdel(session_id, density)
@@ -59,8 +71,21 @@ def remove_density(session_id, cache, fname):
cache.hset(session_id, CacheKeys.CONTACT_DENSITY.value, compress_data(density_list))
+def remove_diff(session_id, cache, fname):
+ diff_list = cache.hget(session_id, CacheKeys.CONTACT_DIFF.value)
+ if not diff_list:
+ return
+ diff_list = decompress_data(diff_list)
+
+ for diff in diff_list:
+ if fname in diff:
+ cache.hdel(session_id, diff)
+ diff_list = [diff for diff in diff_list if fname not in diff]
+ cache.hset(session_id, CacheKeys.CONTACT_DIFF.value, compress_data(diff_list))
+
+
def is_valid_fname(fname):
- if CacheKeys.METADATA_TAG.value in fname or fname in [x.value for x in CacheKeys]:
+ if any([x for x in CacheKeys if x.value == fname]) or any([tag for tag in MetadataTags if tag.value in fname]):
return False
return True
@@ -148,7 +173,8 @@ def clear_cache(session_id, cache):
remove_datasets(session_id, cache)
remove_figure(session_id, cache)
remove_sequence(session_id, cache)
- remove_all_density(session_id, cache)
+ remove_all(session_id, CacheKeys.CONTACT_DENSITY.value, cache)
+ remove_all(session_id, CacheKeys.CONTACT_DIFF.value, cache)
def remove_datasets(session_id, cache):
@@ -184,3 +210,10 @@ def is_redis_available(cache):
def get_active_sessions(cache):
return cache.dbsize()
+
+
+def get_cachekey(session, fname, factor):
+ if 'PDB' == session[fname.encode()][0]:
+ return CacheKeys.CMAP_DENSITY.value.format(fname, fname).encode()
+ else:
+ return CacheKeys.CMAP_DENSITY.value.format(fname, factor).encode()
diff --git a/utils/cmap_utils.py b/utils/cmap_utils.py
index 91056f3..c14304c 100644
--- a/utils/cmap_utils.py
+++ b/utils/cmap_utils.py
@@ -18,11 +18,7 @@ def create_cmap_trace(x, y, symbol, marker_size, color, hovertext=None):
def create_cmap(cmap, idx, display_settings, verbose_labels=None):
- if cmap[-1] == 'PDB' or cmap[-1] == 'DISTO':
- del cmap[-1]
-
- if display_settings.factor != 0:
- cmap = cmap[:int(round(display_settings.seq_length / display_settings.factor, 0))]
+ cmap = slice_cmap(cmap, display_settings.seq_length, display_settings.factor)
if idx == 1:
idx_x = 0
@@ -39,10 +35,9 @@ def create_cmap(cmap, idx, display_settings, verbose_labels=None):
for contact in cmap:
res1_list.append(contact[idx_x])
res2_list.append(contact[idx_y])
- res_x_label = verbose_labels[contact[idx_x] - 1]
- res_y_label = verbose_labels[contact[idx_y] - 1]
- hover.append(HoverTemplates.CMAP_VERBOSE.format(contact[idx_x], contact[idx_y], contact[2], res_x_label,
- res_y_label))
+ xlabel = verbose_labels[contact[idx_x] - 1]
+ ylabel = verbose_labels[contact[idx_y] - 1]
+ hover.append(HoverTemplates.CMAP_VERBOSE.format(contact[idx_x], contact[idx_y], contact[2], xlabel, ylabel))
else:
for contact in cmap:
res1_list.append(contact[idx_x])
@@ -52,45 +47,39 @@ def create_cmap(cmap, idx, display_settings, verbose_labels=None):
return res1_list, res2_list, hover
-def superimpose_cmaps(reference_cmap, predicted_cmap, display_settings):
- if display_settings.factor != 0:
- predicted_cmap = predicted_cmap[:int(round(display_settings.seq_length / display_settings.factor, 0))]
- if reference_cmap[-1] == 'PDB':
- del reference_cmap[-1]
- reference_cmap = [contact for contact in reference_cmap if contact[2] > 0]
- elif reference_cmap[-1] == 'DISTO':
- del reference_cmap[-1]
- reference_cmap = reference_cmap[:int(round(display_settings.seq_length / display_settings.factor, 0))]
- else:
- reference_cmap = reference_cmap[:int(round(display_settings.seq_length / display_settings.factor, 0))]
- elif reference_cmap[-1] == 'PDB' or reference_cmap[-1] == 'DISTO':
- del reference_cmap[-1]
+def contains_distances(cmap):
+ if len(cmap[-1]) > 3:
+ return True
+ return False
+
+
+def slice_cmap(cmap, seq_length, factor):
+ if cmap[0] == 'PDB':
+ return [contact for contact in cmap[1:] if contact[2] > 0]
+ elif cmap[0] == 'DISTO':
+ cmap = cmap[1:]
- reference_contacts = [contact[:2] for contact in reference_cmap]
- predicted_contacts = [contact[:2] for contact in predicted_cmap]
+ if factor != 0:
+ cmap = cmap[:int(round(seq_length / factor, 0))]
- matched = []
- mismatched = []
- reference = []
+ return cmap
- for contact in reference_cmap:
- if contact[:2] in predicted_contacts:
- matched.append(contact)
- else:
- reference.append(contact)
- for contact in predicted_cmap:
- if contact[:2] not in reference_contacts:
- mismatched.append(contact)
+def create_cmap_sets(reference_cmap, predicted_cmap, display_settings):
+ reference_cmap = slice_cmap(reference_cmap, display_settings.seq_length, display_settings.factor)
+ predicted_cmap = slice_cmap(predicted_cmap, display_settings.seq_length, display_settings.factor)
+ predicted_set = {(x[0], x[1]): x[2] for x in predicted_cmap}
+ reference_set = {(x[0], x[1]): x[2] for x in reference_cmap}
- return reference, matched, mismatched
+ return reference_set, predicted_set
def create_superimposed_cmap(reference_cmap, predicted_cmap, display_settings, verbose_labels):
traces = []
- ref, match, mismatch = superimpose_cmaps(reference_cmap, predicted_cmap, display_settings)
- predicted_set = {(x[0], x[1]): x[2] for x in predicted_cmap}
- reference_set = {(x[0], x[1]): x[2] for x in reference_cmap}
+ reference_set, predicted_set = create_cmap_sets(reference_cmap, predicted_cmap, display_settings)
+ ref = reference_set.keys() - predicted_set.keys()
+ mismatch = predicted_set.keys() - reference_set.keys()
+ match = reference_set.keys() & predicted_set.keys()
x, y, hover = process_superimposed_cmap(ref, reference_set, predicted_set, verbose_labels)
traces.append(create_cmap_trace(x, y, 'circle', display_settings.contact_marker_size, 'grey', hover))
@@ -112,16 +101,8 @@ def process_superimposed_cmap(contacts, reference_set, predicted_set, verbose_la
if verbose_labels is not None:
for contact in contacts:
-
- if tuple(contact[:2]) in predicted_set.keys():
- pred_confidence = predicted_set[tuple(contact[:2])]
- else:
- pred_confidence = 0
- if tuple(contact[:2]) in reference_set.keys():
- ref_confidence = reference_set[tuple(contact[:2])]
- else:
- ref_confidence = 0
-
+ pred_confidence = predicted_set[contact] if contact in predicted_set.keys() else 0
+ ref_confidence = reference_set[contact] if contact in reference_set.keys() else 0
res1_list.append(contact[0])
res2_list.append(contact[1])
res_1_label = verbose_labels[contact[0] - 1]
@@ -132,15 +113,8 @@ def process_superimposed_cmap(contacts, reference_set, predicted_set, verbose_la
hover_2.append(HoverTemplates.CMAP_SUPERIMPOSE_VERBOSE.format(*label))
else:
for contact in contacts:
- if tuple(contact[:2]) in predicted_set.keys():
- pred_confidence = predicted_set[tuple(contact[:2])]
- else:
- pred_confidence = 0
- if tuple(contact[:2]) in reference_set.keys():
- ref_confidence = reference_set[tuple(contact[:2])]
- else:
- ref_confidence = 0
-
+ pred_confidence = predicted_set[contact] if contact in predicted_set.keys() else 0
+ ref_confidence = reference_set[contact] if contact in reference_set.keys() else 0
res1_list.append(contact[0])
res2_list.append(contact[1])
label = (contact[0], contact[1], ref_confidence, pred_confidence)
diff --git a/utils/color_palettes.py b/utils/color_palettes.py
index 899d7dd..b189738 100644
--- a/utils/color_palettes.py
+++ b/utils/color_palettes.py
@@ -177,6 +177,20 @@ class Density_GreyColorPalette(Enum):
CONTACT_DENSITY_10 = 'rgb(0,0,0,{})'
+class Diff_GreyColorPalette(Enum):
+ CONTACT_DIFF_0 = 'rgba(255,255,255,{})'
+ CONTACT_DIFF_1 = 'rgba(229,229,229,{})'
+ CONTACT_DIFF_2 = 'rgba(204,204,204,{})'
+ CONTACT_DIFF_3 = 'rgba(179,179,179,{})'
+ CONTACT_DIFF_4 = 'rgba(153,153,153,{})'
+ CONTACT_DIFF_5 = 'rgba(127,127,127,{})'
+ CONTACT_DIFF_6 = 'rgba(102,102,102,{})'
+ CONTACT_DIFF_7 = 'rgba(77,77,77,{})'
+ CONTACT_DIFF_8 = 'rgba(51,51,51,{})'
+ CONTACT_DIFF_9 = 'rgba(25,25,25,{})'
+ CONTACT_DIFF_10 = 'rgba(0,0,0,{})'
+
+
class Coverage_GreyColorPalette(Enum):
MSA_COVERAGE_0 = 'rgba(255,255,255,{})'
MSA_COVERAGE_1 = 'rgba(229,229,229,{})'
@@ -219,6 +233,20 @@ class Density_Viridis(Enum):
CONTACT_DENSITY_10 = sequential.Viridis[9]
+class Diff_Viridis(Enum):
+ CONTACT_DIFF_0 = sequential.Viridis[0]
+ CONTACT_DIFF_1 = sequential.Viridis[0]
+ CONTACT_DIFF_2 = sequential.Viridis[1]
+ CONTACT_DIFF_3 = sequential.Viridis[2]
+ CONTACT_DIFF_4 = sequential.Viridis[3]
+ CONTACT_DIFF_5 = sequential.Viridis[4]
+ CONTACT_DIFF_6 = sequential.Viridis[5]
+ CONTACT_DIFF_7 = sequential.Viridis[6]
+ CONTACT_DIFF_8 = sequential.Viridis[7]
+ CONTACT_DIFF_9 = sequential.Viridis[8]
+ CONTACT_DIFF_10 = sequential.Viridis[9]
+
+
class Coverage_Viridis(Enum):
MSA_COVERAGE_0 = sequential.Viridis[0]
MSA_COVERAGE_1 = sequential.Viridis[0]
@@ -261,6 +289,20 @@ class Density_BuRd(Enum):
CONTACT_DENSITY_10 = diverging.RdYlBu[1]
+class Diff_BuRd(Enum):
+ CONTACT_DIFF_0 = diverging.RdYlBu[10]
+ CONTACT_DIFF_1 = diverging.RdYlBu[10]
+ CONTACT_DIFF_2 = diverging.RdYlBu[9]
+ CONTACT_DIFF_3 = diverging.RdYlBu[8]
+ CONTACT_DIFF_4 = diverging.RdYlBu[7]
+ CONTACT_DIFF_5 = diverging.RdYlBu[6]
+ CONTACT_DIFF_6 = diverging.RdYlBu[5]
+ CONTACT_DIFF_7 = diverging.RdYlBu[4]
+ CONTACT_DIFF_8 = diverging.RdYlBu[3]
+ CONTACT_DIFF_9 = diverging.RdYlBu[2]
+ CONTACT_DIFF_10 = diverging.RdYlBu[1]
+
+
class Coverage_BuRd(Enum):
MSA_COVERAGE_0 = diverging.RdYlBu[10]
MSA_COVERAGE_1 = diverging.RdYlBu[10]
@@ -303,6 +345,20 @@ class Density_Inferno(Enum):
CONTACT_DENSITY_10 = sequential.Inferno[9]
+class Diff_Inferno(Enum):
+ CONTACT_DIFF_0 = sequential.Inferno[0]
+ CONTACT_DIFF_1 = sequential.Inferno[0]
+ CONTACT_DIFF_2 = sequential.Inferno[1]
+ CONTACT_DIFF_3 = sequential.Inferno[2]
+ CONTACT_DIFF_4 = sequential.Inferno[3]
+ CONTACT_DIFF_5 = sequential.Inferno[4]
+ CONTACT_DIFF_6 = sequential.Inferno[5]
+ CONTACT_DIFF_7 = sequential.Inferno[6]
+ CONTACT_DIFF_8 = sequential.Inferno[7]
+ CONTACT_DIFF_9 = sequential.Inferno[8]
+ CONTACT_DIFF_10 = sequential.Inferno[9]
+
+
class Coverage_Inferno(Enum):
MSA_COVERAGE_0 = sequential.Inferno[0]
MSA_COVERAGE_1 = sequential.Inferno[0]
@@ -331,6 +387,20 @@ class Heatmap_Hot(Enum):
BIN_10 = 'rgb(255.0, 255.0, 255.0)'
+class Diff_Hot(Enum):
+ CONTACT_DIFF_10 = 'rgb(10.607999999999999, 0.0, 0.0)'
+ CONTACT_DIFF_9 = 'rgb(76.23763084702213, 0.0, 0.0)'
+ CONTACT_DIFF_8 = 'rgb(144.4924469279252, 0.0, 0.0)'
+ CONTACT_DIFF_7 = 'rgb(210.12207777494734, 0.0, 0.0)'
+ CONTACT_DIFF_6 = 'rgb(255.0, 23.37520639028961, 0.0)'
+ CONTACT_DIFF_5 = 'rgb(255.0, 91.62509548421984, 0.0)'
+ CONTACT_DIFF_4 = 'rgb(255.0, 157.24998884376814, 0.0)'
+ CONTACT_DIFF_3 = 'rgb(255.0, 225.49987793769836, 0.0)'
+ CONTACT_DIFF_2 = 'rgb(255.0, 255.0, 54.18729918729921)'
+ CONTACT_DIFF_1 = 'rgb(255.0, 255.0, 156.56240156240156)'
+ CONTACT_DIFF_0 = 'rgb(255.0, 255.0, 255.0)'
+
+
class Density_Hot(Enum):
CONTACT_DENSITY_10 = 'rgb(10.607999999999999, 0.0, 0.0)'
CONTACT_DENSITY_9 = 'rgb(76.23763084702213, 0.0, 0.0)'
@@ -383,41 +453,65 @@ class MsaCoverage_ColorPalettes(Enum):
PALETTE_5 = Coverage_Hot
-class Hydrophobicity_BlueGreyColorPalette(Enum):
- HYDROPATHY_10 = 'rgba(66,138,245,{})'
- HYDROPATHY_9 = 'rgba(72,137,234,{})'
- HYDROPATHY_8 = 'rgba(79,136,222,{})'
- HYDROPATHY_7 = 'rgba(85,136,211,{})'
- HYDROPATHY_6 = 'rgba(92,135,199,{})'
- HYDROPATHY_5 = 'rgba(98,134,188,{})'
- HYDROPATHY_4 = 'rgba(104,133,176,{})'
- HYDROPATHY_3 = 'rgba(111,132,165,{})'
- HYDROPATHY_2 = 'rgba(117,132,153,{})'
- HYDROPATHY_1 = 'rgba(124,131,142,{})'
- HYDROPATHY_0 = 'rgba(130,130,130,{})'
-
-
-class Hydrophobicity_GreenGreyColorPalette(Enum):
- HYDROPATHY_10 = 'rgba(59,237,74,{})'
- HYDROPATHY_9 = 'rgba(66,226,80,{})'
- HYDROPATHY_8 = 'rgba(73,216,85,{})'
- HYDROPATHY_7 = 'rgba(80,205,91,{})'
- HYDROPATHY_6 = 'rgba(87,194,96,{})'
- HYDROPATHY_5 = 'rgba(95,184,102,{})'
- HYDROPATHY_4 = 'rgba(102,173,108,{})'
- HYDROPATHY_3 = 'rgba(109,162,113,{})'
- HYDROPATHY_2 = 'rgba(116,151,119,{})'
- HYDROPATHY_1 = 'rgba(123,141,124,{})'
- HYDROPATHY_0 = 'rgba(130,130,130,{})'
+class Diff_ColorPalettes(Enum):
+ PALETTE_1 = Diff_GreyColorPalette
+ PALETTE_2 = Diff_Viridis
+ PALETTE_3 = Diff_BuRd
+ PALETTE_4 = Diff_Inferno
+ PALETTE_5 = Diff_Hot
+
+
+class Hydrophobicity_BlueColorPalette(Enum):
+ HYDROPATHY_10 = sequential.ice[1].replace(')', ', {})').replace('rgb', 'rgba')
+ HYDROPATHY_9 = sequential.ice[1].replace(')', ', {})').replace('rgb', 'rgba')
+ HYDROPATHY_8 = sequential.ice[2].replace(')', ', {})').replace('rgb', 'rgba')
+ HYDROPATHY_7 = sequential.ice[3].replace(')', ', {})').replace('rgb', 'rgba')
+ HYDROPATHY_6 = sequential.ice[4].replace(')', ', {})').replace('rgb', 'rgba')
+ HYDROPATHY_5 = sequential.ice[5].replace(')', ', {})').replace('rgb', 'rgba')
+ HYDROPATHY_4 = sequential.ice[6].replace(')', ', {})').replace('rgb', 'rgba')
+ HYDROPATHY_3 = sequential.ice[7].replace(')', ', {})').replace('rgb', 'rgba')
+ HYDROPATHY_2 = sequential.ice[8].replace(')', ', {})').replace('rgb', 'rgba')
+ HYDROPATHY_1 = sequential.ice[9].replace(')', ', {})').replace('rgb', 'rgba')
+ HYDROPATHY_0 = sequential.ice[9].replace(')', ', {})').replace('rgb', 'rgba')
+
+
+class Hydrophobicity_GreenColorPalette(Enum):
+ HYDROPATHY_10 = 'rgba(8, 28, 21,{})'
+ HYDROPATHY_9 = 'rgba(8, 28, 21,{})'
+ HYDROPATHY_8 = 'rgba(27, 67, 50,{})'
+ HYDROPATHY_7 = 'rgba(45, 106, 79,{})'
+ HYDROPATHY_6 = 'rgba(64, 145, 108,{})'
+ HYDROPATHY_5 = 'rgba(82, 183, 136,{})'
+ HYDROPATHY_4 = 'rgba(116, 198, 157,{})'
+ HYDROPATHY_3 = 'rgba(149, 213, 178,{})'
+ HYDROPATHY_2 = 'rgba(183, 228, 199,{})'
+ HYDROPATHY_1 = 'rgba(216, 243, 220,{})'
+ HYDROPATHY_0 = 'rgba(216, 243, 220,{})'
+
+
+class Hydrophobicity_RedColorPalette(Enum):
+ HYDROPATHY_10 = sequential.amp[9].replace(')', ', {})').replace('rgb', 'rgba')
+ HYDROPATHY_9 = sequential.amp[9].replace(')', ', {})').replace('rgb', 'rgba')
+ HYDROPATHY_8 = sequential.amp[8].replace(')', ', {})').replace('rgb', 'rgba')
+ HYDROPATHY_7 = sequential.amp[7].replace(')', ', {})').replace('rgb', 'rgba')
+ HYDROPATHY_6 = sequential.amp[6].replace(')', ', {})').replace('rgb', 'rgba')
+ HYDROPATHY_5 = sequential.amp[5].replace(')', ', {})').replace('rgb', 'rgba')
+ HYDROPATHY_4 = sequential.amp[4].replace(')', ', {})').replace('rgb', 'rgba')
+ HYDROPATHY_3 = sequential.amp[3].replace(')', ', {})').replace('rgb', 'rgba')
+ HYDROPATHY_2 = sequential.amp[2].replace(')', ', {})').replace('rgb', 'rgba')
+ HYDROPATHY_1 = sequential.amp[1].replace(')', ', {})').replace('rgb', 'rgba')
+ HYDROPATHY_0 = sequential.amp[1].replace(')', ', {})').replace('rgb', 'rgba')
class HydrophobicityColorPalettes(Enum):
- PALETTE_1 = Hydrophobicity_BlueGreyColorPalette
- PALETTE_2 = Hydrophobicity_GreenGreyColorPalette
+ PALETTE_1 = Hydrophobicity_BlueColorPalette
+ PALETTE_2 = Hydrophobicity_RedColorPalette
+ PALETTE_3 = Hydrophobicity_GreenColorPalette
class DatasetColorPalettes(Enum):
density = Density_ColorPalettes
+ diff = Diff_ColorPalettes
custom = Custom_ColorPalettes
heatmap = Heatmap_ColorPalettes
hydrophobicity = HydrophobicityColorPalettes
@@ -430,6 +524,7 @@ class DatasetColorPalettes(Enum):
class PaletteDefaultLayout(Enum):
CONTACT_DENSITY = DatasetReference.CONTACT_DENSITY.value.encode()
+ CONTACT_DIFF = DatasetReference.CONTACT_DIFF.value.encode()
CUSTOM = DatasetReference.CUSTOM.value.encode()
HEATMAP = b'heatmap'
HYDROPHOBICITY = DatasetReference.HYDROPHOBICITY.value.encode()
diff --git a/utils/data_utils.py b/utils/data_utils.py
index 3bea872..afe50ec 100644
--- a/utils/data_utils.py
+++ b/utils/data_utils.py
@@ -12,8 +12,8 @@ def check_sequence_mismatch(session_id, cache, seq_length):
cmap_fnames = decompress_data(cache.hget(session_id, cache_utils.CacheKeys.CONTACT_MAP.value))
for cmap_fname in cmap_fnames:
cmap_data = decompress_data(cache.hget(session_id, cmap_fname))
- if cmap_data[-1] == 'PDB' or cmap_data[-1] == 'DISTO':
- cmap_data.pop()
+ if cmap_data[0] == 'PDB' or cmap_data[0] == 'DISTO':
+ cmap_data = cmap_data[1:]
cmap_max_register = max((max(cmap_data, key=itemgetter(0))[0], max(cmap_data, key=itemgetter(1))[0]))
if cmap_max_register > seq_length:
mismatched.append(cmap_fname)
@@ -42,8 +42,8 @@ def check_dataset_mismatch(session_id, cache, data, dataset):
return seq_fname
else:
return False
- elif data[-1] == 'PDB' or data[-1] == 'DISTO':
- max_register = max((max(data[:-1], key=itemgetter(0))[0], max(data[:-1], key=itemgetter(1))[0]))
+ elif data[0] == 'PDB' or data[0] == 'DISTO':
+ max_register = max((max(data[1:], key=itemgetter(0))[0], max(data[1:], key=itemgetter(1))[0]))
else:
max_register = max((max(data, key=itemgetter(0))[0], max(data, key=itemgetter(1))[0]))
@@ -127,6 +127,18 @@ def remove_dataset(trigger, cache, session_id, logger):
cache_utils.remove_fname(cache, session_id, fname, dataset)
if dataset == loaders.DatasetReference.SEQUENCE.value:
- cache_utils.remove_all_density(session_id, cache)
+ cache_utils.remove_all(session_id, cache_utils.CacheKeys.CONTACT_DENSITY.value, cache)
+ cache_utils.remove_all(session_id, cache_utils.CacheKeys.CONTACT_DIFF.value, cache)
elif dataset == loaders.DatasetReference.CONTACT_MAP.value:
cache_utils.remove_density(session_id, cache, fname)
+ cache_utils.remove_diff(session_id, cache, fname)
+
+
+def lookup_data(session, session_id, cachekey, cache):
+ if cachekey in session.keys():
+ data = session[cachekey]
+ elif cache.hexists(session_id, cachekey):
+ data = cache_utils.retrieve_data(session_id, cachekey, cache)
+ else:
+ return None
+ return data
diff --git a/utils/heatmap_utils.py b/utils/heatmap_utils.py
index df13597..53d5325 100644
--- a/utils/heatmap_utils.py
+++ b/utils/heatmap_utils.py
@@ -1,39 +1,47 @@
import plotly.graph_objects as go
+import numpy as np
from utils import color_palettes, DistanceLabels, HoverTemplates
+DISTANCE_BINS = {0: 0, 1: 5, 2: 7, 3: 9, 4: 11, 5: 13, 6: 15, 7: 17, 8: 19, 9: 20}
+
def init_heatmap(seq_length):
- heat = [[0 for x in range(seq_length + 1)] for y in range(seq_length + 1)]
- hover = [[None for x in range(seq_length + 1)] for y in range(seq_length + 1)]
+ shape = (seq_length + 1, seq_length + 1)
+ heat = np.zeros(shape)
+ hover = np.full(shape, None)
return heat, hover
+def get_array(cmap, seq_length):
+ array = np.full((seq_length + 1, seq_length + 1), 20)
+ for contact in cmap:
+ array[contact[0], contact[1]] = DISTANCE_BINS[contact[3]]
+ array[contact[1], contact[0]] = DISTANCE_BINS[contact[3]]
+ return array
+
+
def create_heatmap(session, display_settings, verbose_labels):
heat, hover = init_heatmap(display_settings.seq_length)
for idx, fname in enumerate(display_settings.cmap_selection):
- if fname == '---':
+ if fname == '--- Empty ---':
continue
heat, hover = populate_heatmap(session[fname.encode()], idx, heat, hover, verbose_labels)
palette_idx = [x.value for x in color_palettes.PaletteDefaultLayout].index(b'heatmap')
colorscale = color_palettes.get_heatmap_colorscale(display_settings.selected_palettes[palette_idx])
- return heat, hover, colorscale
+ return heat.tolist(), hover.tolist(), colorscale
def superimpose_heatmaps(session, display_settings, verbose_labels):
- heat, hover = init_heatmap(display_settings.seq_length)
- for idx, fname in enumerate(display_settings.cmap_selection):
- if fname == '---':
- continue
- heat, hover = populate_superimposed_heatmap(session[display_settings.cmap_selection[0].encode()],
- session[display_settings.cmap_selection[1].encode()],
- heat, hover, verbose_labels)
+ heat, hover = create_superimposed_heatmap(session[display_settings.cmap_selection[0].encode()][1:],
+ session[display_settings.cmap_selection[1].encode()][1:],
+ display_settings.seq_length, verbose_labels)
palette_idx = [x.value for x in color_palettes.PaletteDefaultLayout].index(b'heatmap')
colorscale = color_palettes.get_heatmap_colorscale(display_settings.selected_palettes[palette_idx])
- return heat, hover, colorscale
+ return heat.tolist(), hover.tolist(), colorscale
-def populate_heatmap(cmap, idx, distances, hover, verbose_labels=None):
+def populate_heatmap(cmap, idx, heat, hover, verbose_labels=None):
if idx == 1:
idx_x = 1
idx_y = 0
@@ -41,92 +49,102 @@ def populate_heatmap(cmap, idx, distances, hover, verbose_labels=None):
idx_x = 0
idx_y = 1
- if cmap[-1] == 'DISTO' or cmap[-1] == 'PDB':
- cmap = cmap[:-1]
+ hover_labels = []
+
+ if cmap[0] == 'DISTO' or cmap[0] == 'PDB':
+ cmap = cmap[1:]
+ cmap_array = np.array(cmap)
+ res_1 = cmap_array[:, idx_x]
+ res_1 = res_1.astype(int)
+ res_2 = cmap_array[:, idx_y]
+ res_2 = res_2.astype(int)
+ distances = cmap_array[:, 3]
+ scores = cmap_array[:, 4]
+ heat[res_1.astype(int), res_2.astype(int)] = 9 - distances
if verbose_labels is not None:
- for contact in cmap:
- distances[contact[idx_x]][contact[idx_y]] = 9 - contact[3]
- label = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(contact[3]))
- hover_label = HoverTemplates.DISTOGRAM_VERBOSE.format(contact[idx_y], contact[idx_x], label, contact[4],
- verbose_labels[contact[idx_y] - 1],
- verbose_labels[contact[idx_x] - 1])
- hover[contact[idx_x]][contact[idx_y]] = hover_label
+ for x, y, distance, score in zip(res_1, res_2, distances.astype(int), scores):
+ label = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(distance))
+ hover_label = HoverTemplates.DISTOGRAM_VERBOSE.format(y, x, label, score, verbose_labels[y - 1],
+ verbose_labels[x - 1])
+ hover_labels.append(hover_label)
+
else:
- for contact in cmap:
- distances[contact[idx_x]][contact[idx_y]] = 9 - contact[3]
- label = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(contact[3]))
- hover_label = HoverTemplates.DISTOGRAM.format(contact[idx_y], contact[idx_x], label, contact[4])
- hover[contact[idx_x]][contact[idx_y]] = hover_label
+ for x, y, distance, score in zip(res_1, res_2, distances.astype(int), scores):
+ label = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(distance))
+ hover_label = HoverTemplates.DISTOGRAM.format(y, x, label, score)
+ hover_labels.append(hover_label)
+
+ hover[res_1.astype(int), res_2.astype(int)] = hover_labels
- return distances, hover
+ return heat, hover
+
+ cmap_array = np.array(cmap)
+ res_1 = cmap_array[:, idx_x]
+ res_1 = res_1.astype(int)
+ res_2 = cmap_array[:, idx_y]
+ res_2 = res_2.astype(int)
+ scores = cmap_array[:, 2]
+ heat[res_1, res_2] = scores
if verbose_labels is None:
- for contact in cmap:
- distances[contact[idx_x]][contact[idx_y]] = contact[2]
- hover_label = HoverTemplates.CMAP.format(contact[idx_y], contact[idx_x], contact[2])
- hover[contact[idx_x]][contact[idx_y]] = hover_label
+ for x, y, score in zip(res_1, res_2, scores):
+ hover_labels.append(HoverTemplates.CMAP.format(y, x, score))
else:
- for contact in cmap:
- distances[contact[idx_x]][contact[idx_y]] = contact[2]
- hover_label = HoverTemplates.CMAP_VERBOSE.format(contact[idx_y], contact[idx_x], contact[2],
- verbose_labels[contact[idx_y] - 1],
- verbose_labels[contact[idx_x] - 1])
- hover[contact[idx_x]][contact[idx_y]] = hover_label
+ for x, y, score in zip(res_1, res_2, scores):
+ hover_label = HoverTemplates.CMAP_VERBOSE.format(y, x, score, verbose_labels[y - 1], verbose_labels[x - 1])
+ hover_labels.append(hover_label)
- return distances, hover
+ hover[res_1.astype(int), res_2.astype(int)] = hover_labels
+ return heat, hover
-def populate_superimposed_heatmap(reference_cmap, secondary_cmap, heat, hover, verbose_labels=None):
- idx_x = 1
- idx_y = 0
- reference_ftype = reference_cmap.pop(-1)
- secondary_ftype = secondary_cmap.pop(-1)
- predicted_set = {(x[0], x[1]): x[3] for x in secondary_cmap}
+
+def create_superimposed_heatmap(reference_cmap, predicted_cmap, seq_length, verbose_labels=None):
+ hover = np.full((seq_length + 1, seq_length + 1), None)
+ reference_array = get_array(reference_cmap, seq_length)
+ predicted_array = get_array(predicted_cmap, seq_length)
+ difference_heatmap = np.abs(reference_array - predicted_array)
+ predicted_set = {(x[0], x[1]): x[3] for x in predicted_cmap}
+ reference_set = {(x[0], x[1]): x[3] for x in reference_cmap}
if verbose_labels is not None:
- for reference_distance in reference_cmap:
- if tuple(reference_distance[:2]) in predicted_set.keys():
- predicted_distance = predicted_set[tuple(reference_distance[:2])]
- else:
- predicted_distance = 9
- error = abs((9 - reference_distance[3]) - (9 - predicted_distance))
- heat[reference_distance[idx_x]][reference_distance[idx_y]] = error
- heat[reference_distance[idx_y]][reference_distance[idx_x]] = error
- map_a_distance = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(reference_distance[3]))
- map_b_distance = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(predicted_distance))
- hover_label_a = HoverTemplates.DISTOGRAM_SUPERIMPOSE_VERBOSE. \
- format(reference_distance[idx_y], reference_distance[idx_x], map_a_distance, map_b_distance,
- error, verbose_labels[reference_distance[idx_y] - 1],
- verbose_labels[reference_distance[idx_x] - 1])
- hover_label_b = HoverTemplates.DISTOGRAM_SUPERIMPOSE_VERBOSE. \
- format(reference_distance[idx_x], reference_distance[idx_y], map_a_distance, map_b_distance,
- error, verbose_labels[reference_distance[idx_x] - 1],
- verbose_labels[reference_distance[idx_y] - 1])
- hover[reference_distance[idx_x]][reference_distance[idx_y]] = hover_label_a
- hover[reference_distance[idx_y]][reference_distance[idx_x]] = hover_label_b
+ for x in range(1, seq_length + 1):
+ for y in range(x + 5, seq_length + 1):
+ residues = (y, x)
+ predicted_bin = predicted_set[residues] if residues in predicted_set.keys() else 9
+ reference_bin = reference_set[residues] if residues in reference_set.keys() else 9
+ error = '{} Å'.format(difference_heatmap[x, y])
+ map_a_distance = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(reference_bin))
+ map_b_distance = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(predicted_bin))
+ hover_label_a = HoverTemplates.DISTOGRAM_SUPERIMPOSE_VERBOSE.format(y, x, map_a_distance,
+ map_b_distance, error,
+ verbose_labels[y - 1],
+ verbose_labels[x - 1])
+ hover_label_b = HoverTemplates.DISTOGRAM_SUPERIMPOSE_VERBOSE.format(x, y, map_a_distance,
+ map_b_distance, error,
+ verbose_labels[x - 1],
+ verbose_labels[y - 1])
+ hover[x, y] = hover_label_a
+ hover[y, x] = hover_label_b
else:
- for reference_distance in reference_cmap:
- if tuple(reference_distance[:2]) in predicted_set.keys():
- predicted_distance = predicted_set[tuple(reference_distance[:2])]
- else:
- predicted_distance = 9
- error = abs((9 - reference_distance[3]) - (9 - predicted_distance))
- heat[reference_distance[idx_x]][reference_distance[idx_y]] = error
- heat[reference_distance[idx_y]][reference_distance[idx_x]] = error
- map_a_distance = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(reference_distance[3]))
- map_b_distance = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(predicted_distance))
- hover_label_a = HoverTemplates.DISTOGRAM_SUPERIMPOSE.format(reference_distance[idx_y],
- reference_distance[idx_x], map_a_distance,
- map_b_distance, error)
- hover_label_b = HoverTemplates.DISTOGRAM_SUPERIMPOSE.format(reference_distance[idx_x],
- reference_distance[idx_y], map_a_distance,
- map_b_distance, error)
- hover[reference_distance[idx_x]][reference_distance[idx_y]] = hover_label_a
- hover[reference_distance[idx_y]][reference_distance[idx_x]] = hover_label_b
-
- return heat, hover
+ for x in range(1, seq_length + 1):
+ for y in range(x + 5, seq_length + 1):
+ residues = (y, x)
+ predicted_bin = predicted_set[residues] if residues in predicted_set.keys() else 9
+ reference_bin = reference_set[residues] if residues in reference_set.keys() else 9
+ error = '{} Å'.format(difference_heatmap[x, y])
+ map_a_distance = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(reference_bin))
+ map_b_distance = DistanceLabels.__getitem__(DistanceLabels, 'BIN_{}'.format(predicted_bin))
+ hover_label_a = HoverTemplates.DISTOGRAM_SUPERIMPOSE.format(y, x, map_a_distance,
+ map_b_distance, error)
+ hover_label_b = HoverTemplates.DISTOGRAM_SUPERIMPOSE.format(x, y, map_a_distance,
+ map_b_distance, error)
+ hover[x, y] = hover_label_a
+ hover[y, x] = hover_label_b
+
+ return difference_heatmap, hover
def create_heatmap_trace(distances, colorscale, hovertext=None):
diff --git a/utils/math_utils.py b/utils/math_utils.py
new file mode 100644
index 0000000..973bc9e
--- /dev/null
+++ b/utils/math_utils.py
@@ -0,0 +1,65 @@
+from conkit.misc.bandwidth import bandwidth_factory
+import math
+from numba import njit, vectorize
+import numpy as np
+from sklearn.neighbors import KernelDensity
+
+
+@njit()
+def calculate_mcc(tp, fp, tn, fn):
+ denominator = (tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)
+ denominator = math.sqrt(denominator)
+ if denominator == 0:
+ return 1
+ numerator = (tp * tn - fp * fn) * 10
+ if numerator < 0:
+ return 10
+ mcc = 10 - (numerator / denominator)
+ return mcc
+
+
+@vectorize('float64(int64, int64)')
+def get_difference(expected, observed):
+ difference = expected - observed
+ difference_squared = difference ** 2
+ return difference_squared
+
+
+@vectorize('float64(int64, float64)')
+def populate_rmsd(seq_length, sum_squared_differences):
+ rmsd = math.sqrt(sum_squared_differences / seq_length)
+ if rmsd > 5:
+ return 5
+ return rmsd
+
+
+@njit()
+def calculate_rmsd(expected_array, observed_array, seq_length):
+ squared_differences = get_difference(expected_array, observed_array)
+ seq_length_array = np.full(seq_length, seq_length)
+ sum_squared_differences = np.sum(squared_differences, axis=0)
+ rmsd = populate_rmsd(seq_length_array, sum_squared_differences)
+ return rmsd
+
+
+def convolution_smooth_values(x, window=5):
+ box = np.ones(window) / window
+ x_smooth = np.convolve(x, box, mode='same')
+ return np.round(x_smooth, 0)
+
+
+def cumsum_smooth(x, window=5):
+ cumsum_vec = np.cumsum(np.insert(x, 0, 0))
+ return (cumsum_vec[window:] - cumsum_vec[:-window]) / window
+
+
+def get_contact_density(contact_list, seq_length):
+ """Credits to Felix Simkovic; code taken from GitHub rigdenlab/conkit"""
+ x = np.array([i for c in contact_list for i in np.arange(c[1], c[0] + 1)], dtype=np.int64)[:, np.newaxis]
+ bw = bandwidth_factory('amise')(x).bw
+ kde = KernelDensity(bandwidth=bw).fit(x)
+ x_fit = np.arange(1, seq_length + 1)[:, np.newaxis]
+ density = np.exp(kde.score_samples(x_fit)).tolist()
+ density_max = max(density)
+ density = [int(round(float(i) / density_max, 1) * 10) for i in density]
+ return density
diff --git a/utils/plot_utils.py b/utils/plot_utils.py
index dddc2b6..7ceadfa 100644
--- a/utils/plot_utils.py
+++ b/utils/plot_utils.py
@@ -3,6 +3,7 @@
from dash.dash import no_update
import dash_core_components as dcc
from enum import Enum
+import itertools
import json
from loaders import DatasetReference, AdditionalDatasetReference, STATES
from layouts import ContextReference
@@ -29,11 +30,10 @@ def create_ConPlot(session_id, cache, trigger, selected_tracks, cmap_selection,
contact_marker_size=5, track_marker_size=5, track_separation=2, transparent=True, superimpose=False,
heatmap=False, verbose_labels=False):
session = cache.hgetall(session_id)
- session, display_settings, verbose_labels, error = process_args(session_id, session, trigger, selected_tracks,
- cmap_selection, factor, contact_marker_size,
- track_separation, transparent, selected_palettes,
- superimpose, track_marker_size, heatmap,
- verbose_labels, cache)
+ session, display_settings, error = process_args(session_id, session, trigger, selected_tracks, cmap_selection,
+ factor, contact_marker_size, track_separation, transparent,
+ selected_palettes, superimpose, track_marker_size, heatmap,
+ verbose_labels, cache)
if error is not None:
return error
@@ -41,8 +41,11 @@ def create_ConPlot(session_id, cache, trigger, selected_tracks, cmap_selection,
display_card = get_display_control_card(display_settings)
figure = create_figure(display_settings.axis_range)
- add_contact_trace(session, display_settings, figure, verbose_labels)
- add_additional_tracks(session_id, session, display_settings, figure, cache)
+ verbose_labels, additional_traces = add_additional_tracks(session_id, session, display_settings, figure, cache)
+ contact_traces = add_contact_trace(session, display_settings, figure, verbose_labels)
+
+ figure.add_traces(contact_traces)
+ figure.add_traces(additional_traces)
figure.update_xaxes(spikemode="across", showspikes=False)
figure.update_yaxes(spikemode="across", showspikes=False)
@@ -57,53 +60,66 @@ def create_ConPlot(session_id, cache, trigger, selected_tracks, cmap_selection,
def add_additional_tracks(session_id, session, display_settings, figure, cache):
+ prediction_labels = {}
+ traces = []
for idx, fname in enumerate(display_settings.selected_tracks):
- if fname == '---':
+ if fname == '--- Empty ---':
continue
- dataset, prediction = tracks_utils.retrieve_dataset_prediction(session_id, session, fname, display_settings,
- cache)
+ dataset, prediction = tracks_utils.get_dataset_prediction(session_id, session, fname, display_settings, cache)
+ if display_settings.verbose_labels and fname not in prediction_labels:
+ prediction_labels[fname] = [STATES[dataset][x] for x in prediction]
palette_idx = [x.name for x in color_palettes.DatasetColorPalettes].index(dataset)
palette = display_settings.selected_palettes[palette_idx]
if idx == 4:
- traces = tracks_utils.get_diagonal_trace(prediction, dataset, display_settings.track_marker_size,
- session[display_settings.seq_fname.encode()],
- display_settings.alpha, palette)
+ traces += tracks_utils.get_diagonal_trace(prediction, dataset, display_settings.track_marker_size,
+ session[display_settings.seq_fname.encode()],
+ display_settings.alpha, palette)
else:
- traces = tracks_utils.get_traces(prediction, dataset, idx, display_settings.track_separation,
- display_settings.track_marker_size, display_settings.alpha, palette)
+ traces += tracks_utils.get_traces(prediction, dataset, idx, display_settings.track_separation,
+ display_settings.track_marker_size, display_settings.alpha, palette)
+
+ if display_settings.verbose_labels:
+ verbose_labels = []
+ sequence = session[display_settings.seq_fname.encode()]
+ all_predictions = list(prediction_labels.values())
+ label_template = '------<br>Residue {} ({})' + '<br>{}' * len(all_predictions)
+ for idx, residue_info in enumerate(zip(sequence, *all_predictions), 1):
+ verbose_labels.append(label_template.format(idx, *residue_info))
+
+ return verbose_labels, traces
- for trace in traces:
- figure.add_trace(trace)
+ return None, traces
def add_contact_trace(session, display_settings, figure, verbose_labels):
if display_settings.superimpose and display_settings.heatmap:
heat, hover, colorscale = heatmap_utils.superimpose_heatmaps(session, display_settings, verbose_labels)
- figure.add_trace(heatmap_utils.create_heatmap_trace(hovertext=hover, distances=heat, colorscale=colorscale))
+ return heatmap_utils.create_heatmap_trace(hovertext=hover, distances=heat, colorscale=colorscale)
elif display_settings.heatmap:
heat, hover, colorscale = heatmap_utils.create_heatmap(session, display_settings, verbose_labels)
- figure.add_trace(heatmap_utils.create_heatmap_trace(hovertext=hover, distances=heat, colorscale=colorscale))
+ return heatmap_utils.create_heatmap_trace(hovertext=hover, distances=heat, colorscale=colorscale)
elif display_settings.superimpose:
reference_cmap = session[display_settings.cmap_selection[0].encode()]
predicted_cmap = session[display_settings.cmap_selection[1].encode()]
- traces = cmap_utils.create_superimposed_cmap(reference_cmap, predicted_cmap, display_settings, verbose_labels)
- for trace in traces:
- figure.add_trace(trace)
+ return cmap_utils.create_superimposed_cmap(reference_cmap, predicted_cmap, display_settings, verbose_labels)
else:
+ traces = []
for idx, fname in enumerate(display_settings.cmap_selection):
- if fname == '---':
+ if fname == '--- Empty ---':
continue
cmap = session[fname.encode()]
size = display_settings.contact_marker_size
x, y, hover = cmap_utils.create_cmap(cmap, idx, display_settings, verbose_labels)
- figure.add_trace(cmap_utils.create_cmap_trace(x, y, 'circle', size, 'black', hover))
+ traces.append(cmap_utils.create_cmap_trace(x, y, 'circle', size, 'black', hover))
+
+ return traces
def get_display_control_card(display_settings):
@@ -152,14 +168,14 @@ def lookup_input_errors(session_id, session, cmap_selection, superimpose, heatma
error = components.PlotPlaceHolder(), \
components.MissingInputModal(*[missing.name for missing in missing_data]), \
components.DisplayControlCard(), True
- return None, None, None, error
+ return None, None, error
if superimpose and heatmap:
reference_cmap = session[cmap_selection[0].encode()]
predicted_cmap = session[cmap_selection[1].encode()]
- error = no_update, components.InvalidSuperposeHeatmapModal(), no_update, no_update
- if not isinstance(reference_cmap[-1], str) or not isinstance(predicted_cmap[-1], str):
- return None, None, None, error
+ if not isinstance(reference_cmap[0], str) or not isinstance(predicted_cmap[0], str):
+ error = no_update, components.InvalidSuperposeHeatmapModal(), no_update, no_update
+ return None, None, error
return None
@@ -209,41 +225,93 @@ def process_args(session_id, session, trigger, selected_tracks, cmap_selection,
cmap_selection=cmap_selection, available_cmaps=available_cmaps,
heatmap=heatmap, verbose_labels=verbose_labels)
- if verbose_labels:
- fnames = [fname for fname in selected_tracks if fname != '---']
- verbose_labels = get_verbose_labels(session_id, session, fnames, display_settings, cache)
- else:
- verbose_labels = None
+ return session, display_settings, None
+
- return session, display_settings, verbose_labels, None
+def separate_pdb_cmaps(session, cmap_fname_list):
+ non_pdb_fnames = []
+ pdb_fnames = []
+
+ for fname in cmap_fname_list:
+ cmap = session[fname.encode()]
+ if cmap[0] == 'PDB':
+ pdb_fnames.append(fname)
+ else:
+ non_pdb_fnames.append(fname)
+
+ return pdb_fnames, non_pdb_fnames
def get_available_data(session):
- available_tracks = []
- for dataset in AdditionalDatasetReference:
- if dataset.value.encode() in session.keys() and session[dataset.value.encode()]:
- available_tracks += session[dataset.value.encode()]
+ available_tracks = [{'label': '--- Empty ---', 'value': 'Empty_1'},
+ {'label': '--- Seq. Hydrophobicity ---', 'value': 'Hydrophobicity_Header', 'disabled': True},
+ {'label': session[DatasetReference.SEQUENCE.value.encode()],
+ 'value': session[DatasetReference.SEQUENCE.value.encode()]},
+ {'label': '--- Contact Density ---', 'value': 'Density_Header', 'disabled': True}]
+
+ available_cmaps, cmap_fname_list, cmap_density = get_cmap_density_tracks(session)
+
+ if not cmap_fname_list:
+ available_tracks.append({'label': '--- Empty ---', 'value': 'Empty_2'})
+ available_tracks.append({'label': '--- Contact Diff ---', 'value': 'Diff_Header', 'disabled': True})
+ available_tracks.append({'label': '--- Empty ---', 'value': 'Empty_3'})
+ else:
+ available_tracks += sorted(cmap_density, key=lambda k: k['label'])
+ available_tracks.append({'label': '--- Contact Diff ---', 'value': 'Diff_Header', 'disabled': True})
+ cmap_diff = get_cmap_diff_tracks(cmap_fname_list)
+ if not cmap_diff:
+ available_tracks.append({'label': '--- Empty ---', 'value': 'Empty_3'})
+ else:
+ available_tracks += sorted(cmap_diff, key=lambda k: k['label'])
+
+ available_tracks.append({'label': '--- Other Tracks ---', 'value': 'AdditionalTracks_Header', 'disabled': True})
+ other_tracks = get_other_tracks(session)
+ if not other_tracks:
+ available_tracks.append({'label': '--- Empty ---', 'value': 'Empty_4'})
+ else:
+ available_tracks += sorted(other_tracks, key=lambda k: k['label'])
+ return available_tracks, sorted(available_cmaps)
+
+
+def get_cmap_density_tracks(session):
+ cmap_density = []
available_cmaps = []
- for cmap_fname in session[DatasetReference.CONTACT_MAP.value.encode()]:
+ cmap_fname_list = session[DatasetReference.CONTACT_MAP.value.encode()]
+ for cmap_fname in cmap_fname_list:
available_cmaps.append(cmap_fname)
- available_tracks.append(cmap_fname)
+ cmap_density.append({'label': cmap_fname, 'value': cmap_fname})
+ return available_cmaps, cmap_fname_list, cmap_density
- available_tracks.append(session[DatasetReference.SEQUENCE.value.encode()])
- return available_tracks, available_cmaps
+def get_cmap_diff_tracks(cmap_fname_list):
+ cmap_diff = []
+ for combination in itertools.combinations(cmap_fname_list, 2):
+ label = '{} | {}'.format(*combination)
+ cmap_diff.append({'label': label, 'value': label})
+ return cmap_diff
+
+
+def get_other_tracks(session):
+ other_tracks = []
+ for dataset in AdditionalDatasetReference:
+ if dataset.value.encode() in session.keys() and session[dataset.value.encode()]:
+ for fname in session[dataset.value.encode()]:
+ other_tracks.append({'label': fname, 'value': fname})
+ return other_tracks
def get_user_selection(cmap_selection, available_cmaps, track_selection, available_tracks):
if len(cmap_selection) == 0:
- cmap_selection = ['---'] * 2
+ cmap_selection = ['--- Empty ---'] * 2
else:
- cmap_selection = [fname if fname in available_cmaps else '---' for fname in cmap_selection]
+ cmap_selection = [fname if fname in available_cmaps else '--- Empty ---' for fname in cmap_selection]
if len(track_selection) == 0:
- track_selection = ['---'] * 9
+ track_selection = ['--- Empty ---'] * 9
else:
- track_selection = [track if track in available_tracks else '---' for track in track_selection]
+ available_track_labels = [track['label'] for track in available_tracks]
+ track_selection = [track if track in available_track_labels else '--- Empty ---' for track in track_selection]
return track_selection, cmap_selection
@@ -258,9 +326,9 @@ def get_default_layout(session):
tracks.append(session[dataset.value][0])
if not any(tracks):
- return ['---'] * 9, (cmap_fname, cmap_fname), selected_palettes
+ return ['--- Empty ---'] * 9, (cmap_fname, cmap_fname), selected_palettes
else:
- missing_tracks = ['---' for missing in range(0, 5 - len(tracks))]
+ missing_tracks = ['--- Empty ---' for missing in range(0, 5 - len(tracks))]
tracks += missing_tracks
return tracks[1:][::-1] + tracks, (cmap_fname, cmap_fname), selected_palettes
@@ -278,23 +346,3 @@ def create_figure(axis_range):
plot_bgcolor='rgba(0,0,0,0)'
)
)
-
-
-def get_verbose_labels(session_id, session, fnames, display_settings, cache):
- sequence = session[display_settings.seq_fname.encode()]
- all_predictions = []
- for fname in set(fnames):
- dataset, prediction = tracks_utils.retrieve_dataset_prediction(session_id, session, fname,
- display_settings, cache)
- dataset_dict = STATES[dataset]
- prediction = [dataset_dict[x] for x in prediction]
- all_predictions.append(prediction)
-
- labels = []
- for idx, residue in enumerate(sequence, 1):
- current_label = '------<br>Residue {} ({})'.format(idx, residue)
- for prediction in all_predictions:
- current_label += '<br>{}'.format(prediction[idx - 1])
- labels.append(current_label)
-
- return labels
diff --git a/utils/tests/test_cache_utils.py b/utils/tests/test_cache_utils.py
index 626a04c..89e75c0 100644
--- a/utils/tests/test_cache_utils.py
+++ b/utils/tests/test_cache_utils.py
@@ -78,15 +78,27 @@ def test_8(self):
self.assertDictEqual(expected, self.cache.hgetall(self.session_id))
def test_9(self):
- cachekey_1 = 'fname_1_{}_2'.format(cache_utils.CacheKeys.METADATA_TAG.value).encode()
+ cachekey_1 = cache_utils.CacheKeys.CMAP_DENSITY.value.format('fname_1', '2').encode()
density_1 = [1, 2, 3, 3, 4, 5]
- cachekey_2 = 'fname_2_{}_2'.format(cache_utils.CacheKeys.METADATA_TAG.value).encode()
+ cachekey_2 = cache_utils.CacheKeys.CMAP_DENSITY.value.format('fname_2', '2').encode()
density_2 = [5, 6, 7, 8, 9, 0]
- cache_utils.store_density(self.session_id, cachekey_1, density_1, self.cache)
- cache_utils.store_density(self.session_id, cachekey_2, density_2, self.cache)
- output = cache_utils.retrieve_density(self.session_id, cachekey_2, self.cache)
+ cache_utils.store_data(self.session_id, cachekey_1, density_1,
+ cache_utils.CacheKeys.CONTACT_DENSITY.value, self.cache)
+ cache_utils.store_data(self.session_id, cachekey_2, density_2,
+ cache_utils.CacheKeys.CONTACT_DENSITY.value, self.cache)
+ output = cache_utils.retrieve_data(self.session_id, cachekey_2, self.cache)
self.assertListEqual(output, density_2)
expected_cache = {b'id': cache_utils.compress_data(self.session_id)}
- cache_utils.remove_all_density(self.session_id, self.cache)
+ cache_utils.remove_all(self.session_id, cache_utils.CacheKeys.CONTACT_DENSITY.value, self.cache)
self.assertDictEqual(expected_cache, self.cache.hgetall(self.session_id))
+
+ def test_10(self):
+ self.assertTrue(cache_utils.is_valid_fname('fname_1'))
+ self.assertTrue(cache_utils.is_valid_fname('fname_1-METADATA-DENSITY'))
+ self.assertFalse(cache_utils.is_valid_fname('fname_CONPLOT-INTERNAL-USE-ONLY-METADATA_1'))
+ self.assertFalse(cache_utils.is_valid_fname('{}_CONPLOT-INTERNAL-USE-ONLY-METADATA_{}'))
+ self.assertFalse(cache_utils.is_valid_fname('fname - density'))
+ self.assertFalse(cache_utils.is_valid_fname('seq - hydrophobicity'))
+ self.assertFalse(cache_utils.is_valid_fname(cache_utils.CacheKeys.CMAP_DENSITY.value.format('fname_1', '2')))
+
diff --git a/utils/tests/test_heatmap_utils.py b/utils/tests/test_heatmap_utils.py
new file mode 100644
index 0000000..8418d1d
--- /dev/null
+++ b/utils/tests/test_heatmap_utils.py
@@ -0,0 +1,12 @@
+import unittest
+from utils import heatmap_utils
+
+
+class HeatmapUtilsTestCase(unittest.TestCase):
+
+ def test_1(self):
+ expected_heat = [[0, 0, 0], [0, 0, 0], [0, 0, 0]]
+ expected_hover = [[None, None, None], [None, None, None], [None, None, None]]
+ heat, hover = heatmap_utils.init_heatmap(2)
+ self.assertListEqual(expected_hover, hover.tolist())
+ self.assertListEqual(expected_heat, heat.tolist())
diff --git a/utils/tests/test_math_utils.py b/utils/tests/test_math_utils.py
new file mode 100644
index 0000000..66c12fa
--- /dev/null
+++ b/utils/tests/test_math_utils.py
@@ -0,0 +1,35 @@
+import os
+import unittest
+from utils import math_utils
+
+
+class MathUtilsTestCase(unittest.TestCase):
+
+ @unittest.skipIf('THIS_IS_GH_ACTIONS' in os.environ, "not implemented in Github Actions")
+ def test_1(self):
+ dummy_cmap = [(52, 50), (53, 51), (145, 143), (142, 140), (150, 148), (53, 50), (147, 145), (141, 139),
+ (143, 141), (148, 146)]
+ expected_density = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 7, 10, 10, 7, 4, 1, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 5, 8, 10, 9, 8, 8, 8, 8, 8, 7, 6, 4, 2, 1, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+
+ density = math_utils.get_contact_density(dummy_cmap, 168)
+ self.assertListEqual(density, expected_density)
+
+ def test_2(self):
+ expected_output = 3.0210772833723656
+ output = math_utils.calculate_mcc(5, 2, 120, 2)
+ self.assertEqual(output, expected_output)
+
+ def test_3(self):
+ expected_output = 1
+ output = math_utils.calculate_mcc(0, 0, 120, 2)
+ self.assertEqual(output, expected_output)
+
+ def test_4(self):
+ expected_output = 10
+ output = math_utils.calculate_mcc(12, 1, 0, 2)
+ self.assertEqual(output, expected_output)
diff --git a/utils/tests/test_tracks_utils.py b/utils/tests/test_tracks_utils.py
index c03f479..c14cdca 100644
--- a/utils/tests/test_tracks_utils.py
+++ b/utils/tests/test_tracks_utils.py
@@ -1,18 +1,107 @@
+import os
import unittest
from utils import tracks_utils
+from collections import namedtuple
+DisplayControlSettings = namedtuple('DisplayControlSettings', ('factor', 'seq_length'))
-class SessionUtilsTestCase(unittest.TestCase):
+class TrackUtilsTestCase(unittest.TestCase):
+
+ @unittest.skipIf('THIS_IS_GH_ACTIONS' in os.environ, "not implemented in Github Actions")
def test_1(self):
dummy_cmap = [(52, 50), (53, 51), (145, 143), (142, 140), (150, 148), (53, 50), (147, 145), (141, 139),
(143, 141), (148, 146)]
expected_density = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 5, 7, 8, 8, 7, 5, 3, 2, 1, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 7, 10, 10, 7, 4, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 3, 4, 6, 8, 9, 10, 10, 10, 10, 9, 9, 8, 6, 5, 3, 2, 1,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 4, 6, 7, 6, 5, 6, 6, 5, 5, 4, 4, 3, 2, 1, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
- density = tracks_utils.get_contact_density(dummy_cmap, 168)
+ density = tracks_utils.calculate_density(dummy_cmap, 168, 20)
self.assertListEqual(density, expected_density)
+
+ def test_2(self):
+ """
+ cmap_1
+ 1 1 0 1
+ 1 0 1 0
+ 0 1 0 1
+ 1 0 1 1
+ cmap_2
+ 0 1 0 1
+ 1 1 1 0
+ 1 1 1 1
+ 1 1 1 0
+ """
+ dummy_cmap_1 = [(1, 1), (3, 1), (4, 1), (2, 2), (4, 2), (3, 3), (4, 4)]
+ dummy_cmap_2 = [(1, 1), (3, 1), (2, 1), (2, 2), (4, 2), (3, 2), (3, 3), (4, 4)]
+ expected_mcc = [10, 1, 4, 4]
+ expected_mcc_smooth = [2, 3, 4, 4, 2]
+ dummy_display_settings = DisplayControlSettings(factor=0, seq_length=4)
+ diff = tracks_utils.calculate_diff(dummy_cmap_1, dummy_cmap_2, dummy_display_settings)
+ mcc = tracks_utils.get_cmap_mcc(dummy_cmap_1, dummy_cmap_2, dummy_display_settings.seq_length, smooth=False)
+ mcc_smooth = tracks_utils.get_cmap_mcc(dummy_cmap_1, dummy_cmap_2, dummy_display_settings.seq_length)
+ self.assertListEqual(mcc, expected_mcc)
+ self.assertListEqual(mcc_smooth, expected_mcc_smooth)
+ self.assertListEqual(mcc_smooth, diff)
+
+ def test_3(self):
+ """
+ cmap_1
+ 1 1 0 1
+ 1 0 1 0
+ 0 1 0 1
+ 1 0 1 1
+ cmap_2
+ 0 1 0 1
+ 1 1 1 0
+ 1 1 1 1
+ 1 1 1 0
+ """
+ dummy_cmap_1 = [(1, 1), (3, 1), (4, 1), (2, 2), (4, 2), (3, 3), (4, 4)]
+ dummy_cmap_2 = [(1, 1), (3, 1), (2, 1), (2, 2), (4, 2), (3, 2), (3, 3), (4, 4)]
+ expected_diff = [3, 3, 3, 3, 1]
+ dummy_display_settings = DisplayControlSettings(factor=1, seq_length=4)
+
+ diff = tracks_utils.calculate_diff(dummy_cmap_1, dummy_cmap_2, dummy_display_settings)
+ self.assertListEqual(diff, expected_diff)
+
+ def test_4(self):
+ """
+ cmap_1
+ 2 8 9 0
+ 5 0 0 9
+ 7 0 0 8
+ 0 7 5 2
+ cmap_2
+ 9 6 0 0
+ 5 1 0 0
+ 5 0 1 6
+ 0 5 5 9
+ """
+ cmap_1 = [
+ [2, 1, 0, 7],
+ [3, 1, 0, 5],
+ [4, 1, 0, 2],
+ [4, 2, 0, 8],
+ [3, 2, 0, 0],
+ [4, 3, 0, 9]
+ ]
+
+ cmap_2 = [
+ [2, 1, 0, 5],
+ [3, 1, 0, 5],
+ [4, 1, 0, 9],
+ [3, 2, 0, 1],
+ [4, 2, 0, 6],
+ [4, 3, 0, 0]
+ ]
+
+ expected = [10, 8, 10, 10]
+ expected_smooth = [4, 6, 8, 8, 6]
+ output = tracks_utils.get_cmap_rmsd(cmap_1, cmap_2, 4, smooth=False)
+ output_smooth = tracks_utils.get_cmap_rmsd(cmap_1, cmap_2, 4, smooth=True)
+ self.assertListEqual(output, expected)
+ self.assertListEqual(output_smooth, expected_smooth)
diff --git a/utils/tracks_utils.py b/utils/tracks_utils.py
index 7036036..a5ad32f 100644
--- a/utils/tracks_utils.py
+++ b/utils/tracks_utils.py
@@ -1,59 +1,123 @@
-from parsers import DatasetStates
-from loaders import AdditionalDatasetReference, DatasetReference
-from utils import create_cmap_trace, color_palettes, cache_utils
-from sklearn.cluster import estimate_bandwidth
-from sklearn.neighbors import KernelDensity
import numpy as np
+from loaders import AdditionalDatasetReference, DatasetReference
+from parsers import DatasetStates
+from utils import create_cmap_trace, color_palettes, cache_utils, lookup_data, cmap_utils, math_utils
def calculate_density(cmap, seq_length, factor):
- if cmap[-1] == 'PDB' or cmap[-1] == 'DISTO':
- cmap.pop(-1)
- contact_list = cmap[:int(round(seq_length / factor, 0))]
- return get_contact_density(contact_list, seq_length)
+ contact_list = cmap_utils.slice_cmap(cmap, seq_length, factor)
+ return math_utils.get_contact_density(contact_list, seq_length)
+
+
+DISTANCE_BINS = {0: 0, 1: 5, 2: 7, 3: 9, 4: 11, 5: 13, 6: 15, 7: 17, 8: 19, 9: 20}
+
+
+def get_distance_array(cmap, seq_length):
+ array = np.full((seq_length, seq_length), 20)
+ for contact in cmap:
+ array[seq_length - contact[0], contact[1] - 1] = DISTANCE_BINS[contact[3]]
+ array[seq_length - contact[1], contact[0] - 1] = DISTANCE_BINS[contact[3]]
+ return array
+
+
+def get_cmap_mcc(cmap_1, cmap_2, size, smooth=True):
+ cmap_1_set = {resn: {(c[0], c[1]) for c in cmap_1 if resn in (c[0], c[1])} for resn in range(1, size + 1)}
+ cmap_2_set = {resn: {(c[0], c[1]) for c in cmap_2 if resn in (c[0], c[1])} for resn in range(1, size + 1)}
+ diff = []
+ for resn in cmap_1_set.keys():
+ tp = len(cmap_1_set[resn] & cmap_2_set[resn])
+ fp = len(cmap_2_set[resn] - cmap_1_set[resn])
+ fn = len(cmap_1_set[resn] - cmap_2_set[resn])
+ tn = size - sum((tp, fp, fn))
+ mcc = math_utils.calculate_mcc(tp, fp, tn, fn)
+ diff.append(mcc)
+
+ if smooth:
+ return math_utils.convolution_smooth_values(diff).astype(int).tolist()
+
+ return [int(round(mcc, 0)) for mcc in diff]
+
+
+def get_cmap_rmsd(cmap_1, cmap_2, seq_length, smooth=True):
+ cmap_1_array = get_distance_array(cmap_1, seq_length)
+ cmap_2_array = get_distance_array(cmap_2, seq_length)
+ rmsd = math_utils.calculate_rmsd(cmap_1_array, cmap_2_array, seq_length)
+ if smooth:
+ rmsd = math_utils.convolution_smooth_values(rmsd) * 2
+ return rmsd.astype(int).tolist()
+ else:
+ rmsd = np.round(rmsd, 0) * 2
+ return rmsd.astype(int).tolist()
+
+
+def calculate_diff(cmap_1, cmap_2, display_settings):
+ if cmap_utils.contains_distances(cmap_1) and cmap_utils.contains_distances(cmap_2):
+ return get_cmap_rmsd(cmap_1[1:], cmap_2[1:], display_settings.seq_length)
+ else:
+ cmap_1 = cmap_utils.slice_cmap(cmap_1, display_settings.seq_length, display_settings.factor)
+ cmap_2 = cmap_utils.slice_cmap(cmap_2, display_settings.seq_length, display_settings.factor)
+ return get_cmap_mcc(cmap_1, cmap_2, display_settings.seq_length)
+
+
+def get_diff_args(session, fname, factor):
+ cmap_1_fname = fname.split('|')[0].rstrip().lstrip()
+ cmap_1 = session[cmap_1_fname.encode()]
+ cmap_2_fname = fname.split('|')[1].rstrip().lstrip()
+ cmap_2 = session[cmap_2_fname.encode()]
+ if cmap_utils.contains_distances(cmap_1) and cmap_utils.contains_distances(cmap_2):
+ cachekey = cache_utils.CacheKeys.CMAP_DIFF.value.format(cmap_1_fname, cmap_2_fname, '1').encode()
+ else:
+ cachekey = cache_utils.CacheKeys.CMAP_DIFF.value.format(cmap_1_fname, cmap_2_fname, factor).encode()
-def retrieve_dataset_prediction(session_id, session, fname, display_settings, cache):
+ return cmap_1, cmap_2, cachekey
+
+
+def get_dataset_prediction(session_id, session, fname, display_settings, cache):
if fname == session[DatasetReference.SEQUENCE.value.encode()]:
return DatasetReference.HYDROPHOBICITY.value, session[DatasetReference.HYDROPHOBICITY.value.encode()]
if fname in session[DatasetReference.CONTACT_MAP.value.encode()]:
- cachekey = '{}_{}_{}'.format(fname, cache_utils.CacheKeys.METADATA_TAG.value, display_settings.factor).encode()
- if cachekey in session.keys():
- density = session[cachekey]
- elif cache.hexists(session_id, cachekey):
- density = cache_utils.retrieve_density(session_id, cachekey, cache)
- else:
+ cachekey = cache_utils.get_cachekey(session, fname, display_settings.factor)
+ density = lookup_data(session, session_id, cachekey, cache)
+ if not density:
density = calculate_density(session[fname.encode()], display_settings.seq_length, display_settings.factor)
- cache_utils.store_density(session_id, cachekey, density, cache)
+ cache_utils.store_data(session_id, cachekey, density, cache_utils.CacheKeys.CONTACT_DENSITY.value, cache)
+
return DatasetReference.CONTACT_DENSITY.value, density
+ if cache_utils.MetadataTags.SEPARATOR.value in fname:
+ cmap_1, cmap_2, cachekey = get_diff_args(session, fname, display_settings.factor)
+ diff = lookup_data(session, session_id, cachekey, cache)
+ if not diff:
+ diff = calculate_diff(cmap_1, cmap_2, display_settings)
+ cache_utils.store_data(session_id, cachekey, diff, cache_utils.CacheKeys.CONTACT_DIFF.value, cache)
+ return DatasetReference.CONTACT_DIFF.value, diff
+
for dataset in AdditionalDatasetReference:
if dataset.value.encode() in session.keys() and fname in session[dataset.value.encode()]:
return dataset.value, session[fname.encode()]
-def transform_coords_diagonal_axis(coord, distance, low_bound=False, ratio=1, y_axis=True):
- if coord is None:
- return None
+def transform_coords_diagonal_xaxis(indices, distance, track_idx, ratio=1):
+ factor = distance / (1 + ratio ** 2)
+ if track_idx < 4:
+ factor = factor * -1
+ return [idx + factor for idx in indices]
- if y_axis:
- factor = ratio * (distance / (1 + ratio ** 2))
- if low_bound:
- factor = factor * -1
- else:
- factor = distance / (1 + ratio ** 2)
- if not low_bound:
- factor = factor * -1
- return coord + factor
+def transform_coords_diagonal_yaxis(prediction, state, distance, track_idx, ratio=1):
+ factor = ratio * (distance / (1 + ratio ** 2))
+ if track_idx > 4:
+ factor = factor * -1
+ return [idx + factor if residue == state else None for idx, residue in enumerate(prediction, 1)]
def get_diagonal_trace(prediction, dataset, marker_size, sequence, alpha, color_palette):
if prediction is None:
return None
- x_diagonal = [idx for idx in range(1, len(prediction) + 1)]
+ x = [idx for idx in range(1, len(prediction) + 1)]
states = DatasetStates.__getattr__(dataset).value
palette = color_palettes.DatasetColorPalettes.__getattr__(dataset).value.__getattr__(color_palette).value
traces = []
@@ -62,14 +126,9 @@ def get_diagonal_trace(prediction, dataset, marker_size, sequence, alpha, color_
y = [idx if residue == state.value else None for idx, residue in enumerate(prediction, 1)]
if not any(y):
continue
-
- hovertext = ['Residue: {} ({}) | {}'.format(sequence[idx - 1], idx, state.name) for idx in x_diagonal]
- color = palette.__getattr__(state.name).value
- color = color.format(alpha)
-
- traces.append(
- create_cmap_trace(x_diagonal, y, 'diamond', marker_size=marker_size, color=color, hovertext=hovertext)
- )
+ hovertext = ['Residue: {} ({}) | {}'.format(resid, idx, state.name) for idx, resid in enumerate(sequence, 1)]
+ color = palette.__getattr__(state.name).value.format(alpha)
+ traces.append(create_cmap_trace(x, y, 'diamond', marker_size=marker_size, color=color, hovertext=hovertext))
return traces
@@ -84,34 +143,16 @@ def get_traces(prediction, dataset, track_idx, track_separation, marker_size, al
palette = color_palettes.DatasetColorPalettes.__getattr__(dataset).value.__getattr__(color_palette).value
track_origin = abs(4 - track_idx)
track_distance = track_separation * track_origin
- if track_idx > 4:
- low_bound = True
- else:
- low_bound = False
+
+ x = transform_coords_diagonal_xaxis(x_diagonal, track_distance, track_idx)
for state in states:
- y_diagonal = [idx if residue == state.value else None for idx, residue in enumerate(prediction, 1)]
- if not any(y_diagonal):
+ y = transform_coords_diagonal_yaxis(prediction, state.value, track_distance, track_idx)
+ if not any(y):
continue
-
- y = [transform_coords_diagonal_axis(y, track_distance, low_bound=low_bound) for y in y_diagonal]
- x = [transform_coords_diagonal_axis(x, track_distance, low_bound=low_bound, y_axis=False) for x in x_diagonal]
- hovertext = ['%s' % state.name for idx in enumerate(x)]
- color = palette.__getattr__(state.name).value
- color = color.format(alpha)
+ hovertext = ['%s' % state.name for i in x]
+ color = palette.__getattr__(state.name).value.format(alpha)
traces.append(create_cmap_trace(x, y, 'diamond', marker_size=marker_size, color=color, hovertext=hovertext))
return traces
-
-
-def get_contact_density(contact_list, seq_length):
- """Credits to Felix Simkovic; code taken from GitHub rigdenlab/conkit/core/contactmap.py"""
- x = np.array([i for c in contact_list for i in np.arange(c[1], c[0] + 1)], dtype=np.int64)[:, np.newaxis]
- bw = estimate_bandwidth(x)
- kde = KernelDensity(bandwidth=bw).fit(x)
- x_fit = np.arange(1, seq_length + 1)[:, np.newaxis]
- density = np.exp(kde.score_samples(x_fit)).tolist()
- density_max = max(density)
- density = [int(round(float(i) / density_max, 1) * 10) for i in density]
- return density