Skip to content

Commit

Permalink
Merge pull request #19 from rigdenlab/dev
Browse files Browse the repository at this point in the history
Colour schemes changed
  • Loading branch information
Felix Simkovic committed Feb 21, 2017
2 parents aff982c + d32b2df commit e08ed2f
Show file tree
Hide file tree
Showing 29 changed files with 110 additions and 94 deletions.
7 changes: 4 additions & 3 deletions conkit/applications/Bbcontacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ class BbcontactsCommandLine(AbstractCommandline):
contacts between beta-strands by detecting patterns in matrices of
predicted couplings. bbcontacts can make use of a secondary structure
assignment or a secondary structure prediction.
.. [#] Andreani J., Söding J. (2015). bbcontacts: prediction of beta-strand
pairing from direct coupling patterns. Bioinformatics 31(11), 1729-1737.
Examples
--------
Expand All @@ -45,9 +49,6 @@ class BbcontactsCommandLine(AbstractCommandline):
<https://github.com/soedinglab/bbcontacts>`_, download the latest version
and install it using python setup.py install.
.. [#] Andreani J., Söding J. (2015). bbcontacts: prediction of beta-strand
pairing from direct coupling patterns. Bioinformatics 31(11), 1729-1737.
"""

def __init__(self, cmd="bbcontacts", **kwargs):
Expand Down
8 changes: 4 additions & 4 deletions conkit/applications/CCMpred.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ class CCMpredCommandLine(AbstractCommandline):
The CCMpred program is a very fast pseudo-likelihood maximisation
implementation of covariance detection in a Multiple Sequence
Alignment. This wrapper allows for easy-to-use Python implementation.
.. [#] Seemayer S, Gruber M, Söding J (2014). CCMpred--fast and precise
prediction of protein residue-residue contacts from correlated mutations.
Bioinformatics 30(21), 3128-3130.
Examples
--------
Expand All @@ -38,10 +42,6 @@ class CCMpredCommandLine(AbstractCommandline):
You would typically run the command line with :func:`ccmpred_cline` or via
the Python subprocess module.
.. [#] Seemayer S, Gruber M, Söding J (2014). CCMpred--fast and precise
prediction of protein residue-residue contacts from correlated mutations.
Bioinformatics 30(21), 3128-3130.
"""

def __init__(self, cmd="ccmpred", **kwargs):
Expand Down
15 changes: 7 additions & 8 deletions conkit/applications/Cdhit.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,13 @@ class CdhitCommandLine(AbstractCommandline):
helps to significantly reduce the computational and manual efforts in
many sequence analysis tasks and aids in understanding the data
structure and correcting the bias within a dataset.
.. [#] Li W, Jaroszewski L, Godzik A (2001). Clustering of highly homologous sequences
to reduce the size of large protein databases. Bioinformatics 17, 282-283.
.. [#] Li W, Jaroszewski L, Godzik A (2002). Tolerating some redundancy significantly
speeds up clustering of large protein databases. Bioinformatics 18, 77-82.
Examples
--------
Expand All @@ -36,14 +43,6 @@ class CdhitCommandLine(AbstractCommandline):
You would typically run the command line with :func:`cdhit_cline` or via
the Python subprocess module.
Citations
---------
.. [#] Li W, Jaroszewski L, Godzik A (2001). Clustering of highly homologous sequences
to reduce the size of large protein databases. Bioinformatics 17, 282-283.
.. [#] Li W, Jaroszewski L, Godzik A (2002). Tolerating some redundancy significantly
speeds up clustering of large protein databases. Bioinformatics 18, 77-82.
"""

def __init__(self, cmd="cd-hit", **kwargs):
Expand Down
12 changes: 6 additions & 6 deletions conkit/applications/HHblits.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@ class HHblitsCommandLine(AbstractCommandline):
The HHblits program is a homology detection tool by iterative HMM-HMM comparison.
.. [#] Alva V., Nam SZ., Söding J., Lupas AN. (2016). The MPI bioinformatics Toolkit as an
integrative platform for advanced protein sequence and structure analysis. Nucleic Acids Res. pii: gkw348.
.. [#] Remmert M., Biegert A., Hauser A., Söding J. (2011). HHblits: Lightning-fast iterative
protein sequence searching by HMM-HMM alignment. Nat Methods. 9(2):173-5.
Examples
--------
To generate a Multiple Sequence Alignment, use:
Expand All @@ -36,12 +42,6 @@ class HHblitsCommandLine(AbstractCommandline):
You would typically run the command line with :func:`hhblits_cline` or via
the Python subprocess module.
.. [#]_ Alva V., Nam SZ., Söding J., Lupas AN. (2016). The MPI bioinformatics Toolkit as an
integrative platform for advanced protein sequence and structure analysis. Nucleic Acids Res. pii: gkw348.
.. [#]_ Remmert M., Biegert A., Hauser A., Söding J. (2011). HHblits: Lightning-fast iterative
protein sequence searching by HMM-HMM alignment. Nat Methods. 9(2):173-5.
"""

def __init__(self, cmd="hhblits", **kwargs):
Expand Down
12 changes: 6 additions & 6 deletions conkit/applications/HHfilter.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@ class HHfilterCommandLine(AbstractCommandline):
Filter an alignment by maximum sequence identity of match states and minimum coverage.
.. [#] Alva V., Nam SZ., Söding J., Lupas AN. (2016). The MPI bioinformatics Toolkit as an
integrative platform for advanced protein sequence and structure analysis. Nucleic Acids Res. pii: gkw348.
.. [#] Remmert M., Biegert A., Hauser A., Söding J. (2011). HHblits: Lightning-fast iterative
protein sequence searching by HMM-HMM alignment. Nat Methods. 9(2):173-5.
Examples
--------
To generate a Multiple Sequence Alignment, use:
Expand All @@ -33,12 +39,6 @@ class HHfilterCommandLine(AbstractCommandline):
You would typically run the command line with :func:`hhfilter_cline` or via
the Python subprocess module.
.. [#]_ Alva V., Nam SZ., Söding J., Lupas AN. (2016). The MPI bioinformatics Toolkit as an
integrative platform for advanced protein sequence and structure analysis. Nucleic Acids Res. pii: gkw348.
.. [#]_ Remmert M., Biegert A., Hauser A., Söding J. (2011). HHblits: Lightning-fast iterative
protein sequence searching by HMM-HMM alignment. Nat Methods. 9(2):173-5.
"""

def __init__(self, cmd='hhfilter', **kwargs):
Expand Down
6 changes: 3 additions & 3 deletions conkit/applications/Jackhmmer.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ class JackhmmerCommandLine(AbstractCommandline):
Jackhmmer is an algorithm that iteratively searches a protein sequence
against a protein sequence database to find sequence homologs.
.. [#] Johnson L. S., Eddy S. R., Portugaly E. (2010). Hidden Markov
Model Speed Heuristic and Iterative HMM Search Procedure. BMC Bioinformatics 11, 431.
Examples
--------
To generate a Multiple Sequence Alignment, use:
Expand All @@ -35,9 +38,6 @@ class JackhmmerCommandLine(AbstractCommandline):
You would typically run the command line with :func:`jackhmmer_cline` or via
the Python subprocess module.
.. [#] Johnson L. S., Eddy S. R., Portugaly E. (2010). Hidden Markov
Model Speed Heuristic and Iterative HMM Search Procedure. BMC Bioinformatics 11, 431.
"""

def __init__(self, cmd='jackhmmer', **kwargs):
Expand Down
8 changes: 4 additions & 4 deletions conkit/applications/Psicov.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ class PsicovCommandLine(AbstractCommandline):
The PSICOV program performs accurate contact prediction from large
protein alignments.
.. [#] Jones, D.T., Buchan, D.W., Cozzetto, D. & Pontil, M. (2012). PSICOV:
Precise structural contact prediction using sparse inverse covariance
estimation on large multiple sequence alignments. Bioinformatics. 28, 184-190.
Examples
--------
To predict a contact map using a Multiple Sequence Alignment in
Expand All @@ -34,10 +38,6 @@ class PsicovCommandLine(AbstractCommandline):
You would typically run the command line with :func:`psicov_cline` or via
the Python subprocess module.
.. [#] Jones, D.T., Buchan, D.W., Cozzetto, D. & Pontil, M. (2012). PSICOV:
Precise structural contact prediction using sparse inverse covariance
estimation on large multiple sequence alignments. Bioinformatics. 28, 184-190.
"""

def __init__(self, cmd='psicov', **kwargs):
Expand Down
7 changes: 0 additions & 7 deletions conkit/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,3 @@
'PRO': 'P', 'PYL': 'O', 'SER': 'S', 'SEC': 'U', 'THR': 'T', 'TRP': 'W', 'TYR': 'Y', 'VAL': 'V',
'ASX': 'B', 'GLX': 'Z', 'XAA': 'X', 'XLE': 'J'}

# ================================================
# Constants defining Contact().status color coding
# ================================================
TPCOLOR = '#2D9D00' # color true positive
FPCOLOR = '#AB0000' # color false positive
NTCOLOR = '#0482ff' # color undefined
RFCOLOR = '#B5B5B5' # color structure
2 changes: 0 additions & 2 deletions conkit/core/ContactFile.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@
class ContactFile(Entity):
"""A contact file object representing a single prediction file
Description
-----------
The contact file class represents a data structure to hold all predictions
within a single contact map file. It contains functions to store,
manipulate and organise contact maps.
Expand Down
2 changes: 0 additions & 2 deletions conkit/core/ContactMap.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,6 @@ def __repr__(self):
class ContactMap(Entity):
"""A contact map object representing a single prediction
Description
-----------
The :obj:`ContactMap <conkit.core.ContactMap>` class represents a data structure to hold a single
contact map prediction in one place. It contains functions to store,
manipulate and organise :obj:`Contact <conkit.core.Contact>` instances.
Expand Down
2 changes: 0 additions & 2 deletions conkit/core/Entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
class Entity(object):
"""Base class for all entities used in this interface.
Description
-----------
It handles the storage of data. It also provides high-efficiency
methods to allow fast lookup and iteration of each entity. It also
provides a hierarchical structure to remember parent and child
Expand Down
2 changes: 0 additions & 2 deletions conkit/core/SequenceFile.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@
class SequenceFile(Entity):
"""A sequence file object representing a single sequence file
Description
-----------
The :obj:`SequenceFile <conkit.core.SequenceFile>` class represents a data structure to hold
:obj:`Sequence <conkit.core.Sequence>` instances in a single sequence file. It contains
functions to store and analyze sequences.
Expand Down
2 changes: 0 additions & 2 deletions conkit/io/JonesIO.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@
class JonesIO(_SequenceFileParser):
"""Parser class for Jones sequence files
Description
-----------
This format is a "new" definition of sequence-only records.
It assumes that there are no comments, headers or any other
Expand Down
2 changes: 0 additions & 2 deletions conkit/io/PconsIO.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,6 @@
class PconsParser(_ContactFileParser):
"""Class to parse a Pcons output
Description
-----------
This module can be used to parse all versions of the
Pcons programs, i.e. PconsC, PconsC2, and PconsC3.
Expand Down
28 changes: 15 additions & 13 deletions conkit/plot/ContactMapChordPlot.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,28 @@
import numpy

from conkit.plot._Figure import Figure
from conkit.plot._plottools import points_on_circle
from conkit.plot._plottools import ColorDefinitions, points_on_circle


class ContactMapChordFigure(Figure):
"""A Figure object specifically for a Contact Map chord diagram
Description
-----------
This figure will illustrate the contacts linking the residues
in the target sequence. This plot is a very common representation
of contacts. With this figure, you can illustrate intra-molecular contacts.
Color scheme:
========== =========== ========== =========== ========== =========== ========== =========== ========== ===========
Amino acid Hex code Amino acid Hex code Amino acid Hex code Amino acid Hex code Amino acid Hex code
========== =========== ========== =========== ========== =========== ========== =========== ========== ===========
Ala ``#882D17`` Arg ``#B3446C`` Asn ``#F99379`` Asp ``#875692`` Cys ``#F3C300``
Gln ``#F6A600`` Glu ``#F38400`` Gly ``#BE0032`` His ``#C2B280`` Ile ``#848482``
Leu ``#E68FAC`` Lys ``#008856`` Met ``#0067A5`` Phe ``#A1CAF1`` Pro ``#604E97``
Ser ``#DCD300`` Thr ``#8DB600`` Trp ``#E25822`` Tyr ``#2B3D26`` Val ``#654522``
Unk ``#000000``
========== =========== ========== =========== ========== =========== ========== =========== ========== ===========
Attributes
----------
hierarchy : :obj:`ContactMap <conkit.core.ContactMap>`
Expand All @@ -37,14 +47,6 @@ class ContactMapChordFigure(Figure):
>>> conkit.plot.ContactMapChordFigure(cmap)
"""
AA_ENCODING = {
'A': '#882D17', 'C': '#F3C300', 'D': '#875692', 'E': '#F38400',
'F': '#A1CAF1', 'G': '#BE0032', 'H': '#C2B280', 'I': '#848482',
'K': '#008856', 'L': '#E68FAC', 'M': '#0067A5', 'N': '#F99379',
'P': '#604E97', 'Q': '#F6A600', 'R': '#B3446C', 'S': '#DCD300',
'T': '#8DB600', 'V': '#654522', 'W': '#E25822', 'Y': '#2B3D26',
'X': '#000000'
}

def __init__(self, hierarchy, **kwargs):
"""A new contact map plot
Expand Down Expand Up @@ -120,11 +122,11 @@ def _draw(self):
residue_data = residue_data.reshape(self_data.T[0].shape[0] * 2, 2)

# - compute a default color list
color_codes = dict([(k, ContactMapChordFigure.AA_ENCODING['X']) for k in self_data_range])
color_codes = dict([(k, ColorDefinitions.AA_ENCODING['X']) for k in self_data_range])

# - fill default dict with data we have
for k, v in numpy.vstack({tuple(row) for row in residue_data}):
color_codes[int(k)] = ContactMapChordFigure.AA_ENCODING[v]
color_codes[int(k)] = ColorDefinitions.AA_ENCODING[v]

# - create a color list
colors = [color_codes[k] for k in sorted(color_codes.keys())]
Expand Down
21 changes: 9 additions & 12 deletions conkit/plot/ContactMapPlot.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,13 @@
import matplotlib.pyplot
import numpy

from conkit import constants
from conkit.plot._Figure import Figure
from conkit.plot._plottools import ColorDefinitions


class ContactMapFigure(Figure):
"""A Figure object specifically for a Contact Map
Description
-----------
This figure will illustrate the contacts in a contact
map. This plot is a very common representation of contacts.
With this figure, you can illustrate either your contact
Expand Down Expand Up @@ -138,7 +136,7 @@ def _draw(self):
else:
reference_data = numpy.asarray([(c.res1_seq, c.res2_seq)
for c in self._reference if c.is_true_positive])
reference_colors = [constants.RFCOLOR for _ in range(len(reference_data))]
reference_colors = [ColorDefinitions.STRUCTURAL for _ in range(len(reference_data))]
ax.scatter(reference_data.T[0], reference_data.T[1], color=reference_colors,
s=10, marker='o', edgecolor='none', linewidths=0.0)
ax.scatter(reference_data.T[1], reference_data.T[0], color=reference_colors,
Expand Down Expand Up @@ -197,15 +195,15 @@ def _draw(self):

# Create a custom legend
if self._reference:
tp_artist = matplotlib.pyplot.Line2D((0, 1), (0, 0), color=constants.TPCOLOR,
tp_artist = matplotlib.pyplot.Line2D((0, 1), (0, 0), color=ColorDefinitions.MATCH,
marker='o', linestyle='', label='Match')
fp_artist = matplotlib.pyplot.Line2D((0, 1), (0, 0), color=constants.FPCOLOR,
fp_artist = matplotlib.pyplot.Line2D((0, 1), (0, 0), color=ColorDefinitions.MISMATCH,
marker='o', linestyle='', label='Mismatch')
rf_artist = matplotlib.pyplot.Line2D((0, 1), (0, 0), color=constants.RFCOLOR,
rf_artist = matplotlib.pyplot.Line2D((0, 1), (0, 0), color=ColorDefinitions.STRUCTURAL,
marker='o', linestyle='', label='Structural')
artists = [tp_artist, fp_artist, rf_artist]
else:
nt_artist = matplotlib.pyplot.Line2D((0, 1), (0, 0), color=constants.NTCOLOR,
nt_artist = matplotlib.pyplot.Line2D((0, 1), (0, 0), color=ColorDefinitions.NEUTRAL,
marker='o', linestyle='', label='Contact')
artists = [nt_artist]
ax.legend(handles=artists, numpoints=1, fontsize=10, bbox_to_anchor=(0., 1.02, 1., .102),
Expand All @@ -222,8 +220,7 @@ def _draw(self):
def _determine_color(h):
"""Determine the color of the contacts in order"""
return [
constants.TPCOLOR if contact.is_true_positive
else constants.FPCOLOR if contact.is_false_positive
else constants.NTCOLOR
for contact in h
ColorDefinitions.MATCH if contact.is_true_positive
else ColorDefinitions.MISMATCH if contact.is_false_positive
else ColorDefinitions.NEUTRAL for contact in h
]
9 changes: 4 additions & 5 deletions conkit/plot/PrecisionEvaluationPlot.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,12 @@
import numpy

from conkit.plot._Figure import Figure
from conkit.plot._plottools import ColorDefinitions


class PrecisionEvaluationFigure(Figure):
"""A Figure object specifically for a Precision evaluation.
Description
-----------
This figure will illustrate the precision scores of a contact
map at different cutoff factors. These can be determined at
various start and end points with different stepwise increases
Expand Down Expand Up @@ -168,12 +167,12 @@ def _draw(self):
fig, ax = matplotlib.pyplot.subplots()

# Add indicator lines for clarity of data
ax.axhline(0.5, color='#008E00', linestyle='-', label='50% Precision')
ax.axhline(0.5, color=ColorDefinitions.PRECISION50, linestyle='-', label='50% Precision')
if self.min_cutoff <= 1.0:
ax.axvline(1.0, color='#BDBDBD', linestyle='--', label='Factor L')
ax.axvline(1.0, color=ColorDefinitions.FACTOR1, linestyle='--', label='Factor L')

# Add data points itself
ax.plot(factors, precisions, color='#000000', marker='o', markersize=5, linestyle='-',
ax.plot(factors, precisions, color=ColorDefinitions.GENERAL, marker='o', markersize=5, linestyle='-',
label='Precision score')

# Prettify the plot
Expand Down
Loading

0 comments on commit e08ed2f

Please sign in to comment.