Skip to content

Commit

Permalink
plot_map script changed to universal plot script. Added accessibility…
Browse files Browse the repository at this point in the history
… of plotting via plot script. Modified plot look slightly.
  • Loading branch information
Felix Simkovic committed Feb 8, 2017
1 parent 0c35fcc commit bd1cdde
Show file tree
Hide file tree
Showing 13 changed files with 200 additions and 145 deletions.
6 changes: 4 additions & 2 deletions conkit/plot/SequenceCoveragePlot.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,14 @@ def _draw(self):
aa_frequencies = numpy.asarray(self._hierarchy.calculate_freq()) * self._hierarchy.top_sequence.seq_len

fig, ax = matplotlib.pyplot.subplots(dpi=self.dpi)
ax.plot(residues, aa_frequencies, color='#000000', marker='o', linestyle='-',
markersize=5, label='Amino acid count')

ax.axhline(self._hierarchy.top_sequence.seq_len * 0.3, color='r', label='30% Coverage')
ax.axhline(self._hierarchy.top_sequence.seq_len * 0.6, color='g', label='60% Coverage')

ax.plot(residues, aa_frequencies, color='#000000', marker='o', linestyle='-',
markersize=5, label='Amino acid count')


# Prettify the plot
xticks = numpy.arange(1, self._hierarchy.top_sequence.seq_len + 1, 5)
ax.set_xticks(xticks)
Expand Down
9 changes: 5 additions & 4 deletions docs/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -181,8 +181,9 @@ pseudoxml:
figures:
mkdir _tmp
git clone https://github.com/rigdenlab/conkit-examples.git _tmp/conkit-examples
conkit.plot_map -o examples/images/toxd_cmap_simple.png _tmp/conkit-examples/toxd/toxd.fasta fasta _tmp/conkit-examples/toxd/toxd.mat ccmpred
conkit.plot_map -o examples/images/toxd_cmap_reference.png -p _tmp/conkit-examples/toxd/toxd.pdb _tmp/conkit-examples/toxd/toxd.fasta fasta _tmp/conkit-examples/toxd/toxd.mat ccmpred
conkit.plot_map -o examples/images/toxd_cmap_advanced.png -e _tmp/conkit-examples/toxd/toxd.psicov -ef psicov -p _tmp/conkit-examples/toxd/toxd.pdb _tmp/conkit-examples/toxd/toxd.fasta fasta _tmp/conkit-examples/toxd/toxd.mat ccmpred
conkit.plot_map -o examples/images/toxd_cmap_confidence.png --confidence -e _tmp/conkit-examples/toxd/toxd.psicov -ef psicov -p _tmp/conkit-examples/toxd/toxd.pdb _tmp/conkit-examples/toxd/toxd.fasta fasta _tmp/conkit-examples/toxd/toxd.mat ccmpred
conkit.plot cmap -dpi 200 -o examples/images/toxd_cmap_simple.png _tmp/conkit-examples/toxd/toxd.fasta fasta _tmp/conkit-examples/toxd/toxd.mat ccmpred
conkit.plot cmap -dpi 200 -o examples/images/toxd_cmap_reference.png -p _tmp/conkit-examples/toxd/toxd.pdb _tmp/conkit-examples/toxd/toxd.fasta fasta _tmp/conkit-examples/toxd/toxd.mat ccmpred
conkit.plot cmap -dpi 200 -o examples/images/toxd_cmap_advanced.png -e _tmp/conkit-examples/toxd/toxd.psicov -ef psicov -p _tmp/conkit-examples/toxd/toxd.pdb _tmp/conkit-examples/toxd/toxd.fasta fasta _tmp/conkit-examples/toxd/toxd.mat ccmpred
conkit.plot cmap -dpi 200 -o examples/images/toxd_cmap_confidence.png --confidence -e _tmp/conkit-examples/toxd/toxd.psicov -ef psicov -p _tmp/conkit-examples/toxd/toxd.pdb _tmp/conkit-examples/toxd/toxd.fasta fasta _tmp/conkit-examples/toxd/toxd.mat ccmpred
conkit.plot scov -dpi 200 -o examples/images/toxd_freq_plot.png _tmp/conkit-examples/toxd/toxd.a3m a3m
rm -rf _tmp
4 changes: 4 additions & 0 deletions docs/examples/analyse_msa.rst
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ The output tells you the file and format you have provided. It also prints which
.. image:: images/toxd_freq_plot.png
:alt: Toxd Frequency Plot

.. note::

You can also generate the final plot using the ``conkit.plot`` script.

--------------------------------------------------------

Using Python
Expand Down
Binary file modified docs/examples/images/toxd_cmap_advanced.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/examples/images/toxd_cmap_confidence.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/examples/images/toxd_cmap_reference.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/examples/images/toxd_cmap_simple.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/examples/images/toxd_freq_plot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
12 changes: 6 additions & 6 deletions docs/examples/plot_map.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@ Plotting a contact map
Using a script
^^^^^^^^^^^^^^

If you would like to plot a contact map using ConKit without the overhead of using Python, you can simply use the ``conkit.plot_map`` script.
If you would like to plot a contact map using ConKit without the overhead of using Python, you can simply use the ``conkit.plot`` script.

.. code-block:: bash
$> conkit.plot_map toxd/toxd.fasta fasta toxd/toxd.mat ccmpred
$> conkit.plot cmap toxd/toxd.fasta fasta toxd/toxd.mat ccmpred
The call above uses the contact prediction file ``toxd.mat`` file, which is in ``ccmpred`` format, and plots the following 2D contact map stored in the file ``toxd/toxd.png``

Expand All @@ -25,7 +25,7 @@ You can also add a reference structure to determine which contacts are true and

.. code-block:: bash
$> conkit.plot_map -p toxd/toxd.pdb toxd/toxd.fasta fasta toxd/toxd.mat ccmpred
$> conkit.plot cmap -p toxd/toxd.pdb toxd/toxd.fasta fasta toxd/toxd.mat ccmpred
The call above produces a contact map plot looking like this. The gray points are the reference contacts, green show true positive contacts in your prediction and red false positive ones.

Expand All @@ -40,7 +40,7 @@ You could also add a second contact prediction file to the call to compare two m

.. code-block:: bash
$> conkit.plot_map -e toxd/toxd.psicov -ef psicov -p toxd/toxd.pdb toxd/toxd.fasta fasta toxd/toxd.mat ccmpred
$> conkit.plot cmap -e toxd/toxd.psicov -ef psicov -p toxd/toxd.pdb toxd/toxd.fasta fasta toxd/toxd.mat ccmpred
The call above produces a contact map plot looking like this. The gray points are the reference contacts, green show true positive contacts in your prediction and red false positive ones. The top triangle is the second contact map from file ``toxd/toxd.psicov`` whereas the bottom one is from ``toxd/toxd.mat``.

Expand All @@ -53,7 +53,7 @@ Finally, you could also illustrate the confidence with which each contact was pr

.. code-block:: bash
$> conkit.plot_map --confidence -e toxd/toxd.psicov -ef psicov -p toxd/toxd.pdb toxd/toxd.fasta fasta toxd/toxd.mat ccmpred
$> conkit.plot cmap --confidence -e toxd/toxd.psicov -ef psicov -p toxd/toxd.pdb toxd/toxd.fasta fasta toxd/toxd.mat ccmpred
The call above produces a contact map plot looking like this. All parameters and settings are identical to the previous map except the ``--confidence`` flag, which will show more confidently predicted contacts as larger markers.

Expand All @@ -69,7 +69,7 @@ The call above produces a contact map plot looking like this. All parameters and
Using Python
^^^^^^^^^^^^

Two simplified versions of the ``conkit.plot_map`` script is shown below to illustrate how you can plot your own contact map using Python.
Two simplified versions of the ``conkit.plot`` script is shown below to illustrate how you can plot your own contact map using Python.

.. only:: html

Expand Down
2 changes: 1 addition & 1 deletion docs/examples/predict_pipeline.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ The script to run contact prediction using `HHblits <https://github.com/soedingl
$> conkit.predict sequence <path/to/hhblits> <path/to/hhblits_database> <path/to/ccmpred> toxd/toxd.fasta fasta
The call above uses your sequence file ``toxd/toxd.fasta`` in ``fasta`` format to first generate a Multiple Sequence Alignment. It will then analyse your alignment identical to the ``conkit.msatool`` script. It will also sort out all the required conversions before executing CCMpred to run the contact prediction. Finally, it will analyse your contact prediciton and plot a contact map, just like the ``conkit.plot_map`` script does.
The call above uses your sequence file ``toxd/toxd.fasta`` in ``fasta`` format to first generate a Multiple Sequence Alignment. It will then analyse your alignment identical to the ``conkit.msatool`` script. It will also sort out all the required conversions before executing CCMpred to run the contact prediction. Finally, it will analyse your contact prediciton and plot a contact map, just like the ``conkit.plot`` script does.

2. Starting with a Multiple Sequence Alignment
++++++++++++++++++++++++++++++++++++++++++++++
Expand Down
179 changes: 179 additions & 0 deletions scripts/conkit.plot
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
#!/usr/bin/env python

__author__ = "Felix Simkovic"
__date__ = "08 Feb 2017"
__version__ = 0.1

import argparse
import conkit
import logging
import sys

logging.basicConfig(format='%(message)s', level=logging.INFO)


def add_default_args(parser):
"""Define default arguments"""
parser.add_argument('-dpi', dest='dpi', default=300, type=int,
help="the resolution in DPI [default: 300]")
parser.add_argument('-o', dest='output', default=None, type=str,
help="the figure file. Note, the format is determined by the suffix")


def add_contact_map_args(subparsers):
description = u"""
This command will plot a contact map using the provided contacts
alongside any additional information.
If you provide a reference structure, the true positive contacts
are identified by a distance of <8\u212B between C\u03B2-C\u03B2 atoms.
!!! IMPORTANT
=============
If contacts cannot be matched between your prediction and the
reference structure, they will not be plotted.
"""
contact_map_subparser = subparsers.add_parser('cmap', help="Plot a contact map", description=description,
formatter_class=argparse.RawDescriptionHelpFormatter)
add_default_args(contact_map_subparser)
contact_map_subparser.add_argument('-c', dest='pdbchain', default=None,
help='PDB chain to use [default: first in file]. Inter-molecular predictions '
'use two letter convention, i.e AD for contacts between A and D.')
contact_map_subparser.add_argument('-d', dest='dtn', default=5, type=int,
help='Minimum sequence separation [default: 5]')
contact_map_subparser.add_argument('-e', dest='otherfile', default=None,
help='a second contact map to plot for comparison')
contact_map_subparser.add_argument('-ef', dest='otherformat', default=None,
help='the format of the second contact map')
contact_map_subparser.add_argument('-f', dest='dfactor', default=1.0, type=float,
help='number of contacts to include relative to sequence length [default: 1.0]')
contact_map_subparser.add_argument('-p', dest='pdbfile', default=None, type=str,
help="A reference PDB file")
contact_map_subparser.add_argument('-pf', dest='pdbformat', default='pdb', type=str,
help="A reference PDB file")
contact_map_subparser.add_argument('--confidence', action="store_true", default=False,
help='Plot the confidence scores')
contact_map_subparser.add_argument('--interchain', action="store_true", default=False,
help='Plot inter-chain contacts')
contact_map_subparser.add_argument('seqfile',
help="Path to the sequence file")
contact_map_subparser.add_argument('seqformat',
help="Format of the sequence file")
contact_map_subparser.add_argument('confile',
help="Path to the contact file")
contact_map_subparser.add_argument('conformat',
help="Format of the contact file")
contact_map_subparser.set_defaults(which='contact_map')


def add_sequence_coverage_args(subparsers):
description = u"""
This command will plot a coverage plot for every position in your alignment.
"""
sequence_coverage_subparser = subparsers.add_parser('scov', help="Plot the sequence coverage",
description=description,
formatter_class=argparse.RawDescriptionHelpFormatter)
add_default_args(sequence_coverage_subparser)
sequence_coverage_subparser.add_argument('-id', dest='identity', default=0.7, type=float,
help='sequence identity [default: 0.7]')
sequence_coverage_subparser.add_argument('msafile',
help='Multiple Sequence Alignment file')
sequence_coverage_subparser.add_argument('msaformat',
help='Multiple Sequence Alignment format')
sequence_coverage_subparser.set_defaults(which='sequence_coverage')


def main():

description = u"""
This script provides a command-line interface to ConKit's plotting functionality.
You are provided with a single access point to many different kinds of plots.
"""
parser = argparse.ArgumentParser(description=description, formatter_class=argparse.RawDescriptionHelpFormatter)
subparsers = parser.add_subparsers()
# Add the subparsers
add_contact_map_args(subparsers)
add_sequence_coverage_args(subparsers)
# Parse all arguments
args = parser.parse_args()

if args.which == 'contact_map':
if args.interchain:
logging.info('This script is experimental for inter-chain contact plotting')

logging.info('Distance to neighbors: {0}'.format(args.dtn))
logging.info('Contact list cutoff factor: {0}'.format(args.dfactor))

seq = conkit.io.read(args.seqfile, args.seqformat)[0]
con = conkit.io.read(args.confile, args.conformat)[0]

con.sequence = seq
con.assign_sequence_register()
con.remove_neighbors(min_distance=args.dtn, inplace=True)
ncontacts = int(seq.seq_len * args.dfactor)
con.sort('raw_score', reverse=True, inplace=True)
con_sliced = con[:ncontacts]

if args.otherfile:
other = conkit.io.read(args.otherfile, args.otherformat)[0]
other.sequence = seq
other.assign_sequence_register()
other.remove_neighbors(min_distance=args.dtn, inplace=True)
other.sort('raw_score', reverse=True, inplace=True)
other_sliced = other[:ncontacts]
else:
other_sliced = None

if args.pdbfile:
if args.pdbchain:
pdb = conkit.io.read(args.pdbfile, 'pdb')[args.pdbchain]
elif args.pdbfile:
pdb = conkit.io.read(args.pdbfile, 'pdb')[0]
reference = pdb
con_matched = con_sliced.match(pdb, renumber=True, remove_unmatched=True)
if other_sliced:
other_matched = other_sliced.match(pdb, renumber=True, remove_unmatched=True)
else:
other_matched = other_sliced
else:
reference = None
con_matched = con_sliced
other_matched = other_sliced

def altloc_remove(map):
altloc = False
for contact in map.copy():
if contact.res1_chain != contact.res2_chain:
altloc = True
break
if contact.res1_chain == contact.res2_chain and args.interchain:
map.remove(contact.id)
elif contact.res1_chain != contact.res2_chain and not args.interchain:
map.remove(contact.id)
return altloc

altloc = altloc_remove(con_matched)
if other_matched:
altloc = altloc_remove(other_matched)

outformat = 'png'
outfile = args.output if args.output else args.confile.rsplit('.', 1)[0] + '.' + outformat
conkit.plot.ContactMapFigure(con_matched, other=other_matched, reference=reference,
file_name=outfile, altloc=altloc, use_conf=args.confidence,
dpi=args.dpi)

elif args.which == 'sequence_coverage':
hierarchy = conkit.io.read(args.msafile, args.msaformat)
outformat = 'png'
outfile = args.output if args.output else args.msafile.rsplit('.', 1)[0] + '.' + outformat
conkit.plot.SequenceCoverageFigure(hierarchy, file_name=outfile, dpi=args.dpi)

logging.info('Final plot written in {0} format to: {1}'.format(outformat.upper(), outfile))
return 0


if __name__ == "__main__":
sys.exit(main())
Loading

0 comments on commit bd1cdde

Please sign in to comment.