Skip to content

Commit

Permalink
documentation and clarity edited for examples and scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
Felix Simkovic committed Feb 1, 2017
1 parent fa7e8d1 commit 2c4e09d
Show file tree
Hide file tree
Showing 11 changed files with 238 additions and 53 deletions.
2 changes: 1 addition & 1 deletion conkit/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def contact_map(hierarchy, other=None, reference=None, altloc=False, dpi=300,
# Allow dynamic x and y limits
min_res_seq = numpy.min(self_data.ravel())
max_res_seq = numpy.max(self_data.ravel())
if other and other_data:
if other:
min_res_seq = numpy.min(numpy.append(self_data.ravel(), other_data.ravel()))
max_res_seq = numpy.max(numpy.append(self_data.ravel(), other_data.ravel()))
ax.set_xlim(min_res_seq - 0.5, max_res_seq + 0.5)
Expand Down
48 changes: 48 additions & 0 deletions docs/examples/code/map_plotting_reference.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
"""
Simple contact map plotting 2
=============================
This script contains a simple example of how you can plot
contact maps with a reference structure using ConKit
"""

import conkit

# Define the input variables
sequence_file = "toxd/toxd.fasta"
sequence_format = "fasta"
contact_file = "toxd/toxd.mat"
contact_format = "ccmpred"

# Create ConKit hierarchies
# Note, we only need the first Sequence/ContactMap
# from each file
seq = conkit.io.read(sequence_file, sequence_format).top_sequence
conpred = conkit.io.read(contact_file, contact_format).top_map

# Assign the sequence register to your contact prediction
conpred.sequence = seq
conpred.assign_sequence_register()

# We need to tidy our contact prediction before plotting
conpred.remove_neighbors(inplace=True)
conpred.sort('raw_score', reverse=True, inplace=True)

# Finally, we don't want to plot all contacts but only the top-L,
# so we need to slice the contact map
map = conpred[:conpred.sequence.seq_len]

# ====================================================
# The code above is identical to the previous example
# Now we need to compare it to our reference structure
pdb_file = "toxd/toxd.pdb"
pdb = conkit.io.read(pdb_file, "pdb").top_map
# The two keywords do the following:
# - remove_unmatched : remove contacts absent from the pdb_file
# - renumber : match the numbering to the pdb_file
map_matched = map.match(pdb, remove_unmatched=True, renumber=True)

# Then we can plot the map
contact_plot = "toxd/toxd.png"
conkit.plot.contact_map(map_matched, reference=pdb, file_name=contact_plot)
38 changes: 38 additions & 0 deletions docs/examples/code/map_plotting_simple.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
"""
Simple contact map plotting 1
=============================
This script contains a simple example of how you can plot
contact maps using ConKit
"""

import conkit

# Define the input variables
sequence_file = "toxd/toxd.fasta"
sequence_format = "fasta"
contact_file = "toxd/toxd.mat"
contact_format = "ccmpred"

# Create ConKit hierarchies
# Note, we only need the first Sequence/ContactMap
# from each file
seq = conkit.io.read(sequence_file, sequence_format).top_sequence
conpred = conkit.io.read(contact_file, contact_format).top_map

# Assign the sequence register to your contact prediction
conpred.sequence = seq
conpred.assign_sequence_register()

# We need to tidy our contact prediction before plotting
conpred.remove_neighbors(inplace=True)
conpred.sort('raw_score', reverse=True, inplace=True)

# Finally, we don't want to plot all contacts but only the top-L,
# so we need to slice the contact map
map = conpred[:conpred.sequence.seq_len]

# Then we can plot the map
contact_plot = "toxd/toxd.png"
conkit.plot.contact_map(map, file_name=contact_plot)
2 changes: 1 addition & 1 deletion docs/examples/code/predict_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,5 +63,5 @@

# Convert the contact prediction to a standardised format
casp_file = "toxd/toxd.rr"
conkit.io.write(casp_file, "casprr", conpred)
conkit.io.convert(mat_file, "ccmpred", casp_file, "casprr")
print("Final Contact Prediction File: %s" % casp_file)
2 changes: 1 addition & 1 deletion docs/examples/hierarchy.rst
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
.. _example_constructing_a_hierarchy:

Hierarchy Construction
======================
----------------------

.. note::
The easiest way to construct a contact file hierarchy is to choose the corresponding parser.
Expand Down
Binary file added docs/examples/images/toxd_cmap_advanced.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/examples/images/toxd_cmap_reference.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/examples/images/toxd_cmap_simple.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
89 changes: 73 additions & 16 deletions docs/examples/map_plotting.rst
Original file line number Diff line number Diff line change
@@ -1,35 +1,92 @@
.. _example_plotting_a_map:

Contact Map Plotting
====================

To plot contacts in form of a contact map using ConKit, you need a contact prediction file.
--------------------

.. warning::
You require the `Matplotlib <http://matplotlib.org/>`_ package to use this functionality. If you are unsure if it is installed on your system, refer to the :ref:`installation` documentation
You require the `Matplotlib <http://matplotlib.org/>`_ package to use this script. If you are unsure if it is installed on your system, refer to the :ref:`installation` documentation

Using a script
^^^^^^^^^^^^^^

If you would like to plot a contact map using ConKit without the overhead of using Python, you can simply use the ``conkit.plot_map`` script.

--------------------------------------------------------------

.. code-block:: bash
$> conkit.plot_map toxd/toxd.fasta fasta toxd/toxd.mat ccmpred
The call above uses the contact prediction file ``toxd.mat`` file, which is in ``ccmpred`` format, and plots the following 2D contact map stored in the file ``toxd/toxd.png``

.. _Toxd Simple Contact Map:

.. image:: images/toxd_cmap_simple.png
:alt: Toxd CMap Simple

--------------------------------------------------------------

You can also add a reference structure to determine which contacts are true and false positive contacts. By default, all contacts are identified in the reference structure by measuring the distance between Cβ atoms, whereby all atoms closer than 8Å are considered to be in contact.

.. code-block:: bash
$> conkit.plot_map -p toxd/toxd.pdb toxd/toxd.fasta fasta toxd/toxd.mat ccmpred
The call above produces a contact map plot looking like this. The gray points are the reference contacts, green show true positive contacts in your prediction and red false positive ones.

.. _Toxd Reference Contact Map:

.. image:: images/toxd_cmap_reference.png
:alt: Toxd CMap Reference

--------------------------------------------------------------

Finally, you could also add a second contact prediction file to the call to compare two maps against each other.

.. code-block:: bash
$> conkit.plot_map -e toxd/toxd.psicov -ef psicov -p toxd/toxd.pdb toxd/toxd.fasta fasta toxd/toxd.mat ccmpred
The call above produces a contact map plot looking like this. The gray points are the reference contacts, green show true positive contacts in your prediction and red false positive ones. The top triangle is the second contact map from file ``toxd/toxd.psicov`` whereas the bottom one is from ``toxd/toxd.mat``.

.. image:: images/toxd_cmap_advanced.png
:alt: Toxd CMap Advanced

.. note::
ConKit provides a script that allows you to plot contact maps from the command line. The script is called ``conkit.plot_map`` and installed automatically.

You can use the last example also **without** a reference structure!

--------------------------------------------------------

Using Python
^^^^^^^^^^^^

Two simplified versions of the ``conkit.plot_map`` script is shown below to illustrate how you can plot your own contact map using Python.

--------------------------------------------------------------

.. only:: html

To start, you need to read the file into its hierarchy.
.. note::

.. code-block:: python
You can download this script :download:`here <code/map_plotting_simple.py>`.

>>> import conkit
>>> contact_h = conkit.io.read('pathto/contact.file', 'contact_format')
.. literalinclude:: /../docs/examples/code/map_plotting_simple.py
:language: python
:linenos:

With the code above, we created a :obj:`conkit.core.ContactFile` hierarchy. This allows you to store contact maps in a single hierarchy. Here, we are only interested in the first, and thus we can remove the rest.
This will produce the `Toxd Simple Contact Map`_ plot.

.. code-block:: python
--------------------------------------------------------------

>>> contact_map = contact_h.top_map
.. only:: html

Finally, to plot the contact map, all we need to do is invoke the relevant function.
.. note::

.. code-block:: python
You can download this script :download:`here <code/map_plotting_reference.py>`.

>>> contact_map.plot_map()
.. literalinclude:: /../docs/examples/code/map_plotting_reference.py
:language: python
:linenos:

This will create a contact map plot that will be saved to your disk.
This will produce the `Toxd Reference Contact Map`_ plot.
108 changes: 75 additions & 33 deletions scripts/conkit.plot_map
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,17 @@ import sys

logging.basicConfig(format='%(message)s', level=logging.INFO)

DESCRIPTION = """
Plot a simple contact map
DESCRIPTION = u"""
This script will plot a contact map using the provided contacts
alongside any additional information.
If you provide a reference structure, the true positive contacts
are identified by a distance of <8\u212B between C\u03B2-C\u03B2 atoms.
!!! IMPORTANT
=============
If contacts cannot be matched between your prediction and the
reference structure, they will not be plotted.
File format options are:
Expand All @@ -24,60 +33,93 @@ File format options are:

def main():
parser = argparse.ArgumentParser(description=DESCRIPTION, formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument('-c', dest='pdbchain', default=None, help='PDB chain to use [default: first in file]. Inter-molecular predictions use two letter convention, i.e AD for contacts between A and D.')
parser.add_argument('-d', dest='dtn', default=5, type=int, help='Minimum sequence separation [default: 5]')
parser.add_argument('-f', dest='dfactor', default=1.0, type=float, help='number of contacts to include relative to sequence length [default: 1.0]')
parser.add_argument('-p', dest='pdbfile', default=None, type=str)
parser.add_argument('--interchain', action="store_true", default=False, help='Plot inter-chain contacts')
parser.add_argument('seqfile')
parser.add_argument('seqformat')
parser.add_argument('confile')
parser.add_argument('conformat')
parser.add_argument('-c', dest='pdbchain', default=None,
help='PDB chain to use [default: first in file]. Inter-molecular '
'predictions use two letter convention, i.e AD for contacts between A and D.')
parser.add_argument('-d', dest='dtn', default=5, type=int,
help='Minimum sequence separation [default: 5]')
parser.add_argument('-e', dest='otherfile', default=None,
help='a second contact map to plot for comparison')
parser.add_argument('-ef', dest='otherformat', default=None,
help='the format of the second contact map')
parser.add_argument('-f', dest='dfactor', default=1.0, type=float,
help='number of contacts to include relative to sequence length [default: 1.0]')
parser.add_argument('-p', dest='pdbfile', default=None, type=str,
help="A reference PDB file")
parser.add_argument('--interchain', action="store_true", default=False,
help='Plot inter-chain contacts')
parser.add_argument('seqfile',
help="Path to the sequence file")
parser.add_argument('seqformat',
help="Format of the sequence file")
parser.add_argument('confile',
help="Path to the contact file")
parser.add_argument('conformat',
help="Format of the contact file")
args = parser.parse_args()

logging.info('This script is experimental for inter-chain contact plotting')

if args.interchain:
logging.info('This script is experimental for inter-chain contact plotting')

logging.info('Distance to neighbors: {0}'.format(args.dtn))
logging.info('Contact list cutoff factor: {0}'.format(args.dfactor))

seq = conkit.io.read(args.seqfile, args.seqformat)[0]
con = conkit.io.read(args.confile, args.conformat)[0]

con.sequence = seq
con.assign_sequence_register()

logging.info('Distance to neighbors: {0}'.format(args.dtn))
con.remove_neighbors(min_distance=args.dtn, inplace=True)

ncontacts = int(seq.seq_len * args.dfactor)
logging.info('Number of contacts: {0}'.format(ncontacts))
con.sort('raw_score', reverse=True, inplace=True)
con_sliced = con[:ncontacts]


if args.otherfile:
other = conkit.io.read(args.otherfile, args.otherformat)[0]
other.sequence = seq
other.assign_sequence_register()
other.remove_neighbors(min_distance=args.dtn, inplace=True)
other.sort('raw_score', reverse=True, inplace=True)
other_sliced = other[:ncontacts]
else:
other_sliced = None

if args.pdbfile:
if args.pdbchain:
pdb = conkit.io.read(args.pdbfile, 'pdb')[args.pdbchain]
elif args.pdbfile:
pdb = conkit.io.read(args.pdbfile, 'pdb')[0]
reference = pdb
logging.info('Potentially removing unmatched contact pairs')
con_matched = con_sliced.match(pdb, renumber=True, remove_unmatched=True)
if other_sliced:
other_matched = other_sliced.match(pdb, renumber=True, remove_unmatched=True)
else:
other_matched = other_sliced
else:
reference = None
con_matched = con_sliced

altloc=False
for contact in con_matched.copy():
if contact.res1_chain != contact.res2_chain:
logging.debug('Setting altloc to True')
altloc = True
break
if contact.res1_chain == contact.res2_chain and args.interchain:
con_matched.remove(contact.id)
elif contact.res1_chain != contact.res2_chain and not args.interchain:
con_matched.remove(contact.id)
other_matched = other_sliced

def altloc_remove(map):
altloc=False
for contact in map.copy():
if contact.res1_chain != contact.res2_chain:
altloc = True
break
if contact.res1_chain == contact.res2_chain and args.interchain:
map.remove(contact.id)
elif contact.res1_chain != contact.res2_chain and not args.interchain:
map.remove(contact.id)
return altloc
altloc = altloc_remove(con_matched)
if other_matched:
altloc = altloc_remove(other_matched)

mapformat = 'pdf'
mapfile = os.path.basename(args.confile).rsplit('.', 1)[0] + '.' + mapformat
mapfile = args.confile.rsplit('.', 1)[0] + '.' + mapformat
logging.info('Contact map written in {0} format to: {1}'.format(mapformat.upper(), mapfile))
con_matched.plot_map(reference=reference, file_format=mapformat, file_name=mapfile, altloc=altloc)
conkit.plot.contact_map(con_matched, other=other_matched, reference=reference,
file_format=mapformat, file_name=mapfile, altloc=altloc)

return 0

Expand Down
2 changes: 1 addition & 1 deletion scripts/conkit.predict
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ def main():

# Use the ccmpred parser to write a contact file
casprr_fname = os.path.join(args.wdir, args.prefix + '.rr')
conkit.io.write(casprr_fname, 'casprr', cmap)
conkit.io.convert(matrix_fname, 'ccmpred', casprr_fname, 'casprr')
logging.info('Final prediction file: {0}'.format(casprr_fname))

return 0
Expand Down

0 comments on commit 2c4e09d

Please sign in to comment.