Skip to content

Commit

Permalink
new precision evaluation plot added
Browse files Browse the repository at this point in the history
  • Loading branch information
Felix Simkovic committed Feb 8, 2017
1 parent bd1cdde commit ce0d949
Show file tree
Hide file tree
Showing 4 changed files with 224 additions and 8 deletions.
149 changes: 149 additions & 0 deletions conkit/plot/PrecisionEvaluationPlot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
"""
A module to produce a precision evaluation plot
"""

from __future__ import division

__author__ = "Felix Simkovic"
__date__ = "07 Feb 2017"
__version__ = 0.1

import matplotlib.pyplot
import numpy

from conkit.plot._Figure import Figure


class PrecisionEvaluationFigure(Figure):
    """A figure plotting the precision score at increasing contact cutoffs

    For every cutoff factor between :attr:`min_cutoff` and :attr:`max_cutoff`
    (in increments of :attr:`cutoff_step`) the first
    ``int(sequence length * factor)`` entries of the hierarchy are selected
    and their ``precision`` is plotted.

    """

    def __init__(self, hierarchy, cutoff_step=0.2, **kwargs):
        """A precision evaluation figure

        Parameters
        ----------
        hierarchy : :obj:`conkit.core.ContactMap`
           The contact map hierarchy
        cutoff_step : float, optional
           The cutoff step
        **kwargs
           General :obj:`conkit.plot._Figure.Figure` keyword arguments

        """
        super(PrecisionEvaluationFigure, self).__init__(**kwargs)
        self._hierarchy = None
        # [minimum, maximum] cutoff factor
        self._cutoff_boundaries = [0.0, 100.0]
        self._cutoff_step = 0.2

        # Route the arguments through the property setters
        self.hierarchy = hierarchy
        self.cutoff_step = cutoff_step

        self._draw()

    @property
    def cutoff_step(self):
        """The cutoff step"""
        return self._cutoff_step

    @cutoff_step.setter
    def cutoff_step(self, cutoff_step):
        """Define the cutoff step"""
        self._cutoff_step = cutoff_step

    @property
    def hierarchy(self):
        """A ConKit :obj:`conkit.core.ContactMap`"""
        return self._hierarchy

    @hierarchy.setter
    def hierarchy(self, hierarchy):
        """Define the ConKit :obj:`conkit.core.ContactMap`

        Raises
        ------
        RuntimeError
           The hierarchy is not a contact map

        """
        # NOTE(review): a falsy hierarchy (e.g. ``None``) bypasses the type
        # check and is stored as-is -- confirm this is intended.
        if hierarchy:
            Figure._check_hierarchy(hierarchy, "ContactMap")
        self._hierarchy = hierarchy

    @property
    def min_cutoff(self):
        """The minimum cutoff factor

        Raises
        ------
        ValueError
           The minimum cutoff value is larger than or equal to the maximum

        """
        if self._cutoff_boundaries[0] >= self._cutoff_boundaries[1]:
            msg = "The minimum cutoff value is larger than or equal to the maximum"
            raise ValueError(msg)
        return self._cutoff_boundaries[0]

    @min_cutoff.setter
    def min_cutoff(self, min_cutoff):
        """Define the minimum cutoff factor"""
        self._cutoff_boundaries[0] = min_cutoff

    @property
    def max_cutoff(self):
        """The maximum cutoff factor

        Raises
        ------
        ValueError
           The maximum cutoff value is smaller than the minimum

        """
        if self._cutoff_boundaries[1] < self._cutoff_boundaries[0]:
            msg = "The maximum cutoff value is smaller than the the minimum"
            raise ValueError(msg)
        return self._cutoff_boundaries[1]

    @max_cutoff.setter
    def max_cutoff(self, max_cutoff):
        """Define the maximum cutoff factor"""
        self._cutoff_boundaries[1] = max_cutoff

    def redraw(self):
        """Re-draw the plot with updated parameters"""
        self._draw()

    def _draw(self):
        """Draw the actual plot and save it to ``self.file_name``"""
        seq_len = self._hierarchy.sequence.seq_len

        # The 0.1 added to the upper bound pushes numpy.arange's exclusive
        # stop past the maximum cutoff
        factors = numpy.arange(self.min_cutoff, self.max_cutoff + 0.1, self.cutoff_step)
        precisions = numpy.zeros(factors.shape[0])
        for i, factor in enumerate(factors):
            ncontacts = int(seq_len * factor)
            precisions[i] = self._hierarchy[:ncontacts].precision

        fig, ax = matplotlib.pyplot.subplots(dpi=self.dpi)
        try:
            ax.axhline(0.5, color='g', label='50% Precision')

            ax.plot(factors, precisions, color='#000000', marker='o', linestyle='-',
                    markersize=2, label='Precision score')

            # Prettify the plot: aim for roughly six x-ticks. The stride must
            # never drop to zero, which would happen with fewer than six
            # factors and make the slices below raise a ValueError.
            step = max(1, factors.shape[0] // 6)
            xticklabels = (factors * seq_len).astype(dtype=numpy.int64)
            ax.set_xticks(factors[::step])
            ax.set_xticklabels(xticklabels[::step])

            yticks = numpy.arange(0, 1.01, 0.2)
            ax.set_yticks(yticks)
            ax.set_yticklabels(yticks)

            ax.set_xlabel('Number of Contacts')
            ax.set_ylabel('Precision')
            ax.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=3, mode="expand", borderaxespad=0.)

            # Make axes length proportional and remove whitespace around the plot
            fig.tight_layout()

            fig.savefig(self.file_name, bbox_inches='tight')
        finally:
            # Figures created via pyplot stay registered until closed
            # explicitly; release it so repeated redraw() calls do not
            # accumulate open figures.
            matplotlib.pyplot.close(fig)
6 changes: 4 additions & 2 deletions conkit/plot/SequenceCoveragePlot.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,12 @@ def __init__(self, hierarchy, **kwargs):
**kwargs
General :obj:`conkit.plot._Figure.Figure` keyword arguments
"""
super(SequenceCoverageFigure, self).__init__(**kwargs)
self._hierarchy = hierarchy
self._hierarchy = None

self.hierarchy = hierarchy

self._draw()

@property
Expand Down
1 change: 1 addition & 0 deletions conkit/plot/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@
matplotlib.use('Agg')

from conkit.plot.ContactMapPlot import ContactMapFigure
from conkit.plot.PrecisionEvaluationPlot import PrecisionEvaluationFigure
from conkit.plot.SequenceCoveragePlot import SequenceCoverageFigure
76 changes: 70 additions & 6 deletions scripts/conkit.plot
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,42 @@ reference structure, they will not be plotted.
contact_map_subparser.set_defaults(which='contact_map')


def add_precision_evaluation_args(subparsers):
    """Register the ``peval`` sub-command and its arguments

    Parameters
    ----------
    subparsers
       The sub-parser collection (as returned by
       :func:`argparse.ArgumentParser.add_subparsers`) to attach the
       ``peval`` parser to

    """
    description = u"""
This command will plot an evaluation plot illustrating the precision score of
the provided contact prediction compared against a protein structure at different
cutoff thresholds.
"""
    subparser = subparsers.add_parser('peval', help="Plot the precision evaluation plot",
                                      description=description,
                                      formatter_class=argparse.RawDescriptionHelpFormatter)
    add_default_args(subparser)

    # Optional arguments
    subparser.add_argument('-c', dest='pdbchain', default=None,
                           help='PDB chain to use [default: first in file]. Inter-molecular '
                                'predictions use two letter convention, i.e AD for contacts '
                                'between A and D.')
    subparser.add_argument('-d', dest='dtn', default=5, type=int,
                           help='Minimum sequence separation [default: 5]')
    subparser.add_argument('-j', dest='cutoff_step', default=0.2, type=float,
                           help='The cutoff step for contact selection [default: 0.2]')
    subparser.add_argument('--interchain', action="store_true", default=False,
                           help='Plot inter-chain contacts')

    # Positional arguments: each input file is followed by its format
    subparser.add_argument('pdbfile',
                           help="A reference PDB file")
    # NOTE(review): confirm that the caller actually consumes ``pdbformat``
    subparser.add_argument('pdbformat',
                           help="Format of the reference PDB file")
    subparser.add_argument('seqfile',
                           help="Path to the sequence file")
    subparser.add_argument('seqformat',
                           help="Format of the sequence file")
    subparser.add_argument('confile',
                           help="Path to the contact file")
    subparser.add_argument('conformat',
                           help="Format of the contact file")

    # Tag the parsed namespace so the caller can dispatch on the sub-command
    subparser.set_defaults(which='precision_evaluation')


def add_sequence_coverage_args(subparsers):
description = u"""
This command will plot a coverage plot for every position in your alignment.
Expand Down Expand Up @@ -96,6 +132,7 @@ You are provided with a single access point to many different kinds of plots.
subparsers = parser.add_subparsers()
# Add the subparsers
add_contact_map_args(subparsers)
add_precision_evaluation_args(subparsers)
add_sequence_coverage_args(subparsers)
# Parse all arguments
args = parser.parse_args()
Expand Down Expand Up @@ -144,6 +181,7 @@ You are provided with a single access point to many different kinds of plots.
other_matched = other_sliced

def altloc_remove(map):
"""Remove alternative locations"""
altloc = False
for contact in map.copy():
if contact.res1_chain != contact.res2_chain:
Expand All @@ -161,19 +199,45 @@ You are provided with a single access point to many different kinds of plots.

outformat = 'png'
outfile = args.output if args.output else args.confile.rsplit('.', 1)[0] + '.' + outformat
conkit.plot.ContactMapFigure(con_matched, other=other_matched, reference=reference,
file_name=outfile, altloc=altloc, use_conf=args.confidence,
dpi=args.dpi)
plot = conkit.plot.ContactMapFigure(con_matched, other=other_matched, reference=reference,
file_name=outfile, altloc=altloc, use_conf=args.confidence,
dpi=args.dpi)

elif args.which == 'precision_evaluation':
if args.interchain:
logging.info('This script is experimental for inter-chain contact plotting')

logging.info('Distance to neighbors: {0}'.format(args.dtn))
logging.info('Contact list cutoff factor step: {0}'.format(args.cutoff_step))

seq = conkit.io.read(args.seqfile, args.seqformat)[0]
con = conkit.io.read(args.confile, args.conformat)[0]

con.sequence = seq
con.assign_sequence_register()
con.remove_neighbors(min_distance=args.dtn, inplace=True)
con.sort('raw_score', reverse=True, inplace=True)

if args.pdbchain:
pdb = conkit.io.read(args.pdbfile, 'pdb')[args.pdbchain]
else:
pdb = conkit.io.read(args.pdbfile, 'pdb')[0]
con_matched = con.match(pdb, renumber=True, remove_unmatched=True)

outformat = 'png'
outfile = args.output if args.output else args.confile.rsplit('.', 1)[0] + '.' + outformat
plot = conkit.plot.PrecisionEvaluationFigure(con_matched, cutoff_step=args.cutoff_step, file_name=outfile,
dpi=args.dpi)

elif args.which == 'sequence_coverage':
hierarchy = conkit.io.read(args.msafile, args.msaformat)
outformat = 'png'
outfile = args.output if args.output else args.msafile.rsplit('.', 1)[0] + '.' + outformat
conkit.plot.SequenceCoverageFigure(hierarchy, file_name=outfile, dpi=args.dpi)
plot = conkit.plot.SequenceCoverageFigure(hierarchy, file_name=outfile, dpi=args.dpi)

logging.info('Final plot written in {0} format to: {1}'.format(outformat.upper(), outfile))
logging.info('Final plot written in {0} format to: {1}'.format(plot.format.upper(), plot.file_name))
return 0


if __name__ == "__main__":
sys.exit(main())
sys.exit(main())

0 comments on commit ce0d949

Please sign in to comment.