Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Refactor main into more manageable subroutines.

  • Loading branch information...
commit 4189f3b28ee5afcc9f672c4a2ef9b2478bce3909 1 parent b54c8ec
@lmjohns3 authored
Showing with 103 additions and 64 deletions.
  1. +103 −64 scripts/py-grep-plot
View
167 scripts/py-grep-plot
@@ -22,13 +22,15 @@
'''A command-line script for plotting data from text files.'''
-import os
-import re
-import sys
+import bz2
import glob
-import numpy
+import gzip
import logging
+import numpy
import optparse
+import os
+import re
+import sys
from matplotlib import pyplot
@@ -78,29 +80,36 @@ LEGEND = {
}
-def parse_line(line, regex, x, y, ey):
- m = regex.search(line)
- if not m:
- return
+def extract_groupdict(g, x, y, ey):
+ '''We've matched a line with named groups. Extract data from them.'''
+ logging.debug('group dict: %r', g)
- g = m.groupdict()
- if g:
- logging.debug('group dict: %r', g)
+ if 'x' in g:
+ while len(x) < len(y):
+ x.append(None)
+ x.append(float(g['x']))
+
+ y.append(float(g['y']))
- if 'x' in g:
- while len(x) < len(y):
- x.append(None)
- x.append(float(g['x']))
+ if 'ey' in g:
+ while len(ey) < len(y):
+ ey.append(None)
+ ey.append(float(g['ey']))
- y.append(float(g['y']))
- if 'ey' in g:
- while len(ey) < len(y):
- ey.append(None)
- ey.append(float(g['ey']))
+def search_line(line, regex, x, y, ey):
+ '''Search an input line for groups matching the given regex.
+ Extracted data will be added to the mutable x, y, and/or ey sequences.
+ '''
+ m = regex.search(line)
+ if not m:
return
+ g = m.groupdict()
+ if g:
+ return extract_groupdict(g, x, y, ey)
+
g = m.groups()
logging.debug('group matches: %r', g)
if len(g) > 3:
@@ -122,20 +131,42 @@ def parse_line(line, regex, x, y, ey):
y.append(float(g[0]))
-def main(opts, args):
- colors = [c.strip() for c in opts.colors.split(',')]
- points = [s.strip() for s in opts.points.split(',')]
+def read_input(args):
+ '''Given input pattern arguments, open up corresponding files.'''
+ if not args:
+ args.append('-')
+ for pattern in args:
+ if pattern == '-':
+ yield 'stdin', sys.stdin
+ continue
+ for filename in glob.glob(pattern):
+ if filename.endswith('.gz'):
+ handle = gzip.open(filename)
+ elif filename.endswith('.bz2'):
+ handle = bz2.BZ2File(filename)
+ else:
+ handle = open(filename)
+ yield os.path.splitext(os.path.basename(filename))[0], handle
+
+def compile_regex(args):
+ '''Compile a regular expression pattern.'''
try:
- logging.debug('compiling REGEX %r', args[0])
- regex = re.compile(args[0])
+ pattern = args[0]
+ del args[0]
except IndexError:
FLAGS.error('no REGEX supplied')
sys.exit(-1)
+ logging.debug('compiling REGEX %r', pattern)
+ try:
+ return re.compile(pattern)
except:
- logging.critical('cannot compile REGEX %r', args[0])
+ logging.critical('cannot compile REGEX %r', pattern)
sys.exit(-2)
+
+def make_axes(opts):
+ '''Create an axes object to hold our plots.'''
X, Y = opts.ylabel and 0.12 or 0.1, opts.xlabel and 0.13 or 0.1
ax = pyplot.axes([X, Y, 0.95 - X, 0.95 - Y])
ax.xaxis.tick_bottom()
@@ -144,46 +175,11 @@ def main(opts, args):
ax.set_xscale('log')
if 'y' in opts.log:
ax.set_yscale('log')
+ return ax
- c = p = 0
-
- def replot(x, y, ey, filename):
- plotter = ax.plot
- kwargs = dict(alpha=opts.alpha, aa=True)
- if opts.smooth:
- y = numpy.convolve(y, [1. / opts.smooth] * opts.smooth, 'same')
- if opts.batch:
- n = opts.batch
- count = int(numpy.ceil(float(len(y)) / n))
- batches = lambda: (y[i * n:(i + 1) * n] for i in range(count))
- y = [numpy.array(b).mean() for b in batches()]
- ey = [numpy.array(b).std() for b in batches()]
- x = x or range(len(y))
- ax.plot(x, y, points[p],
- label=os.path.splitext(os.path.basename(filename))[0],
- c=colors[c],
- mec=colors[c],
- mfc=(1, 1, 1, 1),
- mew=1.,
- **kwargs)
- if ey:
- ax.errorbar(x, y, fmt=None, yerr=ey, ecolor=colors[c], **kwargs)
- return (c + 1) % len(colors), (p + 1) % len(points)
-
- if args[1:]:
- for pattern in args[1:]:
- for filename in glob.glob(pattern):
- x, y, ey = [], [], []
- with open(filename) as handle:
- for line in handle:
- parse_line(line, regex, x, y, ey)
- c, p = replot(x, y, ey, filename)
- else:
- x, y, ey = [], [], []
- for line in sys.stdin:
- parse_line(line, regex, x, y, ey)
- c, p = replot(x, y, ey, 'stdin')
+def format_axes(ax, opts):
+ '''Format our plotting axes using the script options.'''
logging.debug('using legend: %s' % opts.legend)
loc = LEGEND.get(opts.legend)
if loc is not None:
@@ -208,6 +204,49 @@ def main(opts, args):
logging.debug('using y limit: %r', opts.ylim)
ax.set_ylim(eval(opts.ylim))
+
+def main(opts, args):
+ colors = [c.strip() for c in opts.colors.split(',')]
+ points = [s.strip() for s in opts.points.split(',')]
+
+ regex = compile_regex(args)
+ ax = make_axes(opts)
+
+ plot_kwargs = dict(alpha=opts.alpha, aa=True)
+
+ c = p = 0
+
+ def plot(label, x, y, ey):
+ if opts.smooth:
+ y = numpy.convolve(y, [1. / opts.smooth] * opts.smooth, 'same')
+
+ if opts.batch:
+ n = opts.batch
+ count = int(numpy.ceil(float(len(y)) / n))
+ batches = lambda: (y[i * n:(i + 1) * n] for i in range(count))
+ means = [numpy.array(b).mean() for b in batches()]
+ stds = [numpy.array(b).std() for b in batches()]
+ y, ey = means, stds
+
+ x = x or range(len(y))
+
+ ax.plot(x, y, points[p], c=colors[c],
+ mec=colors[c], mfc=(1, 1, 1, 1), mew=1.,
+ label=label, **plot_kwargs)
+
+ if ey:
+ ax.errorbar(x, y, fmt=None, yerr=ey, ecolor=colors[c], **plot_kwargs)
+
+ for label, lines in read_input(args):
+ x, y, ey = [], [], []
+ for line in lines:
+ search_line(line, regex, x, y, ey)
+ plot(label, x, y, ey)
+ c = (c + 1) % len(colors)
+ p = (p + 1) % len(points)
+
+ format_axes(ax, opts)
+
if opts.output:
logging.info('%s: saving plot', opts.output)
return pyplot.savefig(opts.output)
Please sign in to comment.
Something went wrong with that request. Please try again.