Permalink
Browse files

Many improvements to the script -- allow multiple match patterns to make different series of data, remove top and right axes, make bottom and left axes more pretty, add option to plot horizontal and vertical lines.
  • Loading branch information...
1 parent 74b5629 commit 045ea924bc8ec80327588e18335253e95b0d1adc Leif Johnson committed Mar 1, 2013
Showing with 88 additions and 38 deletions.
  1. +88 −38 scripts/py-grep-plot
View
@@ -71,6 +71,12 @@ class ArgParser(argparse.ArgumentParser):
FLAGS = ArgParser()
+g = FLAGS.add_mutually_exclusive_group()
+g.add_argument('-k', '--columns', nargs='+', type=int, metavar='K',
+ help='extract data from the Kth space-separated column')
+g.add_argument('-m', '--match', nargs='+', default=(r'([-+eE.\d]+)', ), metavar='RE',
+ help='extract data points from inputs using RE')
+
g = FLAGS.add_argument_group('output')
g.add_argument('-A', '--auto', action='store_true',
help='layout plot automatically')
@@ -86,21 +92,25 @@ g.add_argument('-b', '--batch', type=int, metavar='N',
help='batch data into groups of N points and plot mean + std')
g.add_argument('-e', '--every', type=int, metavar='N',
help='restrict plot to show only every Nth data point')
-g.add_argument('-k', '--column', nargs='*', type=int, metavar='K',
- help='extract data from the Kth space-separated column')
-g.add_argument('-r', '--regex', default=r'([-+eE.\d]+)', metavar='RE',
- help='extract data points from inputs using RE')
+g.add_argument('-f', '--fill-error', action='store_true',
+ help='display vertical error regions as a filled polygon')
+g.add_argument('-j', '--jitter', type=float, metavar='R',
+ help='add N(0, R) jitter to x values')
g.add_argument('-s', '--smooth', type=int, metavar='N',
- help='smooth across N points before plotting')
+ help='smooth data using N-sample rectangular window')
+g.add_argument('--hline', type=float, default=(), nargs='+', metavar='Y',
+ help='draw horizontal lines at Y')
+g.add_argument('--vline', type=float, default=(), nargs='+', metavar='X',
+ help='draw vertical lines at X')
g = FLAGS.add_argument_group('series')
g.add_argument('-a', '--alpha', type=float, default=0.9, metavar='N',
help='plot series with alpha N')
g.add_argument('-c', '--colors', nargs='+', default=tuple('krcbmgy'), metavar='C',
help='cycle through the given colors')
-g.add_argument('-p', '--points', nargs='+', default=['o-'], metavar='S',
+g.add_argument('-p', '--points', nargs='+', default=('o-', ), metavar='S',
help='cycle through the given line/point styles')
-g.add_argument('-n', '--names', nargs='+', metavar='L',
+g.add_argument('-n', '--names', nargs='+', default=(), metavar='L',
help='use these names in the legend')
g = FLAGS.add_argument_group('axes')
@@ -110,7 +120,7 @@ g.add_argument('-d', '--dates', metavar='FMT',
help='parse dates from x data using FMT')
g.add_argument('-L', '--legend', choices=tuple(sorted(LEGEND.keys())),
help='include a legend (None)')
-g.add_argument('-l', '--log', choices=['x', 'y', 'xy'],
+g.add_argument('-l', '--log', choices=('x', 'y', 'xy'),
help='use a log scale on the specified axes')
g.add_argument('-t', '--title', metavar='S',
help='use S as the plot title')
@@ -123,8 +133,8 @@ g.add_argument('-X', '--xlim', metavar='A,B',
g.add_argument('-Y', '--ylim', metavar='A,B',
help='use (A,B) as the range for the y-axis')
-FLAGS.add_argument('input', metavar='FILE', nargs=argparse.REMAINDER,
- help='extract data for plotting from FILE')
+FLAGS.add_argument('input', metavar='PATTERN', nargs=argparse.REMAINDER,
+ help='extract data from files matching PATTERN')
def extract_columns(data, columns, x, y, ex, ey):
@@ -170,7 +180,7 @@ def extract_groupdict(g, x, y, ex, ey):
def extract_groups(g, x, y, ex, ey):
logging.debug('group matches: %r', g)
if len(g) > 3:
- FLAGS.error('unnamed --regex cannot match more than 3 values')
+ FLAGS.error('unnamed --match cannot match more than 3 values')
elif len(g) == 3:
while len(x) < len(y):
x.append(None)
@@ -207,16 +217,11 @@ def search_line(line, regex, columns, *series):
extract_groups(m.groups(), *series)
-def read_input(args, names):
- '''Given input pattern arguments, open up corresponding files.'''
- logging.debug('reading input data from %r', args)
- if not args:
- args.append('-')
- names = names or ()
- for i, pattern in enumerate(args):
- name = len(names) > i and names[i] or ''
+def open_inputs(inputs):
+ '''Given input pattern arguments, open up matching files.'''
+ for i, pattern in enumerate(inputs or '-'):
if pattern == '-':
- yield name or 'stdin', sys.stdin
+ yield '-', sys.stdin
continue
for filename in glob.glob(pattern):
if filename.endswith('.gz'):
@@ -225,16 +230,16 @@ def read_input(args, names):
handle = bz2.BZ2File(os.path.expanduser(filename))
else:
handle = open(os.path.expanduser(filename))
- yield name or os.path.splitext(os.path.basename(filename))[0], handle
+ yield filename, handle
-def compile_regex(args):
+def compile_regex(regex):
'''Compile a regular expression pattern.'''
- logging.debug('compiling REGEX %r', args.regex)
+ logging.info('compiling regular expression %r', regex)
try:
- return re.compile(args.regex)
+ return re.compile(regex)
except:
- logging.critical('cannot compile REGEX %r', args.regex)
+ logging.critical('cannot compile regular expression %r', regex)
sys.exit(-2)
@@ -243,6 +248,11 @@ def make_axes(args):
ax = plt.subplot(111)
ax.xaxis.tick_bottom()
ax.yaxis.tick_left()
+ for loc, spine in ax.spines.iteritems():
+ if loc in 'left bottom':
+ spine.set_position(('outward', 6))
+ elif loc in 'right top':
+ spine.set_color('none')
if args.log and 'x' in args.log:
ax.set_xscale('log')
if args.log and 'y' in args.log:
@@ -283,13 +293,11 @@ def format_axes(ax, args):
def main(args):
- regex = compile_regex(args)
ax = make_axes(args)
- plot_kwargs = dict(alpha=args.alpha, aa=True)
-
colors = itertools.cycle(args.colors)
points = itertools.cycle(args.points)
+ limits = dict(xmin=0, xmax=0, ymin=0, ymax=0)
def plot(label, x, y, ex, ey):
if args.dates:
@@ -317,29 +325,71 @@ def main(args):
ex = ex[::args.every]
ey = ey[::args.every]
+ if args.jitter:
+ x += args.jitter * np.random.randn(len(x)) * x
+
color = next(colors)
- if len(ex) and len(ey):
+ if x:
+ xlo, xhi = min(x), max(x)
+ if limits['xmin'] is None:
+ limits['xmin'] = xlo
+ limits['xmax'] = xhi
+ else:
+ limits['xmin'] = min(limits['xmin'], xlo)
+ limits['xmax'] = max(limits['xmax'], xhi)
+
+ if y:
+ ylo, yhi = min(y), max(y)
+ if limits['ymin'] is None:
+ limits['ymin'] = ylo
+ limits['ymax'] = yhi
+ else:
+ limits['ymin'] = min(limits['ymin'], ylo)
+ limits['ymax'] = max(limits['ymax'], yhi)
+
+ if len(ex):
ex = np.asarray(ex)
ey = np.asarray(ey)
- ax.errorbar(x, y, xerr=ex, yerr=ey, color=color, alpha=0.1, aa=True)
+ ax.errorbar(x, y, xerr=ex, yerr=ey, color=color, aa=True)
return
- if len(ex):
- ex = np.asarray(ex)
- ax.fill_between(x + ex, x - ex, color=color, alpha=0.1, aa=True)
if len(ey):
ey = np.asarray(ey)
- ax.fill_between(y + ey, y - ey, color=color, alpha=0.1, aa=True)
+ if args.fill_error:
+ ax.fill_between(x, y + ey, y - ey, color=color, alpha=0.3, linewidth=0, antialiased=True)
+ else:
+ ax.errorbar(x, y, yerr=ey, color=color, aa=True)
+
+ ax.plot(x, y, next(points), c=color, markeredgecolor=color, label=label, alpha=args.alpha, aa=True)
- ax.plot(x, y, next(points), c=color, markeredgecolor=color, label=label, **plot_kwargs)
+ regexs = itertools.cycle(compile_regex(r) for r in args.match)
- for label, lines in read_input(args.input, args.names):
+ inputs = list(open_inputs(args.input))
+ if len(inputs) == 1 and 1 < len(args.match):
+ inputs = [next(open_inputs(args.input)) for _ in args.match]
+
+ for i, (path, handle) in enumerate(inputs):
+ logging.info('reading data from %s', path)
+ regex = next(regexs)
series = [], [], [], []
- for line in lines:
- search_line(line, regex, args.column, *series)
+ for l, line in enumerate(handle):
+ try:
+ search_line(line, regex, args.columns, *series)
+ except:
+ logging.exception('error extracting data from %s:%d %r', path, l, line.rstrip())
+ label = os.path.splitext(os.path.basename(path))[0]
+ if len(args.names) > i:
+ label = args.names[i]
+ elif len(args.match) > i:
+ label = match.pattern
plot(label, *series)
+ for y, c in zip(args.hline, itertools.cycle(args.colors)):
+ ax.hlines(y, limits['xmin'], limits['xmax'], c, 'dashed')
+ for x, c in zip(args.vline, itertools.cycle(args.colors)):
+ ax.vlines(x, limits['ymin'], limits['ymax'], c, 'dashed')
+
format_axes(ax, args)
if args.figsize:

0 comments on commit 045ea92

Please sign in to comment.