#!/usr/bin/env python
# scripts/grab_graphs.py
# (From patch 523e185fa4d15705606c288e51e26912b164e3c9, "Added utility scripts
# to parse the XLA_SAVE_TENSORS_FILE and metrics report outputs",
# Davide Libenzi, 2019-04-09.)
#
# Parses the output of XLA_SAVE_TENSORS_FILE and produces statistics about
# graph types and Python frames.

from __future__ import print_function

import argparse
import collections
import difflib
import os
import re
import shutil
import sys

# One captured IR graph: `id` is its ordinal position in the input, `graph`
# the raw IR text lines, `ngraph` the normalized IR lines (SSA ids and operand
# lists stripped, for stable comparison), and `frame` the Python frame lines
# reported just before the graph (None if the dump had no frame section).
GraphInfo = collections.namedtuple('GraphInfo', 'id, graph, ngraph, frame')


def save_graph(graph, path):
  """Writes the graph lines, newline separated, to the file at `path`."""
  with open(path, 'w') as fd:
    fd.write('\n'.join(graph))


def normalize(graph):
  """Strips SSA ids and operand lists from IR lines.

  Example input line:
    %397 = f32[128]{0} xla::cross_replica_sum(%396), scale=0.125, groups=()
  becomes:
    f32[128]{0} xla::cross_replica_sum(), scale=0.125, groups=()
  so that otherwise-identical graphs with different numbering compare equal.

  Args:
    graph: List of raw IR text lines.

  Returns:
    A new list of normalized lines; non-matching lines pass through unchanged.
  """
  ngraph = []
  for line in graph:
    m = re.match(r'(\s*)%\d+\s*=\s*(.*::[^(]+\()[^)]*(.*)', line)
    if m:
      line = m.group(1) + m.group(2) + m.group(3)
    ngraph.append(line)
  return ngraph


def parse_graphs(gfile, dest_dir, graphs=None):
  """Parses an XLA_SAVE_TENSORS_FILE dump into GraphInfo records.

  Args:
    gfile: An iterable of lines (an open file object works).
    dest_dir: If not None, a directory to be created, into which every raw
      graph is also saved as 'graph_NNNN'. Must not already exist.
    graphs: Optional list to append to, allowing accumulation across files.

  Returns:
    The list of GraphInfo records parsed.

  Raises:
    RuntimeError: If `dest_dir` already exists.
  """
  if dest_dir:
    if os.path.isdir(dest_dir):
      raise RuntimeError('Folder already exists: {}'.format(dest_dir))
    os.mkdir(dest_dir)

  if graphs is None:
    graphs = []
  graph, frame, last_frame = None, None, None
  for line in gfile:
    line = line.rstrip('\n')
    if frame is not None:
      # Inside a TensorsGraphInfo section; a blank line terminates it.
      if re.match(r'\s*$', line):
        last_frame = frame
        frame = None
      else:
        frame.append(line)
    elif graph is not None:
      # Inside an IR graph; a line holding only '}' terminates it.
      graph.append(line)
      if re.match(r'}\s*$', line):
        if dest_dir:
          save_graph(graph,
                     os.path.join(dest_dir,
                                  'graph_{:04d}'.format(len(graphs))))
        graphs.append(
            GraphInfo(
                id=len(graphs),
                graph=graph,
                ngraph=normalize(graph),
                frame=last_frame))
        graph = None
        last_frame = None
    elif re.match(r'TensorsGraphInfo:', line):
      frame = []
    elif re.match(r'IR {\s*', line):
      graph = [line]
  return graphs


# Backward compatible alias for the original (misspelled) public name.
prase_graphs = parse_graphs


def group_by_frame(graphs):
  """Groups GraphInfo records by the joined text of their Python frame.

  Graphs with no frame (frame is None) are grouped under the empty key
  instead of raising TypeError as the original join(None) did.
  """
  fgroup = collections.defaultdict(list)
  for graph in graphs:
    fgroup['\n'.join(graph.frame or [])].append(graph)
  return fgroup


def set_add(s, i):
  """Adds `i` to set `s`; returns True iff it was not already present."""
  plen = len(s)
  s.add(i)
  return len(s) > plen


def diff_graphs(g1, g2, name1, name2, prefix=''):
  """Returns a unified diff of the normalized graphs of `g1` and `g2`.

  Every diff line is prefixed with `prefix`, and a newline is appended to
  lines which difflib emitted without one (content lines from newline-free
  input). Returns the empty string when the normalized graphs are equal.
  """
  diff = difflib.unified_diff(g1.ngraph, g2.ngraph, name1, name2)
  pieces = []
  for line in diff:
    # endswith() is safe on an empty line, where line[-1] would raise.
    if line.endswith('\n'):
      pieces.append('{}{}'.format(prefix, line))
    else:
      pieces.append('{}{}\n'.format(prefix, line))
  return ''.join(pieces)


def process_graphs(args):
  """Parses graphs from stdin or `args.files` and reports per-frame stats.

  Graph saving (when --graphdir is given) is performed here, once, after all
  inputs are parsed: the original passed the directory to every per-file
  parse call, so a second input file always died with 'Folder already
  exists'.
  """
  graphs = []
  if not args.files:
    parse_graphs(sys.stdin, None, graphs=graphs)
  else:
    for fname in args.files:
      with open(fname, 'r') as fd:
        parse_graphs(fd, None, graphs=graphs)
  if args.graphdir:
    if os.path.isdir(args.graphdir):
      raise RuntimeError('Folder already exists: {}'.format(args.graphdir))
    os.mkdir(args.graphdir)
    for graph in graphs:
      save_graph(graph.graph,
                 os.path.join(args.graphdir,
                              'graph_{:04d}'.format(graph.id)))
  print('Parsed {} graph(s)'.format(len(graphs)))
  fgroup = group_by_frame(graphs)
  print('{} frame group(s)'.format(len(fgroup)))
  for f, fgraphs in fgroup.items():
    # Keep only the first occurrence of each distinct raw graph.
    uniq = set()
    uniq_graphs = []
    for graph in fgraphs:
      if set_add(uniq, '\n'.join(graph.graph)):
        uniq_graphs.append(graph)
    print('Frame has {} graph(s) ({} unique):\n{}\n'.format(
        len(fgraphs), len(uniq), f))
    # Diff consecutive unique graphs, newest pair first.
    for i in range(len(uniq_graphs) - 1, 0, -1):
      print(' Frame {} (len={}) vs {} (len={})'.format(
          i - 1, len(uniq_graphs[i - 1].graph), i, len(uniq_graphs[i].graph)))
      print(
          diff_graphs(
              uniq_graphs[i - 1],
              uniq_graphs[i],
              'frame-{}'.format(i - 1),
              'frame-{}'.format(i),
              prefix='  '))


if __name__ == '__main__':
  arg_parser = argparse.ArgumentParser()
  arg_parser.add_argument('--graphdir', type=str)
  args, files = arg_parser.parse_known_args()
  args.files = files
  process_graphs(args)
#!/usr/bin/env python
# scripts/grab_metrics.py
# (From patch 523e185fa4d15705606c288e51e26912b164e3c9.)
# NOTE(review): the tail of scripts/grab_graphs.py (its main-guard call
# 'process_graphs(args)') ended on this original line of the patch.
#
# Given a log file in which the XLA metrics report has been dumped, extracts
# the different metrics across multiple points and produces data in a format
# which can be graphed.
# Can also produce data which is a combination of other metrics, using the
# --synth parameters:
#
#   --synth 'LiveDataHandles:CreateDataHandles - DestroyDataHandles'

from __future__ import print_function

import argparse
import collections
import re
import sys


def parse_metrics(lines):
  """Parses 'Counter: NAME' / 'Value: N' pairs from an iterable of lines.

  Expected shape in the dump:
    Counter: CreateCompileHandles
      Value: 1631

  Returns:
    A dict-like (defaultdict) mapping each counter name to the list of its
    string values, in the order they were seen.
  """
  metrics = collections.defaultdict(list)
  metric = None
  for line in lines:
    if metric is not None:
      m = re.match(r'\s*Value: ([^\s]+)', line)
      if m:
        metrics[metric].append(m.group(1))
        metric = None
    else:
      m = re.match(r'Counter: ([^\s]+)', line)
      if m:
        metric = m.group(1)
  return metrics


def create_metric_report(args, metric, metric_data):
  """Prints '[NAME]' followed by 'index<TAB>value' rows for one metric."""
  print('[{}]'.format(metric))
  for i, v in enumerate(metric_data):
    print('{}\t{}'.format(i, v))


def process_synth(args, synth, metrics):
  """Evaluates and prints a synthetic metric.

  `synth` is 'NAME:EXPR' where EXPR is a Python arithmetic expression over
  metric names (e.g. 'Live:Create - Destroy'). One row per sample index is
  printed, up to the length of the shortest referenced metric (the original
  while-loop spun forever when EXPR referenced no metric at all).

  Raises:
    RuntimeError: If EXPR references an unknown metric name.
  """
  name, expr = synth.split(':', 1)
  # Every identifier appearing in the expression must name a known metric.
  xvars = sorted(
      set(m.group(0) for m in re.finditer(r'[a-zA-Z_][a-zA-Z_0-9]*', expr)))
  xmetrics = []
  for v in xvars:
    metric_data = metrics.get(v, None)
    if metric_data is None:
      raise RuntimeError('Unknown metric: {}'.format(v))
    xmetrics.append(metric_data)
  print('[{}]'.format(name))
  count = min(len(d) for d in xmetrics) if xmetrics else 0
  for x in range(count):
    env = {v: float(xmetrics[i][x]) for i, v in enumerate(xvars)}
    # SECURITY NOTE: eval() of a user-supplied --synth expression. This is a
    # local analysis tool; never feed it untrusted input.
    y = eval(expr, env)
    print('{}\t{}'.format(x, y))


def create_report(args, metrics):
  """Prints the report for --metric (or all metrics), then every --synth.

  Raises:
    RuntimeError: If --metric names an unknown metric.
  """
  if args.metric:
    metric_data = metrics.get(args.metric, None)
    if metric_data is None:
      raise RuntimeError('Unknown metric: {}'.format(args.metric))
    create_metric_report(args, args.metric, metric_data)
  else:
    for metric, metric_data in metrics.items():
      create_metric_report(args, metric, metric_data)
  # BUGFIX: argparse leaves args.synth as None (not []) when --synth is
  # never passed; the original 'for synth in args.synth' raised TypeError.
  for synth in args.synth or []:
    process_synth(args, synth, metrics)


def process_metrics(args):
  """Reads the metrics dump from stdin or --input and emits the report."""
  if args.input is None:
    create_report(args, parse_metrics(sys.stdin))
  else:
    # BUGFIX: close the input file; the original leaked the descriptor.
    with open(args.input, 'r') as fd:
      create_report(args, parse_metrics(fd))


if __name__ == '__main__':
  arg_parser = argparse.ArgumentParser()
  arg_parser.add_argument('--input', type=str)
  arg_parser.add_argument('--metric', type=str)
  arg_parser.add_argument('--synth', action='append', type=str, default=[])
  args, files = arg_parser.parse_known_args()
  args.files = files
  process_metrics(args)