Permalink
Cannot retrieve contributors at this time
executable file
393 lines (324 sloc)
14.9 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| ''' | |
| tgentools | |
| Authored by Rob Jansen, 2015 | |
| See LICENSE for licensing information | |
| ''' | |
| import sys, os, argparse, logging, re, datetime | |
| from itertools import cycle | |
| from socket import gethostname | |
| from multiprocessing import cpu_count | |
| from tgentools._version import __version__ | |
| import tgentools.util as util | |
# Description text for the top-level parser built in main().
DESC_MAIN = """
TGenTools is a utility to analyze (parse) and visualize (plot) TGen output.
TGenTools must be run with a subcommand to specify a mode of operation.
For more information, see https://github.com/shadow/tgen.
"""
# Help text attached to the subcommand group of the top-level parser.
HELP_MAIN = """
Use 'tgentools <subcommand> --help' for more info
"""
# Description text for the 'parse' subcommand (handled by analyze()).
DESC_ANALYZE = """
Parse results from TGen traffic generator log files.
This subcommand processes TGen log files and stores the processed
data in json format for plotting. It was written so that the log files
need never be stored on disk decompressed, which is useful when log file
sizes reach tens of gigabytes.
The standard way to run this subcommand is to give the path to a TGen
file (e.g., those produced after running `tgen`) or to a directory
containing such files, and the statistics parsed during the analysis
will be dumped to `tgen.analysis.json.xz`.
"""
# One-line help for the 'parse' subcommand.
HELP_ANALYZE = """
Analyze TGen output
"""
# Description text for the 'plot' subcommand (handled by visualize()).
DESC_VISUALIZE = """
Loads a previously parsed TGen json file and plots various interesting
performance metrics to PDF files.
"""
# One-line help for the 'plot' subcommand.
HELP_VISUALIZE = """
Visualize TGen analysis results
"""
# Description text for the 'edit' subcommand (handled by edit()).
DESC_EDIT = """
Edit TGen configuration files in place to change or add options (i.e.,
set the values of graphml attributes) on the specified graphml vertices
or edges. Be careful not to set unsupported attribute names or values!
"""
# One-line help for the 'edit' subcommand.
HELP_EDIT = """
Edit TGen configuration files in place
"""
# Configure the root logger at import time: INFO level, with each message
# prefixed by a formatted timestamp, the raw epoch time, and a [tgentools] tag.
logging.basicConfig(format='%(asctime)s %(created)f [tgentools] [%(levelname)s] %(message)s', level=logging.INFO, datefmt='%Y-%m-%d %H:%M:%S')
class CustomHelpFormatter(argparse.ArgumentDefaultsHelpFormatter):
    """Help formatter that shows argument defaults and keeps descriptions raw.

    Extends ``ArgumentDefaultsHelpFormatter`` with the unwrapped description
    handling that ``RawDescriptionHelpFormatter`` provides.
    """

    def _fill_text(self, text, width, indent):
        """Return *text* with *indent* prepended to each line.

        Line endings are kept as-is and *width* is ignored, so no re-wrapping
        takes place.
        """
        indented_lines = []
        for raw_line in text.splitlines(True):
            indented_lines.append(indent + raw_line)
        return ''.join(indented_lines)
def main():
    """Parse the tgentools command line and run the selected subcommand.

    Builds a top-level parser with three subcommands -- ``parse``, ``plot``
    and ``edit`` -- whose handlers are ``analyze``, ``visualize`` and ``edit``.
    ``-v/--version`` logs the toolkit version and returns without running a
    subcommand. If neither ``--version`` nor a subcommand is given, the parser
    reports a usage error and exits instead of failing on a missing handler.
    """
    # the custom formatter leaves descriptions unwrapped and appends defaults to help
    my_formatter_class = CustomHelpFormatter

    # construct the options
    main_parser = argparse.ArgumentParser(description=DESC_MAIN, formatter_class=my_formatter_class)

    main_parser.add_argument('-v', '--version',
        help="""Prints the version of the toolkit and exits.""",
        action="store_true", dest="do_version",
        default=False)

    sub_parser = main_parser.add_subparsers(help=HELP_MAIN)

    # analyze
    analyze_parser = sub_parser.add_parser('parse',
        description=DESC_ANALYZE,
        help=HELP_ANALYZE,
        formatter_class=my_formatter_class)
    analyze_parser.set_defaults(func=analyze, formatter_class=my_formatter_class)

    analyze_parser.add_argument(
        help="""The PATH to a TGen log file, or to a directory that will be
            recursively searched for TGen log files; may be '-' for STDIN; each log file
            may end in '.xz' to enable inline xz decompression""",
        metavar="PATH", type=type_str_path_in,
        action="store", dest="tgen_path")

    analyze_parser.add_argument('-p', '--prefix',
        help="""A directory PATH prefix where the processed data
            files generated by this script will be written""",
        metavar="PATH", type=type_str_dir_path_out,
        action="store", dest="prefix",
        default=os.getcwd())

    analyze_parser.add_argument('-m', '--multiproc',
        help="""Enable multiprocessing with N worker processes, which may be '0'
            to use the number of available processor cores""",
        metavar="N", type=type_nonnegative_integer,
        action="store", dest="nprocesses",
        default=1)

    analyze_parser.add_argument('-c', '--complete',
        help="""Parse and export a more complete set of statistics that is more computationally expensive to obtain""",
        action="store_true", dest="do_complete",
        default=False)

    analyze_parser.add_argument('-a', '--address',
        help="""An IP address STRING that identifies the machine where the input logfiles were produced""",
        metavar="STRING", type=type_str_ip_in,
        action="store", dest="ip_address",
        default=None)

    analyze_parser.add_argument('-n', '--nickname',
        help="""A nickname STRING that identifies the machine where the input logfiles were produced""",
        metavar="STRING", type=str,
        action="store", dest="nickname",
        default=None)

    analyze_parser.add_argument('-d', '--date-filter',
        help="""A DATE string in the form YYYY-MM-DD, all log messages that did not occur on this date will be filtered out of the analysis""",
        metavar="DATE", type=type_str_date_in,
        action="store", dest="date_filter",
        default=None)

    # the backslash is doubled so the help text shows 'tgen.*\.log' without
    # relying on an invalid escape sequence in a non-raw string literal
    analyze_parser.add_argument('-e', '--expression',
        help="""Append a regex PATTERN to a custom list of strings used with
            re.search to find log file names in the search path. The custom list of patterns
            will override the default pattern 'tgen.*\\.log'.""",
        metavar="PATTERN", type=str,
        action="append", dest="patterns",
        default=[])

    # visualize
    visualize_parser = sub_parser.add_parser('plot', description=DESC_VISUALIZE, help=HELP_VISUALIZE,
        formatter_class=my_formatter_class)
    visualize_parser.set_defaults(func=visualize, formatter_class=my_formatter_class)

    visualize_parser.add_argument('-d', '--data',
        help="""Append a PATH to a tgen.analysis.json results file, and a LABEL
            that we should use for the graph legend for this dataset""",
        metavar=("PATH", "LABEL"),
        nargs=2,
        required=True,
        action=PathStringArgsAction, dest="datasets")

    visualize_parser.add_argument('-p', '--prefix',
        help="a STRING filename prefix for graphs we generate",
        metavar="STRING", type=str,
        action="store", dest="prefix",
        default=None)

    visualize_parser.add_argument('-c', '--counter-cdfs',
        help="""Plot the tgen heartbeat counters as cumulative distributions across
            nodes in addition to the timeseries that are plotted by default.""",
        action="store_true", dest="do_heartbeat_cdfs",
        default=False)

    visualize_parser.add_argument('-b', '--bytes',
        help="""Plot a set of time to first and last byte graphs for every file size.
            Warning: may result in a PDF with hundreds or thousands of pages!""",
        action="store_true", dest="do_bytes",
        default=False)

    visualize_parser.add_argument('-s', '--stats',
        help="""Plot a set of additional cumulative distributions showing
            mean, median, and max per-client statistics.""",
        action="store_true", dest="do_stats",
        default=False)

    visualize_parser.add_argument('-f', '--format',
        help="""A comma-separated LIST of color/line format strings to cycle to
            matplotlib's plot command (see matplotlib.pyplot.plot)""",
        metavar="LIST", type=str,
        action="store", dest="lineformats",
        default=util.LINEFORMATS)

    visualize_parser.add_argument('-e', '--expression',
        help="""Append a regex PATTERN to a custom list of strings used with
            re.search to select which host names in the analysis results get plotted.
            By default, results from all hosts in the analysis file will get plotted.""",
        metavar="PATTERN", type=str,
        action="append", dest="hostpatterns",
        default=[])

    # edit
    edit_parser = sub_parser.add_parser('edit', description=DESC_EDIT, help=HELP_EDIT,
        formatter_class=my_formatter_class)
    edit_parser.set_defaults(func=edit, formatter_class=my_formatter_class)

    edit_parser.add_argument(
        help="""The PATH to a TGen configuration file in graphml format""",
        metavar="PATH", type=type_str_path_in,
        action="store", dest="config_path")

    edit_parser.add_argument('-a', '--action',
        help="""Set the regex PATTERN that will be used to match actions (vertices)
            for which the given attribute and value will be set. Examples include:
            'start', 'stream', 'flow', 'pause', 'end'.""",
        metavar="PATTERN", type=str,
        action="store", dest="action_pattern",
        default=None)

    edit_parser.add_argument('-s', '--source',
        help="""Set the regex PATTERN that will be used to match the name of the edge
            source for which the given attribute and value will be set. An edge matches
            the pattern if the name of the action on the source of the edge matches.""",
        metavar="PATTERN", type=str,
        action="store", dest="edge_source_pattern",
        default=None)

    edit_parser.add_argument('-t', '--target',
        help="""Set the regex PATTERN that will be used to match the name of the edge
            target for which the given attribute and value will be set. An edge matches
            the pattern if the name of the action on the target of the edge matches.""",
        metavar="PATTERN", type=str,
        action="store", dest="edge_target_pattern",
        default=None)

    edit_parser.add_argument('-n', '--name',
        help="A STRING name for the attribute that we set on matched vertices/edges.",
        metavar="STRING", type=str,
        action="store", dest="name",
        default=None)

    edit_parser.add_argument('-v', '--value',
        help="A STRING value for the attribute that we set on matched vertices/edges.",
        metavar="STRING", type=str,
        action="store", dest="value",
        default=None)

    # get args and call the command handler for the chosen mode
    args = main_parser.parse_args()

    if args.do_version:
        logging.info("TGenTools version {}".format(__version__))
        return

    # 'func' is only set by a subparser's defaults; on Python 3 subcommands are
    # optional, so it is absent when the user did not choose one
    if not hasattr(args, "func"):
        main_parser.error("a subcommand is required: choose one of 'parse', 'plot', or 'edit'")

    args.func(args)
def analyze(args):
    """Handle the 'parse' subcommand: analyze TGen logs and save the results.

    Collects the input log paths from ``args.tgen_path`` (a single file, or a
    directory searched with the ``args.patterns`` regexes), runs a serial or
    parallel analysis depending on ``args.nprocesses`` (0 means one worker per
    CPU core), and saves the result under ``args.prefix``. Logs a warning and
    returns without analyzing when no input files are found.
    """
    from tgentools.analysis import ParallelAnalysis, SerialAnalysis

    # raw string: '\.' is a regex escape, not a Python string escape
    searchexp = args.patterns if len(args.patterns) > 0 else [r"tgen.*\.log"]

    paths = []
    if os.path.isdir(args.tgen_path):
        # need to search
        paths = util.find_file_paths(args.tgen_path, searchexp)
    elif os.path.isfile(args.tgen_path):
        # just one file
        paths = [args.tgen_path]
    else:
        # NOTE(review): the parser help says PATH may be '-' for STDIN, but '-'
        # is normally neither a directory nor a file and so ends up here --
        # confirm whether STDIN input is meant to be supported
        logging.warning("No valid tgen paths were given, nothing will be analyzed")
        return

    if not paths:
        logging.warning("No valid tgen files found at path {}, nothing will be analyzed".format(args.tgen_path))
        return

    if args.nprocesses == 0:
        args.nprocesses = cpu_count()

    if args.nprocesses > 1:
        analysis = ParallelAnalysis(nickname=args.nickname, ip_address=args.ip_address)
        # never start more workers than there are files to process
        analysis.analyze(paths, do_complete=args.do_complete, date_filter=args.date_filter,
            num_subprocs=min(args.nprocesses, len(paths)))
    else:
        analysis = SerialAnalysis(nickname=args.nickname, ip_address=args.ip_address)
        analysis.analyze(paths, do_complete=args.do_complete, date_filter=args.date_filter)

    analysis.save(output_prefix=args.prefix)
def visualize(args):
    """Handle the 'plot' subcommand: load each dataset and plot everything.

    Every ``(path, label)`` pair in ``args.datasets`` is loaded and, when
    loading yields a result, added to the visualization with the next line
    format taken from the comma-separated ``args.lineformats`` list (cycled
    when there are more datasets than formats).
    """
    from tgentools.visualization import TGenVisualization
    from tgentools.analysis import Analysis

    formats = cycle(args.lineformats.strip().split(","))
    plotter = TGenVisualization(args.hostpatterns, args.do_bytes, args.do_heartbeat_cdfs, args.do_stats)

    for path, label in args.datasets:
        # a format is consumed for every dataset, including ones that fail to load
        line_format = next(formats)
        loaded = Analysis.load(filename=path)
        if loaded is None:
            continue
        plotter.add_dataset(loaded, label, line_format)

    plotter.plot_all(args.prefix)
def edit(args):
    """Handle the 'edit' subcommand: set an attribute on matching config elements.

    Logs a message and returns without touching the file when no selection
    pattern was given, or when the attribute name or value is missing.
    """
    nothing_selected = (
        args.action_pattern is None
        and args.edge_source_pattern is None
        and args.edge_target_pattern is None
    )
    if nothing_selected:
        logging.info("You did not set any regex patterns, so no elements were selected")
        return

    attribute_incomplete = args.name is None or args.value is None
    if attribute_incomplete:
        logging.info("You did not set a name and value, so no attributes were changed")
        return

    # imported only once we know there is actual editing work to do
    from tgentools.edit import edit_config
    edit_config(
        args.config_path,
        args.action_pattern,
        args.edge_source_pattern,
        args.edge_target_pattern,
        args.name,
        args.value,
    )
def type_nonnegative_integer(value):
    """argparse type: convert *value* to an int, rejecting negative numbers.

    Raises ``argparse.ArgumentTypeError`` when the converted value is below zero.
    """
    number = int(value)
    if number >= 0:
        return number
    raise argparse.ArgumentTypeError("'%s' is an invalid non-negative int value" % value)
def type_supported_analysis(value):
    """argparse type: lower-case *value* and require 'all', 'tgen' or 'tor'.

    Raises ``argparse.ArgumentTypeError`` for any other value.
    """
    kind = value.lower()
    if kind in ("all", "tgen", "tor"):
        return kind
    raise argparse.ArgumentTypeError("'%s' is an invalid Analysis type" % value)
def type_str_file_path_out(value):
    """argparse type: resolve an output file path.

    '-' is returned untouched. Any other value is user-expanded and made
    absolute, and its parent directory is handed to ``util.make_dir_path``
    (presumably creating it if needed) before the path is returned.
    """
    text = str(value)
    if text != "-":
        full_path = os.path.abspath(os.path.expanduser(text))
        util.make_dir_path(os.path.dirname(full_path))
        return full_path
    return text
def type_str_dir_path_out(value):
    """argparse type: resolve an output directory path.

    The value is user-expanded and made absolute, handed to
    ``util.make_dir_path`` (presumably creating it if needed), and returned.
    """
    expanded = os.path.expanduser(str(value))
    directory = os.path.abspath(expanded)
    util.make_dir_path(directory)
    return directory
def type_str_path_in(value):
    """argparse type: resolve an input path that must already exist.

    '-' is returned untouched. Any other value is user-expanded and made
    absolute; ``argparse.ArgumentTypeError`` is raised when nothing exists at
    the resulting location.
    """
    text = str(value)
    if text == "-":
        return text

    resolved = os.path.abspath(os.path.expanduser(text))
    if os.path.exists(resolved):
        return resolved
    raise argparse.ArgumentTypeError("path '%s' does not exist" % text)
def type_str_ip_in(value):
    """argparse type: validate a dotted-quad IPv4 address string.

    The whole value must consist of four dot-separated decimal octets, each in
    the range 0-255. Returns the address string unchanged.

    Raises ``argparse.ArgumentTypeError`` for anything else, including input
    with trailing characters after an otherwise valid-looking address.
    """
    s = str(value)
    # \Z anchors the end so trailing garbage is rejected rather than truncated
    match = re.match(r'([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})\Z', s)
    if match is None or any(int(octet) > 255 for octet in match.groups()):
        raise argparse.ArgumentTypeError("IP address '%s' is not a valid address" % s)
    return match.group(0)
def type_str_date_in(value):
    """argparse type: parse a YYYY-MM-DD string into a ``datetime.date``.

    Raises ``argparse.ArgumentTypeError`` when the value does not consist of
    three '-'-separated integers, when the year, month or day is outside its
    allowed range, or when the combination is not a real calendar date
    (e.g. February 30th).
    """
    s = str(value)

    parts = s.split('-')
    try:
        if len(parts) != 3:
            raise ValueError("expected three '-' separated fields")
        y, m, d = [int(part) for part in parts]
    except ValueError:
        raise argparse.ArgumentTypeError("date '%s' is not in the valid YYYY-MM-DD format" % s)

    if y < datetime.MINYEAR or y > datetime.MAXYEAR:
        # all three format arguments must be passed together as a tuple
        raise argparse.ArgumentTypeError(
            "the year portion of date '%s' must be in the range [%d, %d]"
            % (s, datetime.MINYEAR, datetime.MAXYEAR))
    if m < 1 or m > 12:
        raise argparse.ArgumentTypeError("the month portion of date '%s' must be in the range [1, 12]" % s)
    if d < 1 or d > 31:
        raise argparse.ArgumentTypeError("the day portion of date '%s' must be in the range [1, 31]" % s)

    try:
        return datetime.date(y, m, d)
    except ValueError:
        # the day can still be too large for the given month/year
        raise argparse.ArgumentTypeError("date '%s' is not a valid calendar date" % s)
# custom argparse action that gathers (path, label) dataset pairs when plotting
class PathStringArgsAction(argparse.Action):
    """Collect each ``PATH LABEL`` occurrence as an ``(absolute_path, label)`` tuple.

    The first time the action fires on a namespace, whatever default was
    stored at the destination is replaced by an empty list; every occurrence
    then appends its tuple to that list.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Validate that the path exists and append ``(path, label)`` to the destination."""
        raw_path, label = values[0], values[1]
        data_path = os.path.abspath(os.path.expanduser(raw_path))

        # refuse datasets that point at nothing
        if not os.path.exists(data_path):
            raise argparse.ArgumentError(self, "The supplied path does not exist: '{0}'".format(data_path))

        # on first use, drop the default and remember that we did so
        if "_didremovedefault" not in namespace:
            setattr(namespace, self.dest, [])
            setattr(namespace, "_didremovedefault", True)

        # record the new dataset
        getattr(namespace, self.dest).append((data_path, label))
if __name__ == '__main__':
    # run the CLI and use main()'s return value as the process exit status
    sys.exit(main())