Skip to content
Permalink
c3199b8201
Switch branches/tags
Go to file
 
 
Cannot retrieve contributors at this time
executable file 393 lines (324 sloc) 14.9 KB
#!/usr/bin/env python
'''
tgentools
Authored by Rob Jansen, 2015
See LICENSE for licensing information
'''
import sys, os, argparse, logging, re, datetime
from itertools import cycle
from socket import gethostname
from multiprocessing import cpu_count
from tgentools._version import __version__
import tgentools.util as util
# Description text shown by the top-level parser (passed as `description`
# to the main ArgumentParser in main()).
DESC_MAIN = """
TGenTools is a utility to analyze (parse) and visualize (plot) TGen output.
TGenTools must be run with a subcommand to specify a mode of operation.
For more information, see https://github.com/shadow/tgen.
"""
# Help text attached to the subcommand group of the top-level parser.
HELP_MAIN = """
Use 'tgentools <subcommand> --help' for more info
"""
# Description text for the 'parse' subcommand.
DESC_ANALYZE = """
Parse results from TGen traffic generator log files.
This subcommand processes TGen log files and stores the processed
data in json format for plotting. It was written so that the log files
need never be stored on disk decompressed, which is useful when log file
sizes reach tens of gigabytes.
The standard way to run this subcommand is to give the path to a TGen
file (e.g., those produced after running `tgen`) or to a directory
containing such files, and the statistics parsed during the analysis
will be dumped to `tgen.analysis.json.xz`.
"""
# One-line help for the 'parse' subcommand in the subcommand listing.
HELP_ANALYZE = """
Analyze TGen output
"""
# Description text for the 'plot' subcommand.
DESC_VISUALIZE = """
Loads a previously parsed TGen json file and plots various interesting
performance metrics to PDF files.
"""
# One-line help for the 'plot' subcommand in the subcommand listing.
HELP_VISUALIZE = """
Visualize TGen analysis results
"""
# Description text for the 'edit' subcommand.
DESC_EDIT = """
Edit TGen configuration files in place to change or add options (i.e.,
set the values of graphml attributes) on the specified graphml vertices
or edges. Be careful not to set unsupported attribute names or values!
"""
# One-line help for the 'edit' subcommand in the subcommand listing.
HELP_EDIT = """
Edit TGen configuration files in place
"""
# Configure the root logger at import time: INFO level, with every record
# prefixed by a human-readable timestamp, the raw epoch timestamp, a fixed
# '[tgentools]' tag and the level name.
logging.basicConfig(
    level=logging.INFO,
    datefmt='%Y-%m-%d %H:%M:%S',
    format='%(asctime)s %(created)f [tgentools] [%(levelname)s] %(message)s',
)
class CustomHelpFormatter(argparse.ArgumentDefaultsHelpFormatter):
    """Help formatter that shows argument defaults and keeps descriptions raw.

    It extends ArgumentDefaultsHelpFormatter and overrides `_fill_text` so
    that description text is not re-wrapped, i.e. it adds the behavior of
    'RawDescriptionHelpFormatter' to the defaults-showing formatter.
    """

    def _fill_text(self, text, width, indent):
        # Keep the author's line breaks: prefix each line (line endings
        # retained) with the indent. `width` is intentionally unused.
        indented_lines = (indent + piece for piece in text.splitlines(True))
        return ''.join(indented_lines)
def main():
    """Build the tgentools command line interface and run the chosen subcommand.

    Three subcommands are registered: 'parse' (handled by analyze), 'plot'
    (handled by visualize) and 'edit' (handled by edit). Each subparser stores
    its handler in the parsed namespace as `func`; after parsing, that handler
    is called with the parsed arguments.

    If -v/--version is given, the version is logged and no subcommand runs.
    If no subcommand is given, a usage error is reported through
    ArgumentParser.error (which prints the usage and exits the process)
    instead of failing on the missing `func` attribute.
    """
    # argparse.RawDescriptionHelpFormatter, RawTextHelpFormatter, RawDescriptionHelpFormatter
    my_formatter_class = CustomHelpFormatter

    # construct the options
    main_parser = argparse.ArgumentParser(description=DESC_MAIN, formatter_class=my_formatter_class)

    main_parser.add_argument('-v', '--version',
        help="""Prints the version of the toolkit and exits.""",
        action="store_true", dest="do_version",
        default=False)

    sub_parser = main_parser.add_subparsers(help=HELP_MAIN)

    # analyze
    analyze_parser = sub_parser.add_parser('parse',
        description=DESC_ANALYZE,
        help=HELP_ANALYZE,
        formatter_class=my_formatter_class)
    analyze_parser.set_defaults(func=analyze, formatter_class=my_formatter_class)

    analyze_parser.add_argument(
        help="""The PATH to a TGen log file, or to a directory that will be
recursively searched for TGen log files; may be '-' for STDIN; each log file
may end in '.xz' to enable inline xz decompression""",
        metavar="PATH", type=type_str_path_in,
        action="store", dest="tgen_path")

    analyze_parser.add_argument('-p', '--prefix',
        help="""A directory PATH prefix where the processed data
files generated by this script will be written""",
        metavar="PATH", type=type_str_dir_path_out,
        action="store", dest="prefix",
        default=os.getcwd())

    analyze_parser.add_argument('-m', '--multiproc',
        help="""Enable multiprocessing with N worker processes, which may be '0'
to use the number of available processor cores""",
        metavar="N", type=type_nonnegative_integer,
        action="store", dest="nprocesses",
        default=1)

    analyze_parser.add_argument('-c', '--complete',
        help="""Parse and export a more complete set of statistics that is more computationally expensive to obtain""",
        action="store_true", dest="do_complete",
        default=False)

    analyze_parser.add_argument('-a', '--address',
        help="""An IP address STRING that identifies the machine where the input logfiles were produced""",
        metavar="STRING", type=type_str_ip_in,
        action="store", dest="ip_address",
        default=None)

    analyze_parser.add_argument('-n', '--nickname',
        help="""A nickname STRING that identifies the machine where the input logfiles were produced""",
        metavar="STRING", type=str,
        action="store", dest="nickname",
        default=None)

    analyze_parser.add_argument('-d', '--date-filter',
        help="""A DATE string in the form YYYY-MM-DD, all log messages that did not occur on this date will be filtered out of the analysis""",
        metavar="DATE", type=type_str_date_in,
        action="store", dest="date_filter",
        default=None)

    # The backslash is doubled because this is not a raw string; the text
    # shown to the user is still 'tgen.*\.log'.
    analyze_parser.add_argument('-e', '--expression',
        help="""Append a regex PATTERN to a custom list of strings used with
re.search to find log file names in the search path. The custom list of patterns
will override the default pattern 'tgen.*\\.log'.""",
        metavar="PATTERN", type=str,
        action="append", dest="patterns",
        default=[])

    # visualize
    visualize_parser = sub_parser.add_parser('plot', description=DESC_VISUALIZE, help=HELP_VISUALIZE,
        formatter_class=my_formatter_class)
    visualize_parser.set_defaults(func=visualize, formatter_class=my_formatter_class)

    visualize_parser.add_argument('-d', '--data',
        help="""Append a PATH to a tgen.analysis.json results file, and a LABEL
that we should use for the graph legend for this dataset""",
        metavar=("PATH", "LABEL"),
        nargs=2,
        required=True,
        action=PathStringArgsAction, dest="datasets")

    visualize_parser.add_argument('-p', '--prefix',
        help="a STRING filename prefix for graphs we generate",
        metavar="STRING", type=str,
        action="store", dest="prefix",
        default=None)

    visualize_parser.add_argument('-c', '--counter-cdfs',
        help="""Plot the tgen heartbeat counters as cumulative distributions across
nodes in addition to the timeseries that are plotted by default.""",
        action="store_true", dest="do_heartbeat_cdfs",
        default=False)

    visualize_parser.add_argument('-b', '--bytes',
        help="""Plot a set of time to first and last byte graphs for every file size.
Warning: may result in a PDF with hundreds or thousands of pages!""",
        action="store_true", dest="do_bytes",
        default=False)

    visualize_parser.add_argument('-s', '--stats',
        help="""Plot a set of additional cumulative distributions showing
mean, median, and max per-client statistics.""",
        action="store_true", dest="do_stats",
        default=False)

    visualize_parser.add_argument('-f', '--format',
        help="""A comma-separated LIST of color/line format strings to cycle to
matplotlib's plot command (see matplotlib.pyplot.plot)""",
        metavar="LIST", type=str,
        action="store", dest="lineformats",
        default=util.LINEFORMATS)

    visualize_parser.add_argument('-e', '--expression',
        help="""Append a regex PATTERN to a custom list of strings used with
re.search to select which host names in the analysis results get plotted.
By default, results from all hosts in the analysis file will get plotted.""",
        metavar="PATTERN", type=str,
        action="append", dest="hostpatterns",
        default=[])

    # edit
    edit_parser = sub_parser.add_parser('edit', description=DESC_EDIT, help=HELP_EDIT,
        formatter_class=my_formatter_class)
    edit_parser.set_defaults(func=edit, formatter_class=my_formatter_class)

    edit_parser.add_argument(
        help="""The PATH to a TGen configuration file in graphml format""",
        metavar="PATH", type=type_str_path_in,
        action="store", dest="config_path")

    edit_parser.add_argument('-a', '--action',
        help="""Set the regex PATTERN that will be used to match actions (vertices)
for which the given attribute and value will be set. Examples include:
'start', 'stream', 'flow', 'pause', 'end'.""",
        metavar="PATTERN", type=str,
        action="store", dest="action_pattern",
        default=None)

    edit_parser.add_argument('-s', '--source',
        help="""Set the regex PATTERN that will be used to match the name of the edge
source for which the given attribute and value will be set. An edge matches
the pattern if the name of the action on the source of the edge matches.""",
        metavar="PATTERN", type=str,
        action="store", dest="edge_source_pattern",
        default=None)

    edit_parser.add_argument('-t', '--target',
        help="""Set the regex PATTERN that will be used to match the name of the edge
target for which the given attribute and value will be set. An edge matches
the pattern if the name of the action on the target of the edge matches.""",
        metavar="PATTERN", type=str,
        action="store", dest="edge_target_pattern",
        default=None)

    edit_parser.add_argument('-n', '--name',
        help="A STRING name for the attribute that we set on matched vertices/edges.",
        metavar="STRING", type=str,
        action="store", dest="name",
        default=None)

    edit_parser.add_argument('-v', '--value',
        help="A STRING value for the attribute that we set on matched vertices/edges.",
        metavar="STRING", type=str,
        action="store", dest="value",
        default=None)

    # get args and call the command handler for the chosen mode
    args = main_parser.parse_args()

    if args.do_version:
        logging.info("TGenTools version {}".format(__version__))
        return

    # `func` is only set by a subparser, so it is absent when the user gave
    # no subcommand; report that as a usage error rather than crashing with
    # an AttributeError.
    if not hasattr(args, "func"):
        main_parser.error("no subcommand given; use 'tgentools <subcommand> --help' for more info")

    args.func(args)
def analyze(args):
    """Handle the 'parse' subcommand: analyze TGen log files and save the result.

    Reads from `args`: patterns, tgen_path, nprocesses, nickname, ip_address,
    do_complete, date_filter and prefix. If `tgen_path` is a directory it is
    searched with util.find_file_paths using the custom patterns (or the
    default pattern when none were given); if it is a regular file, that
    single file is analyzed. When nothing can be analyzed, a warning is
    logged and the function returns without doing anything else.

    An `nprocesses` of 0 is replaced by the number of CPU cores. More than
    one process selects ParallelAnalysis (capped at one worker per input
    path), otherwise SerialAnalysis is used. The analysis is then saved
    with `args.prefix` as the output prefix.
    """
    # Raw string so the regex engine receives an escaped dot without relying
    # on an invalid string escape sequence.
    searchexp = args.patterns if args.patterns else [r"tgen.*\.log"]

    # NOTE(review): the 'parse' help text says PATH may be '-' for STDIN, but
    # '-' is neither a directory nor a regular file, so it ends up in the
    # warning branch below -- confirm whether STDIN input should be handled.
    if os.path.isdir(args.tgen_path):
        # need to search
        paths = util.find_file_paths(args.tgen_path, searchexp)
    elif os.path.isfile(args.tgen_path):
        # just one file
        paths = [args.tgen_path]
    else:
        logging.warning("No valid tgen paths were given, nothing will be analyzed")
        return

    if not paths:
        logging.warning("No valid tgen files found at path {}, nothing will be analyzed".format(args.tgen_path))
        return

    # Imported only once we know there is work to do, so the early-exit
    # paths above do not pay for loading the analysis module.
    from tgentools.analysis import ParallelAnalysis, SerialAnalysis

    if args.nprocesses == 0:
        args.nprocesses = cpu_count()

    if args.nprocesses > 1:
        analysis = ParallelAnalysis(nickname=args.nickname, ip_address=args.ip_address)
        # never start more workers than there are files to process
        analysis.analyze(paths, do_complete=args.do_complete, date_filter=args.date_filter,
            num_subprocs=min(args.nprocesses, len(paths)))
    else:
        analysis = SerialAnalysis(nickname=args.nickname, ip_address=args.ip_address)
        analysis.analyze(paths, do_complete=args.do_complete, date_filter=args.date_filter)

    analysis.save(output_prefix=args.prefix)
def visualize(args):
    """Handle the 'plot' subcommand: load analysis files and plot them.

    `args.lineformats` is split on commas and cycled, one format per
    (path, label) pair in `args.datasets`. A format is taken for every
    dataset, including those for which Analysis.load returns None; such
    datasets are skipped. Finally everything added is plotted with
    `args.prefix` passed to plot_all.
    """
    from tgentools.visualization import TGenVisualization
    from tgentools.analysis import Analysis

    format_cycle = cycle(args.lineformats.strip().split(","))

    plotter = TGenVisualization(args.hostpatterns, args.do_bytes, args.do_heartbeat_cdfs, args.do_stats)

    for path, label in args.datasets:
        # advance the cycle first so each dataset position keeps its format
        line_format = next(format_cycle)
        loaded = Analysis.load(filename=path)
        if loaded is None:
            continue
        plotter.add_dataset(loaded, label, line_format)

    plotter.plot_all(args.prefix)
def edit(args):
    """Handle the 'edit' subcommand: set an attribute in a TGen config file.

    Nothing is edited (an info message is logged instead) when none of the
    action/source/target patterns is set, or when the attribute name or
    value is missing. Otherwise edit_config is called with the config path,
    the three patterns, and the attribute name and value.
    """
    has_selector = (args.action_pattern is not None
                    or args.edge_source_pattern is not None
                    or args.edge_target_pattern is not None)
    if not has_selector:
        logging.info("You did not set any regex patterns, so no elements were selected")
        return

    has_attribute = args.name is not None and args.value is not None
    if not has_attribute:
        logging.info("You did not set a name and value, so no attributes were changed")
        return

    # imported late so the checks above work without loading the edit module
    from tgentools.edit import edit_config
    edit_config(
        args.config_path,
        args.action_pattern,
        args.edge_source_pattern,
        args.edge_target_pattern,
        args.name,
        args.value,
    )
def type_nonnegative_integer(value):
    """argparse type: convert `value` to an int and require it to be >= 0.

    Raises argparse.ArgumentTypeError for negative numbers; a value that
    int() cannot convert raises whatever int() raises.
    """
    number = int(value)
    if number >= 0:
        return number
    raise argparse.ArgumentTypeError("'%s' is an invalid non-negative int value" % value)
def type_supported_analysis(value):
    """argparse type: accept 'all', 'tgen' or 'tor' (case-insensitively).

    Returns the lower-cased value; raises argparse.ArgumentTypeError for
    anything else.
    """
    lowered = value.lower()
    if lowered not in ("all", "tgen", "tor"):
        raise argparse.ArgumentTypeError("'%s' is an invalid Analysis type" % value)
    return lowered
def type_str_file_path_out(value):
    """argparse type for an output file path.

    '-' is returned unchanged. Any other value is expanded ('~') and made
    absolute, util.make_dir_path is called on its parent directory, and the
    absolute path is returned.
    """
    text = str(value)
    if text != "-":
        text = os.path.abspath(os.path.expanduser(text))
        util.make_dir_path(os.path.dirname(text))
    return text
def type_str_dir_path_out(value):
    """argparse type for an output directory path.

    The value is expanded ('~') and made absolute, util.make_dir_path is
    called on it, and the absolute path is returned.
    """
    target = os.path.abspath(os.path.expanduser(str(value)))
    util.make_dir_path(target)
    return target
def type_str_path_in(value):
    """argparse type for an input path that must already exist.

    '-' is returned unchanged. Any other value is expanded ('~') and made
    absolute; the absolute path is returned if it exists, otherwise
    argparse.ArgumentTypeError is raised.
    """
    raw = str(value)
    if raw == "-":
        return raw

    resolved = os.path.abspath(os.path.expanduser(raw))
    if os.path.exists(resolved):
        return resolved
    raise argparse.ArgumentTypeError("path '%s' does not exist" % raw)
def type_str_ip_in(value):
    """argparse type: validate a dotted-quad IPv4 address string.

    The whole string must consist of four decimal octets separated by dots,
    and every octet must be in the range [0, 255]. Returns the address
    string; raises argparse.ArgumentTypeError otherwise.
    """
    s = str(value)
    # \Z anchors the pattern at the very end of the string, so trailing
    # characters (e.g. '1.2.3.4junk' or a fifth octet) are rejected instead
    # of being silently dropped.
    matched = re.match(r'([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})\Z', s)
    if matched is None or any(int(octet) > 255 for octet in matched.groups()):
        raise argparse.ArgumentTypeError("IP address '%s' is not a valid address" % s)
    return matched.group(0)
def type_str_date_in(value):
    """argparse type: parse a YYYY-MM-DD string into a datetime.date.

    Raises argparse.ArgumentTypeError when the string does not have three
    integer parts separated by '-', when the year, month or day is out of
    range, or when the day does not exist in the given month.
    """
    s = str(value)

    parts = s.split('-')
    if len(parts) != 3:
        raise argparse.ArgumentTypeError("date '%s' is not in the valid YYYY-MM-DD format" % s)
    try:
        y, m, d = [int(part) for part in parts]
    except ValueError:
        raise argparse.ArgumentTypeError("date '%s' is not in the valid YYYY-MM-DD format" % s)

    if y < datetime.MINYEAR or y > datetime.MAXYEAR:
        # all three values must be passed as one tuple to the % operator
        raise argparse.ArgumentTypeError(
            "the year portion of date '%s' must be in the range [%d, %d]"
            % (s, datetime.MINYEAR, datetime.MAXYEAR))
    if m < 1 or m > 12:
        raise argparse.ArgumentTypeError("the month portion of date '%s' must be in the range [1, 12]" % s)
    if d < 1 or d > 31:
        raise argparse.ArgumentTypeError("the day portion of date '%s' must be in the range [1, 31]" % s)

    try:
        return datetime.date(y, m, d)
    except ValueError:
        # year and month are already known to be in range, so the only
        # remaining failure is a day that the month does not have
        raise argparse.ArgumentTypeError("the day portion of date '%s' is not valid for the given month and year" % s)
# a custom action for passing in experimental data directories when plotting
class PathStringArgsAction(argparse.Action):
    """Collect (absolute path, label) pairs from a repeated two-value option.

    Each use of the option appends one tuple to the list stored under the
    action's `dest`. The path must exist, otherwise argparse.ArgumentError
    is raised.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        # first value is the path to our data, second the legend label
        resolved = os.path.abspath(os.path.expanduser(values[0]))
        label = values[1]

        if not os.path.exists(resolved):
            raise argparse.ArgumentError(self, "The supplied path does not exist: '{0}'".format(resolved))

        # On the first call, replace whatever default sits at `dest` with an
        # empty list and leave a marker in the namespace so that later calls
        # keep appending to that list.
        if "_didremovedefault" not in namespace:
            setattr(namespace, self.dest, [])
            setattr(namespace, "_didremovedefault", True)

        collected = getattr(namespace, self.dest)
        collected.append((resolved, label))
# Script entry point: run the CLI and use main()'s return value as the
# process exit status.
if __name__ == '__main__':
    sys.exit(main())