From a981185ff077c7cd3095a0b37f8d26ffce53792d Mon Sep 17 00:00:00 2001 From: Philippe Gervais Date: Tue, 27 Aug 2013 13:44:37 +0200 Subject: [PATCH 1/5] Added --title option to 'mprof plot' This option allows setting the title of figures created by 'mprof plot'. --- mprof | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/mprof b/mprof index 3e96413..e872e62 100755 --- a/mprof +++ b/mprof @@ -335,6 +335,7 @@ def plot_file(filename, index=0, timestamps=True): pl.vlines(t[max_mem_ind], bottom, top, colors="r", linestyles="--") + def plot_action(): try: import pylab as pl @@ -342,18 +343,26 @@ def plot_action(): print("matplotlib is needed for plotting.") sys.exit(1) + parser = OptionParser(version=mp.__version__) + parser.disable_interspersed_args() + parser.add_option("--title", "-t", dest="title", default="", + type="str", action="store", + help="String shown as plot title") + (options, args) = parser.parse_args() + profiles = glob.glob("mprofile_??????????????.dat") profiles.sort() - if len(sys.argv) == 1: + if len(args) == 0: if len(profiles) == 0: print("No input file found. \nThis program looks for " - "mprofile_*.dat files, generated by the mprofile command.") + "mprofile_*.dat files, generated by the " + "'mprof run' command.") sys.exit(-1) filenames = [profiles[-1]] else: filenames = [] - for arg in sys.argv[2:]: + for arg in args: if osp.exists(arg): if not arg in filenames: filenames.append(arg) @@ -374,7 +383,7 @@ def plot_action(): plot_file(filename, index=n, timestamps=timestamps) pl.xlabel("time (in seconds)") pl.ylabel("memory used (in MiB)") - + pl.title(options.title) ax = pl.gca() box = ax.get_position() ax.set_position([0.07, 0.1, @@ -384,7 +393,8 @@ def plot_action(): pl.show() if __name__ == "__main__": - # Workaround for optparse limitation: insert -- before first negative number found. + # Workaround for optparse limitation: insert -- before first negative + # number found. 
negint = re.compile("-[0-9]+") for n, arg in enumerate(sys.argv): if negint.match(arg): From 78d4ef9f288d0f2be02059fd286e307cc9da7f61 Mon Sep 17 00:00:00 2001 From: Philippe Gervais Date: Tue, 27 Aug 2013 14:22:30 +0200 Subject: [PATCH 2/5] Got rid of numpy in read_mprofile_file Numpy is still required for plotting, but only when pylab is used. --- mprof | 48 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/mprof b/mprof index e872e62..c37f9d4 100755 --- a/mprof +++ b/mprof @@ -259,31 +259,46 @@ def add_brackets(xloc, yloc, xshift=0, color="r", label=None): def read_mprofile_file(filename): - # TODO: would be nice to do without numpy + """Read an mprofile file and return its content. - import numpy as np - ret = {} - mdata = [] + Returns + ======= + content: dict + Keys: + + - "mem_usage": (list) memory usage values, in MiB + - "timestamp": (list) time instant for each memory usage value, in + second + - "func_timestamp": (dict) for each function, timestamps and memory + usage upon entering and exiting. 
+ """ +# import numpy as np + func_ts = {} + mem_usage = [] + timestamp = [] f = open(filename, "r") for l in f: fields = l.split() if fields[0] == "MEM": # mem, timestamp - mdata.append((fields[1], fields[2])) + mem_usage.append(float(fields[1])) + timestamp.append(float(fields[2])) elif fields[0] == "FUNC": f_name, mem_start, start, mem_end, end = fields[1:] - ts = ret.get(f_name, []) - ts.append([float(start), float(end), float(mem_start), float(mem_end)]) - ret[f_name] = ts + ts = func_ts.get(f_name, []) + ts.append([float(start), float(end), + float(mem_start), float(mem_end)]) + func_ts[f_name] = ts else: pass f.close() - mdata = np.asarray(mdata, - dtype=[("mem", np.float), ("timestamp", np.float)]) - return mdata, ret + ## mdata = np.asarray(mdata, + ## dtype=[("mem", np.float), ("timestamp", np.float)]) + return {"mem_usage": mem_usage, "timestamp": timestamp, + "func_timestamp": func_ts} @@ -293,16 +308,16 @@ def plot_file(filename, index=0, timestamps=True): except ImportError: print("matplotlib is needed for plotting.") sys.exit(1) + import numpy as np # pylab requires numpy anyway + mprofile = read_mprofile_file(filename) - mdata, ts = read_mprofile_file(filename) - - global_start = float(mdata["timestamp"][0]) + global_start = float(mprofile["timestamp"][0]) - mem = mdata["mem"] + mem = np.asarray(mprofile["mem_usage"]) max_mem = mem.max() max_mem_ind = mem.argmax() - t = mdata["timestamp"] - global_start + t = np.asarray(mprofile["timestamp"]) - global_start all_colors=("c", "y", "g", "r", "b") mem_line_colors=('k', "b", "r") @@ -318,6 +333,7 @@ def plot_file(filename, index=0, timestamps=True): top -= 0.001 # plot timestamps, if any + ts = mprofile['func_timestamp'] if len(ts) > 0 and timestamps: func_num = 0 for f, exec_ts in ts.iteritems(): From b4bbce73c438f9e343b228c9bcd45703ce15fac0 Mon Sep 17 00:00:00 2001 From: Philippe Gervais Date: Tue, 27 Aug 2013 14:30:03 +0200 Subject: [PATCH 3/5] Proper handling of empty profile files When interrupting 
"mprof run", an empty file can be generated, which caused problem with "mprof plot". A clear error message is now displayed to the user. --- mprof | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/mprof b/mprof index c37f9d4..5b44430 100755 --- a/mprof +++ b/mprof @@ -272,7 +272,6 @@ def read_mprofile_file(filename): - "func_timestamp": (dict) for each function, timestamps and memory usage upon entering and exiting. """ -# import numpy as np func_ts = {} mem_usage = [] timestamp = [] @@ -295,10 +294,8 @@ def read_mprofile_file(filename): pass f.close() - ## mdata = np.asarray(mdata, - ## dtype=[("mem", np.float), ("timestamp", np.float)]) return {"mem_usage": mem_usage, "timestamp": timestamp, - "func_timestamp": func_ts} + "func_timestamp": func_ts, 'filename': filename} @@ -311,6 +308,14 @@ def plot_file(filename, index=0, timestamps=True): import numpy as np # pylab requires numpy anyway mprofile = read_mprofile_file(filename) + if len(mprofile['timestamp']) == 0: + print('** No memory usage values have been found in the profile ' + 'file.**\nFile path: {0}\n' + 'File may be empty or invalid.\n' + 'It can be deleted with "mprof rm {0}"'.format( + mprofile['filename'])) + sys.exit(0) + global_start = float(mprofile["timestamp"][0]) mem = np.asarray(mprofile["mem_usage"]) From 1e791ca55310f24e0565f421f024ec93c27ac3db Mon Sep 17 00:00:00 2001 From: Philippe Gervais Date: Tue, 27 Aug 2013 15:00:27 +0200 Subject: [PATCH 4/5] Command line is saved in profile file The full command line of what was run during the profiling is now saved into the profile file. It is used to set the graph title with 'mprof plot'. 
--- mprof | 47 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 12 deletions(-) diff --git a/mprof b/mprof index 5b44430..a372e5f 100755 --- a/mprof +++ b/mprof @@ -157,6 +157,12 @@ def clean_action(): os.remove(filename) +def get_cmd_line(args): + """Given a set or arguments, compute command-line.""" + blanks = set(' \t') + args = [s if blanks.isdisjoint(s) else "'" + s + "'" for s in args] + return ' '.join(args) + def run_action(): import time, subprocess @@ -200,15 +206,18 @@ def run_action(): print("running as a Python program...") if not args[0].startswith("python"): args.insert(0, "python") + cmd_line = get_cmd_line(args) args[1:1] = ("-m", "memory_profiler", "--timestamp", "-o", mprofile_output) p = subprocess.Popen(args) else: + cmd_line = get_cmd_line(args) p = subprocess.Popen(args) - mu = mp.memory_usage(proc=p, interval=options.interval, timestamps=True, - include_children=options.include_children) with open(mprofile_output, "a") as f: + f.write("CMDLINE {0}\n".format(cmd_line)) + mu = mp.memory_usage(proc=p, interval=options.interval, timestamps=True, + include_children=options.include_children) for m, t in mu: f.write("MEM {0:.6f} {1:.4f}".format(m, t) + "\n") @@ -271,31 +280,38 @@ def read_mprofile_file(filename): second - "func_timestamp": (dict) for each function, timestamps and memory usage upon entering and exiting. + - 'cmd_line': (str) command-line ran for this profile. 
""" func_ts = {} mem_usage = [] timestamp = [] + cmd_line = None f = open(filename, "r") for l in f: - fields = l.split() - if fields[0] == "MEM": + field, value = l.split(' ', 1) + if field == "MEM": # mem, timestamp - mem_usage.append(float(fields[1])) - timestamp.append(float(fields[2])) + values = value.split(' ') + mem_usage.append(float(values[0])) + timestamp.append(float(values[1])) - elif fields[0] == "FUNC": - f_name, mem_start, start, mem_end, end = fields[1:] + elif field == "FUNC": + values = value.split(' ') + f_name, mem_start, start, mem_end, end = values[:5] ts = func_ts.get(f_name, []) ts.append([float(start), float(end), float(mem_start), float(mem_end)]) func_ts[f_name] = ts + elif field == "CMDLINE": + cmd_line = value else: pass f.close() return {"mem_usage": mem_usage, "timestamp": timestamp, - "func_timestamp": func_ts, 'filename': filename} + "func_timestamp": func_ts, 'filename': filename, + 'cmd_line': cmd_line} @@ -355,6 +371,7 @@ def plot_file(filename, index=0, timestamps=True): colors="r", linestyles="--") pl.vlines(t[max_mem_ind], bottom, top, colors="r", linestyles="--") + return mprofile def plot_action(): @@ -366,7 +383,7 @@ def plot_action(): parser = OptionParser(version=mp.__version__) parser.disable_interspersed_args() - parser.add_option("--title", "-t", dest="title", default="", + parser.add_option("--title", "-t", dest="title", default=None, type="str", action="store", help="String shown as plot title") (options, args) = parser.parse_args() @@ -401,10 +418,16 @@ def plot_action(): else: timestamps = True for n, filename in enumerate(filenames): - plot_file(filename, index=n, timestamps=timestamps) + mprofile = plot_file(filename, index=n, timestamps=timestamps) pl.xlabel("time (in seconds)") pl.ylabel("memory used (in MiB)") - pl.title(options.title) + + if options.title is None and len(filenames) == 1: + pl.title(mprofile['cmd_line']) + else: + if options.title is not None: + pl.title(options.title) + ax = pl.gca() box = 
ax.get_position() ax.set_position([0.07, 0.1, From 9eba5ee676ee01d8384638c69e2c129c835f6ace Mon Sep 17 00:00:00 2001 From: Philippe Gervais Date: Tue, 27 Aug 2013 15:15:38 +0200 Subject: [PATCH 5/5] Plot all memory usage information Memory usage when entering and leaving a function is now included with the other measurements. This fixes some discrepancies between brackets and curves when sampling frequency was low. Add an option to prevent displaying of timestamps in plots (mprof plot -n) --- mprof | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/mprof b/mprof index a372e5f..12050ae 100755 --- a/mprof +++ b/mprof @@ -332,14 +332,30 @@ def plot_file(filename, index=0, timestamps=True): mprofile['filename'])) sys.exit(0) - global_start = float(mprofile["timestamp"][0]) + # Merge function timestamps and memory usage together + ts = mprofile['func_timestamp'] + t = mprofile['timestamp'] + mem = mprofile['mem_usage'] + + if len(ts) > 0: + for values in ts.itervalues(): + for v in values: + t.extend(v[:2]) + mem.extend(v[2:4]) + + mem = np.asarray(mem) + t = np.asarray(t) + ind = t.argsort() + mem = mem[ind] + t = t[ind] + + # Plot curves + global_start = float(t[0]) + t = t - global_start - mem = np.asarray(mprofile["mem_usage"]) max_mem = mem.max() max_mem_ind = mem.argmax() - t = np.asarray(mprofile["timestamp"]) - global_start - all_colors=("c", "y", "g", "r", "b") mem_line_colors=('k', "b", "r") mem_line_label = time.strftime("%d / %m / %Y - start at %H:%M:%S", @@ -354,7 +370,6 @@ def plot_file(filename, index=0, timestamps=True): top -= 0.001 # plot timestamps, if any - ts = mprofile['func_timestamp'] if len(ts) > 0 and timestamps: func_num = 0 for f, exec_ts in ts.iteritems(): @@ -386,6 +401,9 @@ def plot_action(): parser.add_option("--title", "-t", dest="title", default=None, type="str", action="store", help="String shown as plot title") + parser.add_option("--no-function-ts", "-n", dest="no_timestamps", + 
default=False, action="store_true", + help="Do not display function timestamps on plot.") (options, args) = parser.parse_args() profiles = glob.glob("mprofile_??????????????.dat") @@ -413,7 +431,7 @@ def plot_action(): filenames.append(profiles[n]) pl.figure(figsize=(14, 6), dpi=90) - if len(filenames) > 1: + if len(filenames) > 1 or options.no_timestamps: timestamps = False else: timestamps = True