From 36df6b247a1b3b6e54c2bb4c0a09e7b2119a5795 Mon Sep 17 00:00:00 2001 From: Hubert Kario Date: Sun, 22 Nov 2020 03:15:49 +0100 Subject: [PATCH] limit memory usage of ECDF plots there is no point calculating steps below resolution of the image this also automatically creates an upper bound for matplotlib memory (though the sorting before is still sample dependent) --- tlsfuzzer/analysis.py | 94 ++++++++++++++++++++++++++----------------- 1 file changed, 58 insertions(+), 36 deletions(-) diff --git a/tlsfuzzer/analysis.py b/tlsfuzzer/analysis.py index 8769fa897..de12e1b8a 100644 --- a/tlsfuzzer/analysis.py +++ b/tlsfuzzer/analysis.py @@ -13,6 +13,7 @@ import csv import getopt import sys +import math import multiprocessing as mp import shutil from itertools import chain @@ -535,8 +536,12 @@ def ecdf_plot(self): ax = fig.add_subplot(1, 1, 1) for classname in data: values = data.loc[:, classname] + values = np.sort(values) + # provide only enough data points to plot a smooth graph + nbins = 16 * fig.dpi * 10 + values = values[::max(len(values) // int(nbins), 1)] levels = np.linspace(1. / len(values), 1, len(values)) - ax.step(sorted(values), levels, where='post') + ax.step(values, levels, where='post') self.make_legend(ax) ax.set_title("Empirical Cumulative Distribution Function") ax.set_xlabel("Time") @@ -565,28 +570,23 @@ def diff_ecdf_plot(self): start_time = time.time() print("[i] Generating ECDF plots of differences") data = self.load_data() - fig = Figure(figsize=(16, 12)) - canvas = FigureCanvas(fig) - axes = fig.add_subplot(1, 1, 1) classnames = iter(data) base = next(classnames) base_data = data.loc[:, base] # parameters for the zoomed-in graphs of ecdf - zoom_params = OrderedDict([("98", (0.01, 0.99)), + zoom_params = OrderedDict([("", (0, 1)), + ("98", (0.01, 0.99)), ("33", (0.33, 0.66)), ("10", (0.45, 0.55))]) zoom_values = OrderedDict((name, [float("inf"), float("-inf")]) for name in zoom_params.keys()) + # calculate the params for ECDF graphs for classname in classnames: - # calculate the ECDF values = data.loc[:, classname] - levels = np.linspace(1. / len(values), 1, len(values)) - values = sorted(values-base_data) - axes.step(values, levels, where='post') + values = values-base_data - # calculate the bounds for the zoom positions quantiles = np.quantile(values, list(chain(*zoom_params.values()))) quantiles = iter(quantiles) for low, high, name in \ @@ -594,34 +594,56 @@ def diff_ecdf_plot(self): zoom_values[name][0] = min(zoom_values[name][0], low) zoom_values[name][1] = max(zoom_values[name][1], high) - fig.legend(list("{0}-0".format(i) - for i in range(1, len(list(values)))), - ncol=6, - loc='upper center', - bbox_to_anchor=(0.5, -0.05)) - axes.set_title("Empirical Cumulative Distribution Function of " - "class differences") - axes.set_xlabel("Time") - axes.set_ylabel("Cumulative probability") - - formatter = mpl.ticker.EngFormatter('s') - axes.get_xaxis().set_major_formatter(formatter) - - canvas.print_figure(join(self.output, "diff_ecdf_plot.png"), - bbox_inches="tight") - - # now graph progressive zooms of the central portion - for name, quantiles, values in \ + for name, quantiles, zoom_val in \ zip(zoom_params.keys(), zoom_params.values(), zoom_values.values()): - axes.set_ylim(quantiles) - # make the bounds a little weaker so that the extreme positions - # are visible of graph too - axes.set_xlim([values[0]*0.98, values[1]*1.02]) - canvas.print_figure(join(self.output, - "diff_ecdf_plot_zoom_in_{0}.png" - .format(name)), - bbox_inches="tight") + fig = Figure(figsize=(16, 12)) + canvas = FigureCanvas(fig) + axes = fig.add_subplot(1, 1, 1) + + # rewind the iterator + classnames = iter(data) + next(classnames) + + for classname in classnames: + # calculate the ECDF + values = data.loc[:, classname] + values = np.sort(values-base_data) + # provide only enough data points to plot a smooth graph + nbins = 16 * fig.dpi + min_pos = int(len(values) * quantiles[0]) + max_pos = int(math.ceil(len(values) * quantiles[1])) + values = values[min_pos:max_pos: + max((max_pos-min_pos) // int(nbins), 1)] + levels = np.linspace(quantiles[0], quantiles[1], + len(values)) + axes.step(values, levels, where='post') + + fig.legend(list("{0}-0".format(i) + for i in range(1, len(list(values)))), + ncol=6, + loc='upper center', + bbox_to_anchor=(0.5, -0.05)) + axes.set_title("Empirical Cumulative Distribution Function of " + "class differences") + axes.set_xlabel("Time") + axes.set_ylabel("Cumulative probability") + + formatter = mpl.ticker.EngFormatter('s') + axes.get_xaxis().set_major_formatter(formatter) + + if not name: + canvas.print_figure(join(self.output, "diff_ecdf_plot.png"), + bbox_inches="tight") + else: + axes.set_ylim(quantiles) + # make the bounds a little weaker so that the extreme positions + # are visible of graph too + axes.set_xlim([zoom_val[0]*0.98, zoom_val[1]*1.02]) + canvas.print_figure(join(self.output, + "diff_ecdf_plot_zoom_in_{0}.png" + .format(name)), + bbox_inches="tight") if self.verbose: print("[i] ECDF plots of differences done in {:.3}s".format(