Skip to content

Commit

Permalink
Merge pull request #904 from GeorgePantelakis/minerva-improvements
Browse files Browse the repository at this point in the history
Fix double conf interval calculations in bit-size analysis code
  • Loading branch information
tomato42 committed Feb 2, 2024
2 parents 4f7e472 + c7c3b5f commit 2dbbc24
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 88 deletions.
34 changes: 22 additions & 12 deletions tests/test_tlsfuzzer_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -1045,6 +1045,7 @@ class TestMeasurementAnalysis(unittest.TestCase):
def setUp(self):
self.analysis = Analysis("/tmp", bit_size_analysis=True)

@mock.patch("tlsfuzzer.analysis.Analysis._calc_exact_values")
@mock.patch("tlsfuzzer.analysis.Analysis.conf_plot_for_all_k")
@mock.patch("tlsfuzzer.analysis.Analysis.graph_worst_pair")
@mock.patch("tlsfuzzer.analysis.Analysis.diff_scatter_plot")
Expand All @@ -1059,7 +1060,8 @@ def setUp(self):
def test_bit_size_measurement_analysis_main(self, open_mock, rmtree_mock,
dir_creation_mock, load_data_mock, rel_t_test_mock,
wilcoxon_test_mock, interval_plot_mock, ecdf_plot_mock,
scatter_plot_mock, worst_pair_mock, conf_plot_mock):
scatter_plot_mock, worst_pair_mock, conf_plot_mock,
calc_values_mock):

def file_selector(*args, **kwargs):
file_name = args[0]
Expand Down Expand Up @@ -1093,6 +1095,11 @@ class dotDict(dict):
binomtest_result = {"statistic": 0.5, "pvalue": 0.5}
binomtest_mock = mock.Mock()

calc_values_mock.return_value = {
"mean": 0.5, "median": 0.5, "trim_mean_05": 0.5,
"trim_mean_25": 0.5, "trim_mean_45": 0.5, "trimean": 0.5
}

try:
with mock.patch(
"tlsfuzzer.analysis.stats.binomtest", binomtest_mock
Expand All @@ -1109,8 +1116,9 @@ class dotDict(dict):
binomtest_mock.assert_called()
rel_t_test_mock.assert_called()
wilcoxon_test_mock.assert_called()
calc_values_mock.assert_called()

@mock.patch("tlsfuzzer.analysis.Analysis.calc_diff_conf_int")
@mock.patch("tlsfuzzer.analysis.Analysis._calc_exact_values")
@mock.patch("tlsfuzzer.analysis.Analysis.conf_plot_for_all_k")
@mock.patch("tlsfuzzer.analysis.Analysis.graph_worst_pair")
@mock.patch("tlsfuzzer.analysis.Analysis.diff_scatter_plot")
Expand All @@ -1126,7 +1134,7 @@ def test_bit_size_measurement_analysis_main_100_samples(self, open_mock,
rmtree_mock, dir_creation_mock, load_data_mock,
rel_t_test_mock, wilcoxon_test_mock, interval_plot_mock,
ecdf_plot_mock, scatter_plot_mock, worst_pair_mock,
conf_plot_mock, calc_diff_conf_mock):
conf_plot_mock, calc_values_mock):

def file_selector(*args, **kwargs):
file_name = args[0]
Expand All @@ -1146,6 +1154,11 @@ def file_selector(*args, **kwargs):
("\n0.5,0.4\n0.5,0.5\n0.4,0.5" * 20)
)(file_name, mode)

if "bootstrapped" in file_name:
return mock.mock_open(
read_data= "1,0" + ("\n0.4" * 100) + ("\n0.6" * 100)
)(file_name, mode)

return mock.mock_open(
read_data="0,256,3\n0,255,102\n0,254,103\n1,256,4\n1,254,104\n1,253,105"
)(file_name, mode)
Expand All @@ -1154,21 +1167,18 @@ def file_selector(*args, **kwargs):
dir_creation_mock.return_value = [256, 255, 254, 253]
rel_t_test_mock.return_value = {(0, 1): 0.5}
wilcoxon_test_mock.return_value = {(0, 1): 0.5}
calc_diff_conf_mock.return_value = {
"mean": [0.4, 0.5, 0.6],
"median": [0.4, 0.5, 0.6],
"trim_mean_05": [0.4, 0.5, 0.6],
"trim_mean_25": [0.4, 0.5, 0.6],
"trim_mean_45": [0.4, 0.5, 0.6],
"trimean": [0.4, 0.5, 0.6]
}

class dotDict(dict):
__getattr__ = dict.__getitem__

binomtest_result = {"statistic": 0.5, "pvalue": 0.5}
binomtest_mock = mock.Mock()

calc_values_mock.return_value = {
"mean": 0.5, "median": 0.5, "trim_mean_05": 0.5,
"trim_mean_25": 0.5, "trim_mean_45": 0.5, "trimean": 0.5
}

try:
with mock.patch(
"tlsfuzzer.analysis.stats.binomtest", binomtest_mock
Expand All @@ -1185,7 +1195,7 @@ class dotDict(dict):
binomtest_mock.assert_called()
rel_t_test_mock.assert_called()
wilcoxon_test_mock.assert_called()
calc_diff_conf_mock.assert_called()
calc_values_mock.assert_called()

@mock.patch("tlsfuzzer.analysis.FigureCanvas.print_figure")
@mock.patch("builtins.open")
Expand Down
147 changes: 71 additions & 76 deletions tlsfuzzer/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -803,6 +803,20 @@ def _bootstrap_differences(self, pair, reps=5000, status=None):
_diffs = None
return ret

def _calc_exact_values(self, diff):
mean = np.mean(diff)
q1, median, q3 = np.quantile(diff, [0.25, 0.5, 0.75])
trim_mean_05 = stats.trim_mean(diff, 0.05, 0)
trim_mean_25 = stats.trim_mean(diff, 0.25, 0)
trim_mean_45 = stats.trim_mean(diff, 0.45, 0)
trimean = (q1 + 2*median + q3)/4

return {"mean": mean, "median": median,
"trim_mean_05": trim_mean_05,
"trim_mean_25": trim_mean_25,
"trim_mean_45": trim_mean_45,
"trimean": trimean}

def calc_diff_conf_int(self, pair, reps=5000, ci=0.95):
"""
Bootstrap a confidence interval for the central tendencies of
Expand Down Expand Up @@ -840,21 +854,9 @@ def calc_diff_conf_int(self, pair, reps=5000, ci=0.95):

data = self.load_data()
diff = data.iloc[:, pair[1]] - data.iloc[:, pair[0]]
mean = np.mean(diff)
q1, median, q3 = np.quantile(diff, [0.25, 0.5, 0.75])
trim_mean_05 = stats.trim_mean(diff, 0.05, 0)
trim_mean_25 = stats.trim_mean(diff, 0.25, 0)
trim_mean_45 = stats.trim_mean(diff, 0.45, 0)
trimean = (q1 + 2*median + q3)/4
exact_values = self._calc_exact_values(diff)

quantiles = [(1-ci)/2, 1-(1-ci)/2]

exact_values = {"mean": mean, "median": median,
"trim_mean_05": trim_mean_05,
"trim_mean_25": trim_mean_25,
"trim_mean_45": trim_mean_45,
"trimean": trimean}

ret = {}
for key, value in exact_values.items():
calc_quant = np.quantile(cent_tend[key], quantiles)
Expand Down Expand Up @@ -1716,7 +1718,30 @@ def analyze_bit_sizes(self):
"K size of {0}: {1}\n".format(k_size, results[(0, 1)])
)

# Creating graphs
self.conf_interval_plot()
self.diff_ecdf_plot()
self.diff_scatter_plot()
try:
self.graph_worst_pair(testPair)
except AssertionError:
if self.verbose:
print(
"[i] Couldn't create worst pair graph.".format(
k_size
)
)

# Bootstrap test
methods = {
"mean": "Mean",
"median": "Median",
"trim_mean_05": "Trimmed mean (5%)",
"trim_mean_25": "Trimmed mean (25%)",
"trim_mean_45": "Trimmed mean (45%)",
"trimean": "Trimean"
}

if k_size == max_k_size:
output_files['bootstrap_test'].write(
"For K size {0} (sanity) ({1} samples):\n".format(
Expand All @@ -1732,75 +1757,45 @@ def analyze_bit_sizes(self):
)
)

data = self.load_data()
diff = data.iloc[:, 1] - data.iloc[:, 0]
exact_values = self._calc_exact_values(diff)

if samples > 50:
results = self.calc_diff_conf_int(testPair, ci=0.95)
print_results = lambda result: \
"{0}s, 95% CI: {1}s, {2}s (±{3}s)"\
.format(
result[1], result[0], result[2],
(result[2] - result[0])
)
output_files['bootstrap_test'].write(
"Mean of differences: {0}\n".format(
print_results(results['mean'])
)
)
output_files['bootstrap_test'].write(
"Median of differences: {0}\n".format(
print_results(results['median'])
)
)
output_files['bootstrap_test'].write(
"Trimmed mean (5%) of differences: {0}\n".format(
print_results(results['trim_mean_05'])
)
)
output_files['bootstrap_test'].write(
"Trimmed mean (25%) of differences: {0}\n".format(
print_results(results['trim_mean_25'])
)
)
output_files['bootstrap_test'].write(
"Trimmed mean (45%) of differences: {0}\n".format(
print_results(results['trim_mean_45'])
)
)
output_files['bootstrap_test'].write(
"Trimean of differences: {0}\n\n".format(
print_results(results['trimean'])
if self.verbose:
print("[i] Reusing bootstraps to calculate 95% CI")

for method, human_readable in methods.items():
results = []
with open(join(
self.output, "bootstrapped_{0}.csv".format(method)
)) as fp:
results = fp.readlines()[1:]

results = list(map(lambda x: float(x), results))
calc_quant = np.quantile(results, [0.025, 0.975])

output_files['bootstrap_test'].write(
"{0} of differences: ".format(human_readable) +
"{0}s, 95% CI: {1}s, {2}s (±{3}s)\n"
.format(
exact_values[method], calc_quant[0],
calc_quant[1], (calc_quant[1] - calc_quant[0])
)
)
)
output_files['bootstrap_test'].write("\n")
else:
diffs = []

with open(
join(self.output, "timing.csv")
) as in_fp:
in_csv = csv.reader(in_fp)
next(in_csv)
for row in in_csv:
diffs.append(float(row[1]) - float(row[0]))

output_files['bootstrap_test'].write(
"Median of differences: {0}s\n\n"\
.format(np.mean(diffs) * 1e-9)
)

# Creating graphs
if self.verbose:
print('Creating graphs for k size {0}...'.format(k_size))
self.conf_interval_plot()
self.diff_ecdf_plot()
self.diff_scatter_plot()
try:
self.graph_worst_pair(testPair)
except AssertionError:
if self.verbose:
print(
"K size {0}: Couldn't create worst pair graph.".format(
k_size
print("[i] Not enough data to perform reliable "
"bootstraping ({0} observations)".format(samples))

for method, human_readable in methods.items():
output_files['bootstrap_test'].write(
"{0} of differences: {1}s\n".format(
human_readable, exact_values[method]
)
)
output_files['bootstrap_test'].write("\n")

for key in output_files:
output_files[key].close()
Expand Down

0 comments on commit 2dbbc24

Please sign in to comment.