Skip to content

Commit

Permalink
Merge pull request #689 from tomato42/fast-analysis
Browse files Browse the repository at this point in the history
Faster and more detailed analysis
  • Loading branch information
tomato42 committed Aug 12, 2020
2 parents b1da057 + 33fe043 commit 7fe42c0
Show file tree
Hide file tree
Showing 11 changed files with 652 additions and 87 deletions.
2 changes: 2 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ before_install:
- chmod +x ./cc-test-reporter

install:
- pip list
- if [[ -e build-requirements-${TRAVIS_PYTHON_VERSION}.txt ]]; then travis_retry pip install -r build-requirements-${TRAVIS_PYTHON_VERSION}.txt; else travis_retry pip install -r build-requirements.txt; fi
- |
if [[ $EXTRACT_DEP == 'true' ]]; then
Expand All @@ -115,6 +116,7 @@ install:
- travis_retry pip install -r requirements.txt
# codeclimate supports natively just one set of results, so use the most recent python for that
- if [[ $CC_REPORT == 'true' ]]; then ./cc-test-reporter before-build; fi
- pip list

script:
- |
Expand Down
1 change: 1 addition & 0 deletions build-requirements-2.6.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ coveralls<1.3.0
pylint
diff_cover
idna<2.8
mock==2.0.0
1 change: 1 addition & 0 deletions build-requirements-3.3.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ diff_cover<2.5.0
typed_ast<1.3.0
isort<4.3.5
inflect<4.0.0
mock>2.0.0,<4.0.0
1 change: 1 addition & 0 deletions build-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ coverage
coveralls
pylint
diff_cover
mock>2.0.0
37 changes: 37 additions & 0 deletions docs/source/timing-analysis.rst
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,12 @@ file.
PYTHONPATH=. python tlsfuzzer/analysis.py -o "/tmp/results"
With large sample sizes, to avoid exhausting available memory and to speed up
the analysis, you can skip the generation of some graphs using the
``--no-ecdf-plot``, ``--no-scatter-plot`` and ``--no-conf-interval-plot``
options. That last option disables generation of the
``bootstrapped_means.csv`` file too.

External timing data
--------------------

Expand Down Expand Up @@ -309,6 +315,37 @@ file:
PYTHONPATH=. python tlsfuzzer/analysis.py -o "/tmp/results"
Combining results from multiple runs
------------------------------------

You can use the ``combine.py`` script to combine the results from runs.

The script checks if the set of executed probes match in all the files,
but you need to ensure that the environments of the test execution match
too.

To combine the runs, provide the output directory (``out-dir`` here) and
paths to one or more ``timing.csv`` files:

.. code:: bash
PYTHONPATH=. python tlsfuzzer/combine.py -o out-dir \
in_1596892760/timing.csv in_1596892742/timing.csv
.. warning::

The script overwrites the ``timing.csv`` in the output directory!

After combining the ``timing.csv`` files, execute analysis as usual.

.. tip::

``combine.py`` is the only script able to read the old format of
``timing.csv`` files. Use it with a single input file to convert from the
old file format (where all results for a given probe were listed in a single
line) to the new file format (where all results for a given probe are
in a single column).

Interpreting the results
========================

Expand Down
144 changes: 99 additions & 45 deletions tests/test_tlsfuzzer_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,66 +25,70 @@
class TestReport(unittest.TestCase):
def setUp(self):
data = {
0: ["A", 0.000758129, 0.000696719, 0.000980079, 0.000988900, 0.000875509,
'A': [0.000758129, 0.000696719, 0.000980079, 0.000988900, 0.000875509,
0.000734843, 0.000754852, 0.000667378, 0.000671230, 0.000790935],
1: ["B", 0.000758130, 0.000696718, 0.000980080, 0.000988899, 0.000875510,
'B': [0.000758130, 0.000696718, 0.000980080, 0.000988899, 0.000875510,
0.000734843, 0.000754852, 0.000667378, 0.000671230, 0.000790935],
2: ["C", 0.000758131, 0.000696717, 0.000980081, 0.000988898, 0.000875511,
'C': [0.000758131, 0.000696717, 0.000980081, 0.000988898, 0.000875511,
0.000734843, 0.000754852, 0.000667378, 0.000671230, 0.000790935]
}
self.neq_data = {
0: ["A", 0.000758130, 0.000696718, 0.000980080, 0.000988899, 0.000875510,
self.neq_data = pd.DataFrame(data={
'A': [0.000758130, 0.000696718, 0.000980080, 0.000988899, 0.000875510,
0.000734843, 0.000754852, 0.000667378, 0.000671230, 0.000790935],
1: ["B", 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
}
self.neq_data_overlap = {
0: ["A", 0, 0, 1, 7, 7] + [7] * 95,
1: ["B", 0, 0, 2, 6, 7] + [7] * 95,
}
'B': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
})
self.neq_data_overlap = pd.DataFrame(data={
'A': [0, 0, 1, 7, 7] + [7] * 95,
'B': [0, 0, 2, 6, 7] + [7] * 95,
})
timings = pd.DataFrame(data=data)
self.mock_read_csv = mock.Mock(spec=pd.read_csv)
self.mock_read_csv.return_value = timings.transpose()
self.mock_read_csv.return_value = timings

def test_report(self):
with mock.patch("tlsfuzzer.analysis.pd.read_csv", self.mock_read_csv):
with mock.patch("tlsfuzzer.analysis.Analysis.ecdf_plot") as mock_ecdf:
with mock.patch("tlsfuzzer.analysis.Analysis.box_plot") as mock_box:
with mock.patch("tlsfuzzer.analysis.Analysis.scatter_plot") as mock_scatter:
with mock.patch("__main__.__builtins__.open", mock.mock_open()) as mock_open:
analysis = Analysis("/tmp")
ret = analysis.generate_report()

self.mock_read_csv.assert_called_once()
#mock_ecdf.assert_called_once()
#mock_box.assert_called_once()
#mock_scatter.assert_called_once()
# we're writing to report.csv, legend.csv, and
# report.txt
self.assertEqual(mock_open.call_count, 3)
self.assertEqual(ret, 0)
with mock.patch("tlsfuzzer.analysis.Analysis.conf_interval_plot") as mock_conf_int:
with mock.patch("__main__.__builtins__.open", mock.mock_open()) as mock_open:
with mock.patch("builtins.print"):
analysis = Analysis("/tmp")
ret = analysis.generate_report()

self.mock_read_csv.assert_called_once()
#mock_ecdf.assert_called_once()
#mock_box.assert_called_once()
#mock_scatter.assert_called_once()
# we're writing to report.csv, legend.csv, and
# report.txt
self.assertEqual(mock_open.call_count, 3)
self.assertEqual(ret, 0)

def test_report_neq(self):
timings = pd.DataFrame(data=self.neq_data)
mock_read_csv = mock.Mock(spec=pd.read_csv)
mock_read_csv.return_value = timings.transpose()
mock_read_csv.return_value = timings
with mock.patch("tlsfuzzer.analysis.pd.read_csv", mock_read_csv):
with mock.patch("tlsfuzzer.analysis.Analysis.ecdf_plot") as mock_ecdf:
with mock.patch("tlsfuzzer.analysis.Analysis.box_plot") as mock_box:
with mock.patch("tlsfuzzer.analysis.Analysis.scatter_plot") as mock_scatter:
with mock.patch("__main__.__builtins__.open", mock.mock_open()) as mock_open:
analysis = Analysis("/tmp")
ret = analysis.generate_report()

mock_read_csv.assert_called_once()
#mock_ecdf.assert_called_once()
#mock_box.assert_called_once()
#mock_scatter.assert_called_once()
# we're writing to report.csv, legend.csv,
# and report.txt
self.assertEqual(mock_open.call_count, 3)
self.assertEqual(ret, 1)

def test_ks_test(self):
with mock.patch("tlsfuzzer.analysis.Analysis.conf_interval_plot") as mock_conf_int:
with mock.patch("__main__.__builtins__.open", mock.mock_open()) as mock_open:
with mock.patch("builtins.print"):
analysis = Analysis("/tmp")
ret = analysis.generate_report()

mock_read_csv.assert_called_once()
#mock_ecdf.assert_called_once()
#mock_box.assert_called_once()
#mock_scatter.assert_called_once()
# we're writing to report.csv, legend.csv,
# and report.txt
self.assertEqual(mock_open.call_count, 3)
self.assertEqual(ret, 1)

def test_wilcoxon_test(self):
with mock.patch("tlsfuzzer.analysis.pd.read_csv", self.mock_read_csv):
analysis = Analysis("/tmp")
self.mock_read_csv.assert_called_once()
Expand All @@ -107,7 +111,7 @@ def test_box_test(self):
def test_box_test_neq(self):
timings = pd.DataFrame(data=self.neq_data)
mock_read_csv = mock.Mock(spec=pd.read_csv)
mock_read_csv.return_value = timings.transpose()
mock_read_csv.return_value = timings
with mock.patch("tlsfuzzer.analysis.pd.read_csv", mock_read_csv):
analysis = Analysis("/tmp")

Expand All @@ -119,7 +123,7 @@ def test_box_test_neq(self):
def test_box_test_neq_overlap(self):
timings = pd.DataFrame(data=self.neq_data_overlap)
mock_read_csv = mock.Mock(spec=pd.read_csv)
mock_read_csv.return_value = timings.transpose()
mock_read_csv.return_value = timings
with mock.patch("tlsfuzzer.analysis.pd.read_csv", mock_read_csv):
analysis = Analysis("/tmp")
mock_read_csv.assert_called_once()
Expand All @@ -129,20 +133,48 @@ def test_box_test_neq_overlap(self):
for index, result in res.items():
self.assertEqual(result, None)

def test__mean_of_random_sample(self):
diffs = [1, 2, 3, 4, 5, 6, 7, 8, 9]
timings = pd.DataFrame(data=self.neq_data_overlap)
mock_read_csv = mock.Mock(spec=pd.read_csv)
mock_read_csv.return_value = timings
with mock.patch("tlsfuzzer.analysis.pd.read_csv", mock_read_csv):
with mock.patch("tlsfuzzer.analysis._diffs", diffs):
analysis = Analysis("/tmp")
vals = analysis._mean_of_random_sample(10)

self.assertEqual(len(vals), 10)
avg = sum(vals)/len(vals)
self.assertLessEqual(avg, 8)
self.assertLessEqual(2, avg)

def test__mean_of_random_sample_with_no_reps(self):
diffs = [1, 2, 3, 4, 5, 6, 7, 8, 9]
timings = pd.DataFrame(data=self.neq_data_overlap)
mock_read_csv = mock.Mock(spec=pd.read_csv)
mock_read_csv.return_value = timings
with mock.patch("tlsfuzzer.analysis.pd.read_csv", mock_read_csv):
with mock.patch("tlsfuzzer.analysis._diffs", diffs):
analysis = Analysis("/tmp")
vals = analysis._mean_of_random_sample(0)

self.assertEqual(len(vals), 0)
self.assertEqual(vals, [])


@unittest.skipIf(failed_import,
"Could not import analysis. Skipping related tests.")
class TestPlots(unittest.TestCase):
def setUp(self):
data = {
0: ["A", 0.000758130, 0.000696718, 0.000980080, 0.000988899, 0.000875510,
'A': [0.000758130, 0.000696718, 0.000980080, 0.000988899, 0.000875510,
0.000734843, 0.000754852, 0.000667378, 0.000671230, 0.000790935],
1: ["B", 0.000758130, 0.000696718, 0.000980080, 0.000988899, 0.000875510,
'B': [0.000758130, 0.000696718, 0.000980080, 0.000988899, 0.000875510,
0.000734843, 0.000754852, 0.000667378, 0.000671230, 0.000790935]
}
timings = pd.DataFrame(data=data)
mock_read_csv = mock.Mock(spec=pd.read_csv)
mock_read_csv.return_value = timings.transpose()
mock_read_csv.return_value = timings
with mock.patch("tlsfuzzer.analysis.pd.read_csv", mock_read_csv):
self.analysis = Analysis("/tmp")

Expand All @@ -164,6 +196,14 @@ def test_box_plot(self):
self.analysis.box_plot()
mock_save.assert_called_once()

def test_conf_interval_plot(self):
with mock.patch("tlsfuzzer.analysis.FigureCanvas.print_figure",
mock.Mock()) as mock_save:
with mock.patch("__main__.__builtins__.open", mock.mock_open())\
as mock_open:
self.analysis.conf_interval_plot()
mock_save.assert_called_once()


@unittest.skipIf(failed_import,
"Could not import analysis. Skipping related tests.")
Expand All @@ -178,7 +218,21 @@ def test_command_line(self):
with mock.patch("sys.argv", args):
main()
mock_report.assert_called_once()
mock_init.assert_called_once_with(output)
mock_init.assert_called_once_with(output, True, True, True)

def test_call_with_no_plots(self):
output = "/tmp"
args = ["analysis.py", "-o", output, "--no-ecdf-plot",
"--no-scatter-plot", "--no-conf-interval-plot"]
mock_init = mock.Mock()
mock_init.return_value = None
with mock.patch('tlsfuzzer.analysis.Analysis.generate_report') as mock_report:
with mock.patch('tlsfuzzer.analysis.Analysis.__init__', mock_init):
with mock.patch("sys.argv", args):
main()
mock_report.assert_called_once()
mock_init.assert_called_once_with(
output, False, False, False)

def test_help(self):
args = ["analysis.py", "--help"]
Expand Down

0 comments on commit 7fe42c0

Please sign in to comment.