Merge pull request #686 from tomato42/external-time-source

External time source
tlsfuzzer · Aug 5, 2020 · 88eac35 · 88eac35
2 parents 76f69c1 + da1ac45
commit 88eac35
Show file tree

Hide file tree

Showing 7 changed files with 207 additions and 21 deletions.
diff --git a/.gitignore b/.gitignore
@@ -49,9 +49,6 @@ pylint_report.txt
 *.mo
 *.pot
 
-# Django stuff:
-*.log
-
 # Sphinx documentation
 docs/_build/
 epydoc/

diff --git a/docs/source/timing-analysis.rst b/docs/source/timing-analysis.rst
@@ -265,7 +265,7 @@ script. Resulting file will be outputted to the specified folder.
 .. code:: bash
 
    PYTHONPATH=. python tlsfuzzer/extract.py -h localhost -p 4433 \
-   -c capture.pcap -l class.log -o /tmp/results/
+   -c capture.pcap -l log.csv -o /tmp/results/
 
 Timing runner will also launch analysis, if its dependencies are available.
 Again, in case you need to run it later, you can do that by providing the
@@ -274,7 +274,39 @@ file.
 
 .. code:: bash
 
-   PYTHONPATH=. python tlsfuzzer/analysis.py -o "/tmp"
+   PYTHONPATH=. python tlsfuzzer/analysis.py -o "/tmp/results"
+
+
+External timing data
+--------------------
+
+The ``extract.py`` can also process data collected by some external source
+(be it packet capture closer to server under test or an internal probe
+inside the server).
+
+The provided csv file must have a header and one column. While the file
+can contain additional data points at the beginning, the very last
+data point must correspond to the last connection made by tlsfuzzer.
+
+Place such file in the directory (in this example named ``timings-log.csv``)
+with the ``log.csv`` file and execute:
+
+.. code:: bash
+
+   PYTHONPATH=. python tlsfuzzer/extract.py -l /tmp/results/log.csv \
+   -o /tmp/results --raw-times /tmp/results/timings-log.csv
+
+.. warning::
+
+   The above mentioned command will overrite the timings extracted from the
+   ``capture.pcap`` file!
+
+Then run ``analysis.py`` as in the case of data extracted from ``capture.pcap``
+file:
+
+.. code:: bash
+   PYTHONPATH=. python tlsfuzzer/analysis.py -o "/tmp/results"
+
 
 Interpreting the results
 ========================

diff --git a/tests/test.log b/tests/test.log
@@ -0,0 +1,11 @@
+A,B
+1,0
+0,1
+1,0
+0,1
+0,1
+0,1
+0,1
+1,0
+1,0
+1,0
diff --git a/tests/test_tlsfuzzer_extract.py b/tests/test_tlsfuzzer_extract.py
@@ -51,13 +51,15 @@ def test_invalid_hostname(self):
                  "Could not import extraction. Skipping related tests.")
 class TestExtraction(unittest.TestCase):
     def setUp(self):
-        self.logfile = "test.log"
+        self.logfile = join(dirname(abspath(__file__)), "test.log")
         log_content = "A,B\n1,0\n0,1\n1,0\n0,1\n0,1\n0,1\n0,1\n1,0\n1,0\n1,0\n"
         self.expected = (
             "B,0.000747009,0.000920462,0.001327954,0.000904547,0.000768453,"
             "0.000752226,0.000862102,0.000706491,0.000668237,0.000992733\n"
             "A,0.000758130,0.000696718,0.000980080,0.000988899,0.000875510,"
             "0.000734843,0.000754852,0.000667378,0.000671230,0.000790935\n")
+        self.time_vals = "\n".join(["some random header"] +
+                                   list(str(i) for i in range(20)))
         # fix mock not supporting iterators
         self.mock_log = mock.mock_open(read_data=log_content)
         self.mock_log.return_value.__iter__ = lambda s: iter(s.readline, '')
@@ -66,13 +68,25 @@ def setUp(self):
             self.log = Log(self.logfile)
             self.log.read_log()
 
+    def test_extraction_from_external_time_source(self):
+        extract = Extract(self.log, None, "/tmp", None, None,
+                          join(dirname(abspath(__file__)), "times-log.csv"))
+        extract.parse()
+
+        with mock.patch('__main__.__builtins__.open', mock.mock_open()) as mock_file:
+            mock_file.return_value.write.side_effect = lambda s: self.assertIn(
+                s.strip(), self.expected.splitlines())
+            extract.write_csv('timing.csv')
+
     def test_extraction(self):
-        extract = Extract(self.log, join(dirname(abspath(__file__)), "capture.pcap"), "/tmp", "localhost", 4433)
+        extract = Extract(self.log,
+                          join(dirname(abspath(__file__)), "capture.pcap"),
+                          "/tmp", "localhost", 4433)
         extract.parse()
 
         with mock.patch('__main__.__builtins__.open', mock.mock_open()) as mock_file:
-            mock_file.return_value.write.side_effect = lambda s: self.assertTrue(
-                s.strip() in self.expected.splitlines())
+            mock_file.return_value.write.side_effect = lambda s: self.assertIn(
+                s.strip(), self.expected.splitlines())
             extract.write_csv('timing.csv')
 
 
@@ -100,7 +114,30 @@ def test_command_line(self):
                         with mock.patch("sys.argv", args):
                             main()
                             mock_log.assert_called_once_with(logfile)
-                            mock_init.assert_called_once_with(mock.ANY, capture, output, host, int(port))
+                            mock_init.assert_called_once_with(
+                                mock.ANY, capture, output, host, int(port),
+                                None)
+
+    def test_raw_times(self):
+        raw_times = "times-log.csv"
+        logfile = "log.csv"
+        output = "/tmp"
+        args = ["extract.py",
+                "-l", logfile,
+                "-o", output,
+                "--raw-times", raw_times]
+        mock_init = mock.Mock()
+        mock_init.return_value = None
+        with mock.patch('tlsfuzzer.extract.Extract.parse'):
+            with mock.patch('tlsfuzzer.extract.Extract.__init__', mock_init):
+                with mock.patch('tlsfuzzer.extract.Extract.write_csv'):
+                    with mock.patch('tlsfuzzer.extract.Log') as mock_log:
+                        with mock.patch("sys.argv", args):
+                            main()
+                            mock_log.assert_called_once_with(logfile)
+                            mock_init.assert_called_once_with(
+                                mock.ANY, None, output, None, None,
+                                raw_times)
 
     def test_help(self):
         args = ["extract.py", "--help"]
@@ -117,4 +154,24 @@ def test_help_msg(self):
     def test_missing_output(self):
         args = ["extract.py"]
         with mock.patch("sys.argv", args):
-            self.assertRaises(ValueError, main)
+            self.assertRaises(SystemExit, main)
+
+    def test_incompatible_options(self):
+        args = ["extract.py", "-c", "capture.pcap", "--raw-times",
+                "times-log.csv"]
+        with mock.patch("sys.argv", args):
+            with self.assertRaises(ValueError):
+                main()
+
+    def test_incomplete_packet_capture_options(self):
+        args = ["extract.py", "-c", "capture.pcap", "-l", "log.csv",
+                "-o", "/tmp"]
+        with mock.patch("sys.argv", args):
+            with self.assertRaises(ValueError):
+                main()
+
+    def test_incomplete_ext_times_options(self):
+        args = ["extract.py", "--raw-times", "times-log.csv", "-o", "/tmp"]
+        with mock.patch("sys.argv", args):
+            with self.assertRaises(ValueError):
+                main()
diff --git a/tests/times-log.csv b/tests/times-log.csv
@@ -0,0 +1,21 @@
+Random connection times
+0.000747009
+0.000758130
+0.000696718
+0.000920462
+0.001327954
+0.000980080
+0.000988899
+0.000904547
+0.000875510
+0.000768453
+0.000734843
+0.000752226
+0.000754852
+0.000862102
+0.000706491
+0.000667378
+0.000668237
+0.000671230
+0.000992733
+0.000790935
diff --git a/tlsfuzzer/analysis.py b/tlsfuzzer/analysis.py
@@ -133,8 +133,8 @@ def box_plot(self):
         ax = fig.add_subplot(1, 1, 1)
 
         self.data.boxplot(ax=ax, grid=False, showfliers=False)
-        ax.set_xticks(list(range(len(self.data))))
-        ax.set_xticklabels(list(range(len(self.data))))
+        ax.set_xticks(list(range(len(self.data.columns)+1)))
+        ax.set_xticklabels([''] + list(range(len(self.data.columns))))
 
         ax.set_title("Box plot")
         ax.set_ylabel("Time [s]")

diff --git a/tlsfuzzer/extract.py b/tlsfuzzer/extract.py
@@ -23,11 +23,18 @@ def help_msg():
     """Print help message."""
     print("Usage: extract [-l logfile] [-c capture] [[-o output] ...]")
     print(" -l logfile     Filename of the timing log (required)")
-    print(" -c capture     Packet capture of the test run (required)")
+    print(" -c capture     Packet capture of the test run")
     print(" -o output      Directory where to place results (required)")
-    print(" -h host        TLS server host or ip (required)")
-    print(" -p port        TLS server port (required)")
+    print(" -h host        TLS server host or ip")
+    print(" -p port        TLS server port")
+    print(" --raw-times FILE Read the timings from an external file, not")
+    print("                the packet capture")
     print(" --help         Display this message")
+    print("")
+    print("When extracting data from a capture file, specifying the capture")
+    print("file, host and port is necessary.")
+    print("When using the external timing source, only it, and the always")
+    print("required options: logfile and output dir are necessary.")
 
 
 def main():
@@ -37,9 +44,16 @@ def main():
     output = None
     ip_address = None
     port = None
+    raw_times = None
+
 
     argv = sys.argv[1:]
-    opts, args = getopt.getopt(argv, "l:c:h:p:o:t:", ["help"])
+
+    if not argv:
+        help_msg()
+        sys.exit(1)
+
+    opts, args = getopt.getopt(argv, "l:c:h:p:o:t:", ["help", "raw-times="])
     for opt, arg in opts:
         if opt == '-l':
             logfile = arg
@@ -51,24 +65,35 @@ def main():
             ip_address = arg
         elif opt == '-p':
             port = int(arg)
+        elif opt == "--raw-times":
+            raw_times = arg
         elif opt == "--help":
             help_msg()
             sys.exit(0)
 
-    if not all([logfile, capture, output, ip_address, port]):
+    if raw_times and capture:
+        raise ValueError(
+            "Can't specify both a capture file and external timing log")
+
+    if not all([logfile, output]):
+        raise ValueError(
+            "Specifying logfile and output is mandatory")
+
+    if capture and not all([logfile, output, ip_address, port]):
         raise ValueError("Some arguments are missing!")
 
     log = Log(logfile)
     log.read_log()
-    analysis = Extract(log, capture, output, ip_address, port)
+    analysis = Extract(log, capture, output, ip_address, port, raw_times)
     analysis.parse()
     analysis.write_csv('timing.csv')
 
 
 class Extract:
     """Extract timing information from packet capture."""
 
-    def __init__(self, log, capture, output, ip_address, port):
+    def __init__(self, log, capture=None, output=None, ip_address=None,
+                 port=None, raw_times=None):
         """
         Initialises instance and sets up class name generator from log.
 
@@ -80,12 +105,13 @@ def __init__(self, log, capture, output, ip_address, port):
         """
         self.capture = capture
         self.output = output
-        self.ip_address = self.hostname_to_ip(ip_address)
+        self.ip_address = ip_address and self.hostname_to_ip(ip_address)
         self.port = port
         self.timings = defaultdict(list)
         self.client_message = None
         self.server_message = None
         self.warm_up_messages_left = WARM_UP
+        self.raw_times = raw_times
 
         # set up class names generator
         self.log = log
@@ -97,6 +123,48 @@ def parse(self):
         Extract timing information from capture file
         and associate it with class from log file.
         """
+        if self.capture:
+            return self._parse_pcap()
+        return self._parse_raw_times()
+
+    def _parse_raw_times(self):
+        """Classify already extracted times."""
+        # as unlike with capture file, we don't know how many sanity tests,
+        # manual checks, etc. were performed to the server before the
+        # timing tests were started, we don't know how many measurements to
+        # skip. Count the probes, the times, and then use the last len(probes)
+        # of times for classification
+
+        # do counting in memory efficient way
+        probe_count = sum(1 for _ in self.class_generator)
+        self.log.read_log()
+        self.class_generator = self.log.iterate_log()
+        with open(self.raw_times, 'r') as raw_times:
+            # skip the header line
+            raw_times.readline()
+            times_count = 0
+            for times_count, _ in enumerate(raw_times, 1):
+                pass
+        if probe_count > times_count:
+            raise ValueError(
+                "Insufficient number of times for provided log file")
+
+        self.warm_up_messages_left = times_count - probe_count
+
+        with open(self.raw_times, 'r') as raw_times:
+            # skip the header line
+            raw_times.readline()
+
+            for _ in range(self.warm_up_messages_left):
+                raw_times.readline()
+
+            for line in raw_times:
+                class_index = next(self.class_generator)
+                class_name = self.class_names[class_index]
+                self.timings[class_name].append(line.strip())
+
+    def _parse_pcap(self):
+        """Process capture file."""
         with open(self.capture, 'rb') as pcap:
             capture = dpkt.pcap.Reader(pcap)