Skip to content

Commit

Permalink
Merge pull request #579 from dluyer/patch-2
Browse files Browse the repository at this point in the history
Correctness fix (pull #578) plus efficiency improvements
  • Loading branch information
takluyver committed Jan 14, 2020
2 parents 692681f + ad4be95 commit f16add7
Show file tree
Hide file tree
Showing 6 changed files with 138 additions and 38 deletions.
5 changes: 1 addition & 4 deletions pexpect/_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,7 @@
def expect_async(expecter, timeout=None):
# First process data that was previously read - if it matches, we don't need
# async stuff.
previously_read = expecter.spawn.buffer
expecter.spawn._buffer = expecter.spawn.buffer_type()
expecter.spawn._before = expecter.spawn.buffer_type()
idx = expecter.new_data(previously_read)
idx = expecter.existing_data()
if idx is not None:
return idx
if not expecter.spawn.async_pw_transport:
Expand Down
123 changes: 91 additions & 32 deletions pexpect/expect.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,45 +6,101 @@ class Expecter(object):
def __init__(self, spawn, searcher, searchwindowsize=-1):
    """Bind an Expecter to *spawn*, matching with *searcher*.

    A searchwindowsize of -1 is a sentinel meaning "inherit the value
    configured on the spawn object" (which should be None or a
    positive number).
    """
    self.spawn = spawn
    self.searcher = searcher
    self.searchwindowsize = (spawn.searchwindowsize
                             if searchwindowsize == -1
                             else searchwindowsize)
    # Searchers that expose their longest pattern length let us trim
    # the search buffer without risking a missed match.
    self.lookback = getattr(searcher, 'longest_string', None)

def new_data(self, data):
def do_search(self, window, freshlen):
    """Run the searcher over *window* and update spawn match state.

    Args:
        window: the chunk of buffered data to search.
        freshlen: how many trailing characters/bytes of *window* are
            new, i.e. were not present in the previous search.

    Returns the index of the matched pattern, or None if nothing
    matched yet (the caller should keep reading).
    """
    spawn = self.spawn
    searcher = self.searcher
    # The window can be narrower than the fresh data (e.g. a small
    # searchwindowsize); never claim more fresh data than we search.
    if freshlen > len(window):
        freshlen = len(window)
    index = searcher.search(window, freshlen, self.searchwindowsize)
    if index >= 0:
        # Matched: keep only the data after the match buffered for the
        # next expect call.
        spawn._buffer = spawn.buffer_type()
        spawn._buffer.write(window[searcher.end:])
        # 'before' is the full (untrimmed) history up to the match
        # start. NOTE(review): assumes searcher.start < len(window);
        # an empty match at the very end would yield [0:0] — confirm
        # against searcher semantics.
        spawn.before = spawn._before.getvalue()[
            0:-(len(window) - searcher.start)]
        spawn._before = spawn.buffer_type()
        spawn._before.write(window[searcher.end:])
        spawn.after = window[searcher.start:searcher.end]
        spawn.match = searcher.match
        spawn.match_index = index
        # Found a match
        return index
    elif self.searchwindowsize or self.lookback:
        # No match: trim the search buffer to the largest span a
        # future match could still need (window size or the longest
        # pattern), keeping memory bounded.
        maintain = self.searchwindowsize or self.lookback
        if spawn._buffer.tell() > maintain:
            spawn._buffer = spawn.buffer_type()
            spawn._buffer.write(window[-maintain:])

def existing_data(self):
    """Search data buffered before this expect call began.

    Returns the matched pattern index, or None if nothing matched.
    """
    # First call from a new call to expect_loop or expect_async.
    # self.searchwindowsize may have changed.
    # Treat all data as fresh.
    spawn = self.spawn
    before_len = spawn._before.tell()
    buf_len = spawn._buffer.tell()
    freshlen = before_len
    if before_len > buf_len:
        # _buffer was trimmed by a previous search (see do_search), so
        # it holds less than the full history kept in _before.
        if not self.searchwindowsize:
            # Unbounded window: rebuild _buffer from the full history.
            spawn._buffer = spawn.buffer_type()
            window = spawn._before.getvalue()
            spawn._buffer.write(window)
        elif buf_len < self.searchwindowsize:
            # The window grew past what _buffer retains: refill it
            # with the last searchwindowsize of the full history.
            spawn._buffer = spawn.buffer_type()
            spawn._before.seek(
                max(0, before_len - self.searchwindowsize))
            window = spawn._before.read()
            spawn._buffer.write(window)
        else:
            # _buffer still holds at least a full window's worth.
            spawn._buffer.seek(max(0, buf_len - self.searchwindowsize))
            window = spawn._buffer.read()
    else:
        # _buffer holds the complete history (never trimmed).
        if self.searchwindowsize:
            spawn._buffer.seek(max(0, buf_len - self.searchwindowsize))
            window = spawn._buffer.read()
        else:
            window = spawn._buffer.getvalue()
    return self.do_search(window, freshlen)

def new_data(self, data):
    """Append freshly read *data* and search for a match.

    Returns the matched pattern index, or None if nothing matched.
    """
    # A subsequent call, after a call to existing_data.
    spawn = self.spawn
    freshlen = len(data)
    # _before keeps the untrimmed history used to build spawn.before.
    spawn._before.write(data)
    if not self.searchwindowsize:
        if self.lookback:
            # search lookback + new data.
            old_len = spawn._buffer.tell()
            spawn._buffer.write(data)
            spawn._buffer.seek(max(0, old_len - self.lookback))
            window = spawn._buffer.read()
        else:
            # copy the whole buffer (really slow for large datasets).
            spawn._buffer.write(data)
            window = spawn.buffer
    else:
        if len(data) >= self.searchwindowsize or not spawn._buffer.tell():
            # The fresh data alone fills the window (or the buffer is
            # empty): the window is just the tail of the new data, and
            # _buffer can be reset to exactly that.
            window = data[-self.searchwindowsize:]
            spawn._buffer = spawn.buffer_type()
            spawn._buffer.write(window[-self.searchwindowsize:])
        else:
            # Append, then search the last searchwindowsize of it.
            spawn._buffer.write(data)
            new_len = spawn._buffer.tell()
            spawn._buffer.seek(max(0, new_len - self.searchwindowsize))
            window = spawn._buffer.read()
    return self.do_search(window, freshlen)

def eof(self, err=None):
spawn = self.spawn

spawn.before = spawn.buffer
spawn.before = spawn._before.getvalue()
spawn._buffer = spawn.buffer_type()
spawn._before = spawn.buffer_type()
spawn.after = EOF
Expand All @@ -64,11 +120,11 @@ def eof(self, err=None):
exc = EOF(msg)
exc.__cause__ = None # in Python 3.x we can use "raise exc from None"
raise exc

def timeout(self, err=None):
spawn = self.spawn

spawn.before = spawn.buffer
spawn.before = spawn._before.getvalue()
spawn.after = TIMEOUT
index = self.searcher.timeout_index
if index >= 0:
Expand All @@ -89,11 +145,11 @@ def timeout(self, err=None):

def errored(self):
    """Record that the expect loop ended with an error.

    Exposes everything read so far via spawn.before; there is no
    match, so after/match/match_index are cleared.
    """
    spawn = self.spawn
    # Use the untrimmed history, not the (possibly trimmed) buffer.
    spawn.before = spawn._before.getvalue()
    spawn.after = None
    spawn.match = None
    spawn.match_index = None

def expect_loop(self, timeout=-1):
"""Blocking expect"""
spawn = self.spawn
Expand All @@ -102,21 +158,21 @@ def expect_loop(self, timeout=-1):
end_time = time.time() + timeout

try:
incoming = spawn.buffer
spawn._buffer = spawn.buffer_type()
spawn._before = spawn.buffer_type()
idx = self.existing_data()
if idx is not None:
return idx
while True:
idx = self.new_data(incoming)
# Keep reading until exception or return.
if idx is not None:
return idx
# No match at this point
if (timeout is not None) and (timeout < 0):
return self.timeout()
# Still have time left, so read more data
incoming = spawn.read_nonblocking(spawn.maxread, timeout)
if self.spawn.delayafterread is not None:
time.sleep(self.spawn.delayafterread)
idx = self.new_data(incoming)
# Keep reading until exception or return.
if idx is not None:
return idx
if timeout is not None:
timeout = end_time - time.time()
except EOF as e:
Expand Down Expand Up @@ -154,6 +210,7 @@ def __init__(self, strings):
self.eof_index = -1
self.timeout_index = -1
self._strings = []
self.longest_string = 0
for n, s in enumerate(strings):
if s is EOF:
self.eof_index = n
Expand All @@ -162,6 +219,8 @@ def __init__(self, strings):
self.timeout_index = n
continue
self._strings.append((n, s))
if len(s) > self.longest_string:
self.longest_string = len(s)

def __str__(self):
'''This returns a human-readable string that represents the state of
Expand Down
2 changes: 1 addition & 1 deletion pexpect/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def print_ticks(d):
contains patterns and responses. Whenever one of the patterns is seen
in the command output, run() will send the associated response string.
So, run() in the above example can be also written as:
run("mencoder dvd://1 -o video.avi -oac copy -ovc copy",
events=[(TIMEOUT,print_ticks)], timeout=5)
Expand Down
2 changes: 1 addition & 1 deletion pexpect/screen.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def __init__(self, r=24, c=80, encoding='latin-1', encoding_errors='replace'):
self.encoding = encoding
self.encoding_errors = encoding_errors
if encoding is not None:
self.decoder = codecs.getincrementaldecoder(encoding)(encoding_errors)
self.decoder = codecs.getincrementaldecoder(encoding)(encoding_errors)
else:
self.decoder = None
self.cur_r = 1
Expand Down
3 changes: 3 additions & 0 deletions pexpect/spawnbase.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,9 @@ def write_to_stdout(b):
self.async_pw_transport = None
# This is the read buffer. See maxread.
self._buffer = self.buffer_type()
# The buffer may be trimmed for efficiency reasons. This is the
# untrimmed buffer, used to create the before attribute.
self._before = self.buffer_type()

def _log(self, s, direction):
if self.logfile is not None:
Expand Down
41 changes: 41 additions & 0 deletions tests/test_expect.py
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,47 @@ def test_before_after_exact(self):
p.expect = p.expect_exact
self._before_after(p)

def test_before_after_timeout(self):
    '''Tests that timeouts do not truncate before, a bug in 4.4-4.7.'''
    child = pexpect.spawn('cat', echo=False)
    child.sendline('BEGIN')
    # Pile up plenty of output so the search window has to trim.
    for _ in range(100):
        child.sendline('foo' * 100)
    patterns = [b'xyzzy', pexpect.TIMEOUT]
    matched = child.expect(patterns, searchwindowsize=10, timeout=0.001)
    self.assertEqual(matched, 1)
    child.sendline('xyzzy')
    matched = child.expect(patterns, searchwindowsize=10, timeout=30)
    self.assertEqual(matched, 0)
    # 'before' must still start at the very first thing we sent.
    self.assertEqual(child.before[0:5], b'BEGIN')
    child.sendeof()
    child.expect(pexpect.EOF)

def test_increasing_searchwindowsize(self):
    '''Tests that the search window can be expanded, a bug in 4.4-4.7.'''
    child = pexpect.spawn('cat', echo=False)
    child.sendline('BEGIN')
    # Flood the child so 'BEGIN' falls outside a small window.
    for _ in range(100):
        child.sendline('foo' * 100)
    matched = child.expect([b'xyzzy', pexpect.TIMEOUT],
                           searchwindowsize=10, timeout=0.5)
    self.assertEqual(matched, 1)
    # With a 10-character window, 'BEGIN' must not be found.
    matched = child.expect([b'BEGIN', pexpect.TIMEOUT],
                           searchwindowsize=10, timeout=0.5)
    self.assertEqual(matched, 1)
    # Widening the window must make the old data matchable again.
    matched = child.expect([b'BEGIN', pexpect.TIMEOUT],
                           searchwindowsize=40000, timeout=30.0)
    self.assertEqual(matched, 0)
    child.sendeof()
    child.expect(pexpect.EOF)

def test_searchwindowsize(self):
    '''Tests that we don't match outside the window, a bug in 4.4-4.7.'''
    proc = pexpect.spawn('echo foobarbazbop')
    # 'bar' lies outside the trailing 6-character window; 'bop' is in.
    matched = proc.expect([b'bar', b'bop'], searchwindowsize=6)
    self.assertEqual(matched, 1)

def _ordering(self, p):
p.timeout = 20
p.expect(b'>>> ')
Expand Down

0 comments on commit f16add7

Please sign in to comment.