Skip to content

Commit

Permalink
Merge pull request #579 from dluyer/patch-2
Browse files Browse the repository at this point in the history
Correctness fix (pull #578) plus efficiency improvements
  • Loading branch information
takluyver committed Jan 14, 2020
2 parents 692681f + ad4be95 commit f16add7
Show file tree
Hide file tree
Showing 6 changed files with 138 additions and 38 deletions.
5 changes: 1 addition & 4 deletions pexpect/_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,7 @@
def expect_async(expecter, timeout=None):
# First process data that was previously read - if it matches, we don't need
# async stuff.
previously_read = expecter.spawn.buffer
expecter.spawn._buffer = expecter.spawn.buffer_type()
expecter.spawn._before = expecter.spawn.buffer_type()
idx = expecter.new_data(previously_read)
idx = expecter.existing_data()
if idx is not None:
return idx
if not expecter.spawn.async_pw_transport:
Expand Down
123 changes: 91 additions & 32 deletions pexpect/expect.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,45 +6,101 @@ class Expecter(object):
def __init__(self, spawn, searcher, searchwindowsize=-1):
    """Bind an Expecter to *spawn*, matching with *searcher*.

    A searchwindowsize of -1 is a sentinel meaning "inherit the value
    configured on the spawn object" (which should be None or a
    positive number).
    """
    self.spawn = spawn
    self.searcher = searcher
    self.searchwindowsize = (spawn.searchwindowsize
                             if searchwindowsize == -1
                             else searchwindowsize)
    # Searchers that expose their longest pattern length let us trim
    # the search buffer without risking a missed match.
    self.lookback = getattr(searcher, 'longest_string', None)

def new_data(self, data):
def do_search(self, window, freshlen):
    """Run the searcher over *window* and update spawn match state.

    Args:
        window: the chunk of buffered data to search.
        freshlen: how many trailing characters/bytes of *window* are
            new, i.e. were not present in the previous search.

    Returns the index of the matched pattern, or None if nothing
    matched yet (the caller should keep reading).
    """
    spawn = self.spawn
    searcher = self.searcher
    # The window can be narrower than the fresh data (e.g. a small
    # searchwindowsize); never claim more fresh data than we search.
    if freshlen > len(window):
        freshlen = len(window)
    index = searcher.search(window, freshlen, self.searchwindowsize)
    if index >= 0:
        # Matched: keep only the data after the match buffered for the
        # next expect call.
        spawn._buffer = spawn.buffer_type()
        spawn._buffer.write(window[searcher.end:])
        # 'before' is the full (untrimmed) history up to the match
        # start. NOTE(review): assumes searcher.start < len(window);
        # an empty match at the very end would yield [0:0] — confirm
        # against searcher semantics.
        spawn.before = spawn._before.getvalue()[
            0:-(len(window) - searcher.start)]
        spawn._before = spawn.buffer_type()
        spawn._before.write(window[searcher.end:])
        spawn.after = window[searcher.start:searcher.end]
        spawn.match = searcher.match
        spawn.match_index = index
        # Found a match
        return index
    elif self.searchwindowsize or self.lookback:
        # No match: trim the search buffer to the largest span a
        # future match could still need (window size or the longest
        # pattern), keeping memory bounded.
        maintain = self.searchwindowsize or self.lookback
        if spawn._buffer.tell() > maintain:
            spawn._buffer = spawn.buffer_type()
            spawn._buffer.write(window[-maintain:])

def existing_data(self):
    """Search data buffered before this expect call began.

    Returns the matched pattern index, or None if nothing matched.
    """
    # First call from a new call to expect_loop or expect_async.
    # self.searchwindowsize may have changed.
    # Treat all data as fresh.
    spawn = self.spawn
    before_len = spawn._before.tell()
    buf_len = spawn._buffer.tell()
    freshlen = before_len
    if before_len > buf_len:
        # _buffer was trimmed by a previous search (see do_search), so
        # it holds less than the full history kept in _before.
        if not self.searchwindowsize:
            # Unbounded window: rebuild _buffer from the full history.
            spawn._buffer = spawn.buffer_type()
            window = spawn._before.getvalue()
            spawn._buffer.write(window)
        elif buf_len < self.searchwindowsize:
            # The window grew past what _buffer retains: refill it
            # with the last searchwindowsize of the full history.
            spawn._buffer = spawn.buffer_type()
            spawn._before.seek(
                max(0, before_len - self.searchwindowsize))
            window = spawn._before.read()
            spawn._buffer.write(window)
        else:
            # _buffer still holds at least a full window's worth.
            spawn._buffer.seek(max(0, buf_len - self.searchwindowsize))
            window = spawn._buffer.read()
    else:
        # _buffer holds the complete history (never trimmed).
        if self.searchwindowsize:
            spawn._buffer.seek(max(0, buf_len - self.searchwindowsize))
            window = spawn._buffer.read()
        else:
            window = spawn._buffer.getvalue()
    return self.do_search(window, freshlen)

def new_data(self, data):
    """Append freshly read *data* and search for a match.

    Returns the matched pattern index, or None if nothing matched.
    """
    # A subsequent call, after a call to existing_data.
    spawn = self.spawn
    freshlen = len(data)
    # _before keeps the untrimmed history used to build spawn.before.
    spawn._before.write(data)
    if not self.searchwindowsize:
        if self.lookback:
            # search lookback + new data.
            old_len = spawn._buffer.tell()
            spawn._buffer.write(data)
            spawn._buffer.seek(max(0, old_len - self.lookback))
            window = spawn._buffer.read()
        else:
            # copy the whole buffer (really slow for large datasets).
            spawn._buffer.write(data)
            window = spawn.buffer
    else:
        if len(data) >= self.searchwindowsize or not spawn._buffer.tell():
            # The fresh data alone fills the window (or the buffer is
            # empty): the window is just the tail of the new data, and
            # _buffer can be reset to exactly that.
            window = data[-self.searchwindowsize:]
            spawn._buffer = spawn.buffer_type()
            spawn._buffer.write(window[-self.searchwindowsize:])
        else:
            # Append, then search the last searchwindowsize of it.
            spawn._buffer.write(data)
            new_len = spawn._buffer.tell()
            spawn._buffer.seek(max(0, new_len - self.searchwindowsize))
            window = spawn._buffer.read()
    return self.do_search(window, freshlen)

def eof(self, err=None):
spawn = self.spawn

spawn.before = spawn.buffer
spawn.before = spawn._before.getvalue()
spawn._buffer = spawn.buffer_type()
spawn._before = spawn.buffer_type()
spawn.after = EOF
Expand All @@ -64,11 +120,11 @@ def eof(self, err=None):
exc = EOF(msg)
exc.__cause__ = None # in Python 3.x we can use "raise exc from None"
raise exc

def timeout(self, err=None):
spawn = self.spawn

spawn.before = spawn.buffer
spawn.before = spawn._before.getvalue()
spawn.after = TIMEOUT
index = self.searcher.timeout_index
if index >= 0:
Expand All @@ -89,11 +145,11 @@ def timeout(self, err=None):

def errored(self):
    """Record that the expect loop ended with an error.

    Exposes everything read so far via spawn.before; there is no
    match, so after/match/match_index are cleared.
    """
    spawn = self.spawn
    # Use the untrimmed history, not the (possibly trimmed) buffer.
    spawn.before = spawn._before.getvalue()
    spawn.after = None
    spawn.match = None
    spawn.match_index = None

def expect_loop(self, timeout=-1):
"""Blocking expect"""
spawn = self.spawn
Expand All @@ -102,21 +158,21 @@ def expect_loop(self, timeout=-1):
end_time = time.time() + timeout

try:
incoming = spawn.buffer
spawn._buffer = spawn.buffer_type()
spawn._before = spawn.buffer_type()
idx = self.existing_data()
if idx is not None:
return idx
while True:
idx = self.new_data(incoming)
# Keep reading until exception or return.
if idx is not None:
return idx
# No match at this point
if (timeout is not None) and (timeout < 0):
return self.timeout()
# Still have time left, so read more data
incoming = spawn.read_nonblocking(spawn.maxread, timeout)
if self.spawn.delayafterread is not None:
time.sleep(self.spawn.delayafterread)
idx = self.new_data(incoming)
# Keep reading until exception or return.
if idx is not None:
return idx
if timeout is not None:
timeout = end_time - time.time()
except EOF as e:
Expand Down Expand Up @@ -154,6 +210,7 @@ def __init__(self, strings):
self.eof_index = -1
self.timeout_index = -1
self._strings = []
self.longest_string = 0
for n, s in enumerate(strings):
if s is EOF:
self.eof_index = n
Expand All @@ -162,6 +219,8 @@ def __init__(self, strings):
self.timeout_index = n
continue
self._strings.append((n, s))
if len(s) > self.longest_string:
self.longest_string = len(s)

def __str__(self):
'''This returns a human-readable string that represents the state of
Expand Down
2 changes: 1 addition & 1 deletion pexpect/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def print_ticks(d):
contains patterns and responses. Whenever one of the patterns is seen
in the command output, run() will send the associated response string.
So, run() in the above example can be also written as:
run("mencoder dvd://1 -o video.avi -oac copy -ovc copy",
events=[(TIMEOUT,print_ticks)], timeout=5)
Expand Down
2 changes: 1 addition & 1 deletion pexpect/screen.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def __init__(self, r=24, c=80, encoding='latin-1', encoding_errors='replace'):
self.encoding = encoding
self.encoding_errors = encoding_errors
if encoding is not None:
self.decoder = codecs.getincrementaldecoder(encoding)(encoding_errors)
self.decoder = codecs.getincrementaldecoder(encoding)(encoding_errors)
else:
self.decoder = None
self.cur_r = 1
Expand Down
3 changes: 3 additions & 0 deletions pexpect/spawnbase.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,9 @@ def write_to_stdout(b):
self.async_pw_transport = None
# This is the read buffer. See maxread.
self._buffer = self.buffer_type()
# The buffer may be trimmed for efficiency reasons. This is the
# untrimmed buffer, used to create the before attribute.
self._before = self.buffer_type()

def _log(self, s, direction):
if self.logfile is not None:
Expand Down
41 changes: 41 additions & 0 deletions tests/test_expect.py
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,47 @@ def test_before_after_exact(self):
p.expect = p.expect_exact
self._before_after(p)

def test_before_after_timeout(self):
    '''Tests that timeouts do not truncate before, a bug in 4.4-4.7.'''
    child = pexpect.spawn('cat', echo=False)
    child.sendline('BEGIN')
    # Pile up plenty of output so the search window has to trim.
    for _ in range(100):
        child.sendline('foo' * 100)
    patterns = [b'xyzzy', pexpect.TIMEOUT]
    matched = child.expect(patterns, searchwindowsize=10, timeout=0.001)
    self.assertEqual(matched, 1)
    child.sendline('xyzzy')
    matched = child.expect(patterns, searchwindowsize=10, timeout=30)
    self.assertEqual(matched, 0)
    # 'before' must still start at the very first thing we sent.
    self.assertEqual(child.before[0:5], b'BEGIN')
    child.sendeof()
    child.expect(pexpect.EOF)

def test_increasing_searchwindowsize(self):
    '''Tests that the search window can be expanded, a bug in 4.4-4.7.'''
    child = pexpect.spawn('cat', echo=False)
    child.sendline('BEGIN')
    # Flood the child so 'BEGIN' falls outside a small window.
    for _ in range(100):
        child.sendline('foo' * 100)
    matched = child.expect([b'xyzzy', pexpect.TIMEOUT],
                           searchwindowsize=10, timeout=0.5)
    self.assertEqual(matched, 1)
    # With a 10-character window, 'BEGIN' must not be found.
    matched = child.expect([b'BEGIN', pexpect.TIMEOUT],
                           searchwindowsize=10, timeout=0.5)
    self.assertEqual(matched, 1)
    # Widening the window must make the old data matchable again.
    matched = child.expect([b'BEGIN', pexpect.TIMEOUT],
                           searchwindowsize=40000, timeout=30.0)
    self.assertEqual(matched, 0)
    child.sendeof()
    child.expect(pexpect.EOF)

def test_searchwindowsize(self):
    '''Tests that we don't match outside the window, a bug in 4.4-4.7.'''
    proc = pexpect.spawn('echo foobarbazbop')
    # 'bar' lies outside the trailing 6-character window; 'bop' is in.
    matched = proc.expect([b'bar', b'bop'], searchwindowsize=6)
    self.assertEqual(matched, 1)

def _ordering(self, p):
p.timeout = 20
p.expect(b'>>> ')
Expand Down

0 comments on commit f16add7

Please sign in to comment.