Skip to content

Commit

Permalink
Refactored self._buffer to self._nltk_buffer
Browse files Browse the repository at this point in the history
Resolves #1308

This avoids the variable name clash between nltk.data.BufferedGzipFile and the native Python 3.5 gzip.GzipFile, both of which use an attribute named self._buffer.
  • Loading branch information
alvations committed Feb 27, 2016
1 parent 3d92820 commit e1e994c
Showing 1 changed file with 10 additions and 5 deletions.
15 changes: 10 additions & 5 deletions nltk/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,25 +369,30 @@ def __init__(self, filename=None, mode=None, compresslevel=9,
"""
GzipFile.__init__(self, filename, mode, compresslevel, fileobj)
self._size = kwargs.get('size', self.SIZE)
self._buffer = BytesIO()
# Note: In Python >= 3.5, GzipFile already uses a buffered reader
# in the backend, which has its own attribute named self._buffer.
# See https://github.com/nltk/nltk/issues/1308
if sys.version.startswith('3.5'):
sys.stderr.write("Use the native Python gzip.GzipFile instead.")
self._nltk_buffer = BytesIO()
# cStringIO does not support len.
self._len = 0

def _reset_buffer(self):
# For some reason calling BytesIO.truncate() here will lead to
# inconsistent writes so just set _buffer to a new BytesIO object.
self._buffer = BytesIO()
self._nltk_buffer = BytesIO()
self._len = 0

def _write_buffer(self, data):
# Simply write to the buffer and increment the buffer size.
if data is not None:
self._buffer.write(data)
self._nltk_buffer.write(data)
self._len += len(data)

def _write_gzip(self, data):
# Write the current buffer to the GzipFile.
GzipFile.write(self, self._buffer.getvalue())
GzipFile.write(self, self._nltk_buffer.getvalue())
# Then reset the buffer and write the new data to the buffer.
self._reset_buffer()
self._write_buffer(data)
Expand All @@ -400,7 +405,7 @@ def close(self):
return GzipFile.close(self)

def flush(self, lib_mode=FLUSH):
self._buffer.flush()
self._nltk_buffer.flush()
GzipFile.flush(self, lib_mode)

def read(self, size=None):
Expand Down

0 comments on commit e1e994c

Please sign in to comment.