Skip to content

Commit

Permalink
API: Deprecate skip_footer in read_csv
Browse files Browse the repository at this point in the history
  • Loading branch information
gfyoung committed Jul 28, 2016
1 parent cc216ad commit d21345f
Show file tree
Hide file tree
Showing 8 changed files with 54 additions and 41 deletions.
4 changes: 3 additions & 1 deletion doc/source/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,8 @@ skiprows : list-like or integer, default ``None``
of the file.
skipfooter : int, default ``0``
Number of lines at bottom of file to skip (unsupported with engine='c').
skip_footer : int, default ``0``
DEPRECATED: use the ``skipfooter`` parameter instead, as they are identical
nrows : int, default ``None``
Number of rows of file to read. Useful for reading pieces of large files.
low_memory : boolean, default ``True``
Expand Down Expand Up @@ -1411,7 +1413,7 @@ back to python if C-unsupported options are specified. Currently, C-unsupported
options include:

- ``sep`` other than a single character (e.g. regex separators)
- ``skip_footer``
- ``skipfooter``
- ``sep=None`` with ``delim_whitespace=False``

Specifying any of the above options will produce a ``ParserWarning`` unless the
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.19.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -612,6 +612,7 @@ Deprecations
- ``compact_ints`` and ``use_unsigned`` have been deprecated in ``pd.read_csv()`` and will be removed in a future version (:issue:`13320`)
- ``buffer_lines`` has been deprecated in ``pd.read_csv()`` and will be removed in a future version (:issue:`13360`)
- ``as_recarray`` has been deprecated in ``pd.read_csv()`` and will be removed in a future version (:issue:`13373`)
- ``skip_footer`` has been deprecated in ``pd.read_csv()`` in favor of ``skipfooter`` and will be removed in a future version (:issue:`13349`)
- top-level ``pd.ordered_merge()`` has been renamed to ``pd.merge_ordered()`` and the original name will be removed in a future version (:issue:`13358`)
- ``Timestamp.offset`` property (and named arg in the constructor) has been deprecated in favor of ``freq`` (:issue:`12160`)
- ``pd.tseries.util.pivot_annual`` is deprecated. Use ``pivot_table`` as an alternative; an example is :ref:`here <cookbook.pivot>` (:issue:`736`)
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -473,7 +473,7 @@ def _parse_cell(cell_contents, cell_typ):
parse_dates=parse_dates,
date_parser=date_parser,
skiprows=skiprows,
skip_footer=skip_footer,
skipfooter=skip_footer,
squeeze=squeeze,
**kwds)

Expand Down
43 changes: 24 additions & 19 deletions pandas/io/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,8 @@
at the start of the file
skipfooter : int, default 0
Number of lines at bottom of file to skip (Unsupported with engine='c')
skip_footer : int, default 0
DEPRECATED: use the `skipfooter` parameter instead, as they are identical
nrows : int, default None
Number of rows of file to read. Useful for reading pieces of large files
na_values : str or list-like or dict, default None
Expand Down Expand Up @@ -341,9 +343,6 @@ def _validate_nrows(nrows):
def _read(filepath_or_buffer, kwds):
"Generic reader of line files."
encoding = kwds.get('encoding', None)
skipfooter = kwds.pop('skipfooter', None)
if skipfooter is not None:
kwds['skip_footer'] = skipfooter

# If the input could be a filename, check for a recognizable compression
# extension. If we're reading from a URL, the `get_filepath_or_buffer`
Expand Down Expand Up @@ -411,8 +410,8 @@ def _read(filepath_or_buffer, kwds):
'na_values': None,
'true_values': None,
'false_values': None,
'skip_footer': 0,
'converters': None,
'skipfooter': 0,

'keep_default_na': True,
'thousands': None,
Expand Down Expand Up @@ -461,7 +460,7 @@ def _read(filepath_or_buffer, kwds):
'widths': None,
}

_c_unsupported = set(['skip_footer'])
_c_unsupported = set(['skipfooter'])
_python_unsupported = set([
'low_memory',
'buffer_lines',
Expand Down Expand Up @@ -503,7 +502,6 @@ def parser_f(filepath_or_buffer,
false_values=None,
skipinitialspace=False,
skiprows=None,
skipfooter=None,
nrows=None,

# NA and Missing Data Handling
Expand Down Expand Up @@ -541,8 +539,8 @@ def parser_f(filepath_or_buffer,
error_bad_lines=True,
warn_bad_lines=True,

# Deprecated
skip_footer=0,
skipfooter=0,
skip_footer=0, # deprecated

# Internal
doublequote=True,
Expand Down Expand Up @@ -570,6 +568,13 @@ def parser_f(filepath_or_buffer,
engine = 'c'
engine_specified = False

if skip_footer != 0:
warnings.warn("The 'skip_footer' argument has "
"been deprecated and will be removed "
"in a future version. Please use the "
"'skipfooter' argument instead.",
FutureWarning, stacklevel=2)

kwds = dict(delimiter=delimiter,
engine=engine,
dialect=dialect,
Expand Down Expand Up @@ -767,9 +772,9 @@ def _clean_options(self, options, engine):

# C engine not supported yet
if engine == 'c':
if options['skip_footer'] > 0:
if options['skipfooter'] > 0:
fallback_reason = "the 'c' engine does not support"\
" skip_footer"
" skipfooter"
engine = 'python'

if sep is None and not delim_whitespace:
Expand Down Expand Up @@ -902,8 +907,8 @@ def _failover_to_python(self):

def read(self, nrows=None):
if nrows is not None:
if self.options.get('skip_footer'):
raise ValueError('skip_footer not supported for iteration')
if self.options.get('skipfooter'):
raise ValueError('skipfooter not supported for iteration')

ret = self._engine.read(nrows)

Expand Down Expand Up @@ -1578,7 +1583,7 @@ def TextParser(*args, **kwds):
date_parser : function, default None
skiprows : list of integers
Row numbers to skip
skip_footer : int
skipfooter : int
Number of lines at bottom of file to skip
converters : dict, default None
Dict of functions for converting values in certain columns. Keys can
Expand Down Expand Up @@ -1691,7 +1696,7 @@ def __init__(self, f, **kwds):
self.memory_map = kwds['memory_map']
self.skiprows = kwds['skiprows']

self.skip_footer = kwds['skip_footer']
self.skipfooter = kwds['skipfooter']
self.delimiter = kwds['delimiter']

self.quotechar = kwds['quotechar']
Expand Down Expand Up @@ -2323,7 +2328,7 @@ def _rows_to_cols(self, content):
content, min_width=col_len).T)
zip_len = len(zipped_content)

if self.skip_footer < 0:
if self.skipfooter < 0:
raise ValueError('skip footer cannot be negative')

# Loop through rows to verify lengths are correct.
Expand All @@ -2336,8 +2341,8 @@ def _rows_to_cols(self, content):
break

footers = 0
if self.skip_footer:
footers = self.skip_footer
if self.skipfooter:
footers = self.skipfooter

row_num = self.pos - (len(content) - i + footers)

Expand Down Expand Up @@ -2423,8 +2428,8 @@ def _get_lines(self, rows=None):
else:
lines = new_rows

if self.skip_footer:
lines = lines[:-self.skip_footer]
if self.skipfooter:
lines = lines[:-self.skipfooter]

lines = self._check_comments(lines)
if self.skip_blank_lines:
Expand Down
12 changes: 6 additions & 6 deletions pandas/io/tests/parser/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,9 +218,9 @@ def test_malformed(self):
skiprows=[2])
it.read()

# skip_footer is not supported with the C parser yet
# skipfooter is not supported with the C parser yet
if self.engine == 'python':
# skip_footer
# skipfooter
data = """ignore
A,B,C
1,2,3 # comment
Expand All @@ -232,7 +232,7 @@ def test_malformed(self):
with tm.assertRaisesRegexp(Exception, msg):
self.read_table(StringIO(data), sep=',',
header=1, comment='#',
skip_footer=1)
skipfooter=1)

def test_quoting(self):
bad_line_small = """printer\tresult\tvariant_name
Expand Down Expand Up @@ -524,11 +524,11 @@ def test_iterator(self):
self.assertEqual(len(result), 3)
tm.assert_frame_equal(pd.concat(result), expected)

# skip_footer is not supported with the C parser yet
# skipfooter is not supported with the C parser yet
if self.engine == 'python':
# test bad parameter (skip_footer)
# test bad parameter (skipfooter)
reader = self.read_csv(StringIO(self.data1), index_col=0,
iterator=True, skip_footer=True)
iterator=True, skipfooter=True)
self.assertRaises(ValueError, reader.read, 3)

def test_pass_names_with_index(self):
Expand Down
4 changes: 2 additions & 2 deletions pandas/io/tests/parser/python_parser_only.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def test_single_line(self):
finally:
sys.stdout = sys.__stdout__

def test_skip_footer(self):
def test_skipfooter(self):
# see gh-6607
data = """A,B,C
1,2,3
Expand All @@ -107,7 +107,7 @@ def test_skip_footer(self):
want to skip this
also also skip this
"""
result = self.read_csv(StringIO(data), skip_footer=2)
result = self.read_csv(StringIO(data), skipfooter=2)
no_footer = '\n'.join(data.split('\n')[:-3])
expected = self.read_csv(StringIO(no_footer))
tm.assert_frame_equal(result, expected)
Expand Down
13 changes: 9 additions & 4 deletions pandas/io/tests/parser/test_unsupported.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def test_c_engine(self):
with tm.assertRaisesRegexp(ValueError, msg):
read_table(StringIO(data), sep='\s', dtype={'a': float})
with tm.assertRaisesRegexp(ValueError, msg):
read_table(StringIO(data), skip_footer=1, dtype={'a': float})
read_table(StringIO(data), skipfooter=1, dtype={'a': float})

# specify C engine with unsupported options (raise)
with tm.assertRaisesRegexp(ValueError, msg):
Expand All @@ -61,15 +61,15 @@ def test_c_engine(self):
with tm.assertRaisesRegexp(ValueError, msg):
read_table(StringIO(data), engine='c', sep='\s')
with tm.assertRaisesRegexp(ValueError, msg):
read_table(StringIO(data), engine='c', skip_footer=1)
read_table(StringIO(data), engine='c', skipfooter=1)

# specify C-unsupported options without python-unsupported options
with tm.assert_produces_warning(parsers.ParserWarning):
read_table(StringIO(data), sep=None, delim_whitespace=False)
with tm.assert_produces_warning(parsers.ParserWarning):
read_table(StringIO(data), sep='\s')
with tm.assert_produces_warning(parsers.ParserWarning):
read_table(StringIO(data), skip_footer=1)
read_table(StringIO(data), skipfooter=1)

text = """ A B C D E
one two three four
Expand Down Expand Up @@ -127,15 +127,20 @@ def test_deprecated_args(self):
'as_recarray': True,
'buffer_lines': True,
'compact_ints': True,
'skip_footer': True,
'use_unsigned': True,
}

engines = 'c', 'python'

for engine in engines:
for arg, non_default_val in deprecated.items():
if engine == 'c' and arg == 'skip_footer':
# unsupported --> exception is raised
continue

if engine == 'python' and arg == 'buffer_lines':
# unsupported --> exception is raised first
# unsupported --> exception is raised
continue

with tm.assert_produces_warning(
Expand Down
16 changes: 8 additions & 8 deletions pandas/parser.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ cdef extern from "parser/tokenizer.h":

void *skipset
int64_t skip_first_N_rows
int skip_footer
int skipfooter
double (*converter)(const char *, char **, char, char, char, int) nogil

# error handling
Expand Down Expand Up @@ -270,7 +270,7 @@ cdef class TextReader:
kh_str_t *true_set

cdef public:
int leading_cols, table_width, skip_footer, buffer_lines
int leading_cols, table_width, skipfooter, buffer_lines
object allow_leading_cols
object delimiter, converters, delim_whitespace
object na_values
Expand Down Expand Up @@ -338,7 +338,7 @@ cdef class TextReader:
low_memory=False,
buffer_lines=None,
skiprows=None,
skip_footer=0,
skipfooter=0,
verbose=False,
mangle_dupe_cols=True,
tupleize_cols=False,
Expand Down Expand Up @@ -418,15 +418,15 @@ cdef class TextReader:
if skiprows is not None:
self._make_skiprow_set()

self.skip_footer = skip_footer
self.skipfooter = skipfooter

# suboptimal
if usecols is not None:
self.has_usecols = 1
self.usecols = set(usecols)

# XXX
if skip_footer > 0:
if skipfooter > 0:
self.parser.error_bad_lines = 0
self.parser.warn_bad_lines = 0

Expand Down Expand Up @@ -912,8 +912,8 @@ cdef class TextReader:
if buffered_lines < irows:
self._tokenize_rows(irows - buffered_lines)

if self.skip_footer > 0:
raise ValueError('skip_footer can only be used to read '
if self.skipfooter > 0:
raise ValueError('skipfooter can only be used to read '
'the whole file')
else:
with nogil:
Expand All @@ -926,7 +926,7 @@ cdef class TextReader:

if status < 0:
raise_parser_error('Error tokenizing data', self.parser)
footer = self.skip_footer
footer = self.skipfooter

if self.parser_start == self.parser.lines:
raise StopIteration
Expand Down

0 comments on commit d21345f

Please sign in to comment.