Skip to content

Commit

Permalink
ENH: Add columns argument to read_feather() (pandas-dev#24025)
Browse files Browse the repository at this point in the history
  • Loading branch information
nixphix committed Dec 3, 2018
1 parent 08395af commit 8e419d3
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 5 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ New features
dataframe's indexes from the resulting Parquet file. (:issue:`20768`)
- :meth:`DataFrame.corr` and :meth:`Series.corr` now accept a callable for generic calculation methods of correlation, e.g. histogram intersection (:issue:`22684`)
- :func:`DataFrame.to_string` now accepts ``decimal`` as an argument, allowing the user to specify which decimal separator should be used in the output. (:issue:`23614`)
- :func:`read_feather` now accepts ``columns`` as an argument, allowing the user to specify which columns should be read. (:issue:`24025`)

.. _whatsnew_0240.values_api:

Expand Down
13 changes: 10 additions & 3 deletions pandas/io/feather_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def to_feather(df, path):


@deprecate_kwarg(old_arg_name='nthreads', new_arg_name='use_threads')
def read_feather(path, use_threads=True):
def read_feather(path, columns=None, use_threads=True):
"""
Load a feather-format object from the file path
Expand All @@ -93,6 +93,11 @@ def read_feather(path, use_threads=True):
Parameters
----------
path : string file path, or file-like object
columns : sequence, default None
Only read a specific set of columns. If not provided, all columns are
read.

.. versionadded:: 0.24.0
nthreads : int, default 1
Number of CPU threads to use when reading to pandas.DataFrame
Expand All @@ -116,6 +121,8 @@ def read_feather(path, use_threads=True):
int_use_threads = int(use_threads)
if int_use_threads < 1:
int_use_threads = 1
return feather.read_feather(path, nthreads=int_use_threads)
return feather.read_feather(path, columns=columns,
nthreads=int_use_threads)

return feather.read_feather(path, use_threads=bool(use_threads))
return feather.read_feather(path, columns=columns,
use_threads=bool(use_threads))
19 changes: 17 additions & 2 deletions pandas/tests/io/test_feather.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,16 @@ def check_error_on_write(self, df, exc):
with ensure_clean() as path:
to_feather(df, path)

def check_round_trip(self, df, **kwargs):
def check_round_trip(self, df, expected=None, **kwargs):

if expected is None:
expected = df

with ensure_clean() as path:
to_feather(df, path)

result = read_feather(path, **kwargs)
assert_frame_equal(result, df)
assert_frame_equal(result, expected)

def test_error(self):

Expand Down Expand Up @@ -74,6 +77,18 @@ def test_stringify_columns(self):
df = pd.DataFrame(np.arange(12).reshape(4, 3)).copy()
self.check_error_on_write(df, ValueError)

def test_read_columns(self):
    """Check that ``read_feather`` honours the ``columns`` argument.

    Three cases are exercised through ``check_round_trip``:
    ``columns=None``, the full column index, and a strict subset of
    the columns, whose expected result is the frame restricted to
    that subset.
    """
    # GH 24025
    df = pd.DataFrame({'col1': list('abc'),
                       'col2': list(range(1, 4)),
                       'col3': list('xyz'),
                       'col4': list(range(4, 7))})

    # No selection and an explicit full selection: the expected result
    # defaults to the original frame.
    self.check_round_trip(df, columns=None)
    self.check_round_trip(df, columns=df.columns)

    # Use a fixed, distinct subset. np.random.choice samples with
    # replacement by default, so it could pick the same column twice and
    # make the test nondeterministic.
    columns = ['col1', 'col3']
    self.check_round_trip(df, expected=df[columns], columns=columns)

def test_unsupported_other(self):

# period
Expand Down

0 comments on commit 8e419d3

Please sign in to comment.