Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ API Changes
indexed. These will be excluded. This will make pandas conform more with pandas/numpy indexing of out-of-bounds
values. A single indexer that is out-of-bounds and drops the dimensions of the object will still raise
``IndexError`` (:issue:`6296`)
- ``select_as_multiple`` will always raise a ``KeyError`` when a key or the selector is not found (:issue:`6177`)

Experimental Features
~~~~~~~~~~~~~~~~~~~~~
Expand All @@ -86,6 +87,9 @@ Bug Fixes
- Bug in conversion of string types to a DatetimeIndex with a specified frequency (:issue:`6273`, :issue:`6274`)
- Bug in ``eval`` where type-promotion failed for large expressions (:issue:`6205`)
- Bug in interpolate with inplace=True (:issue:`6281`)
- ``HDFStore.remove`` now handles start and stop (:issue:`6177`)
- ``HDFStore.select_as_multiple`` handles start and stop the same way as ``select`` (:issue:`6177`)
- ``HDFStore.select_as_coordinates`` and ``select_column`` work with where clauses that result in filters (:issue:`6177`)

pandas 0.13.1
-------------
Expand Down
92 changes: 56 additions & 36 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -724,8 +724,9 @@ def select_as_multiple(self, keys, where=None, selector=None, columns=None,

Exceptions
----------
raise if any of the keys don't refer to tables or if they are not ALL
THE SAME DIMENSIONS
raises KeyError if keys or selector is not found or keys is empty
raises TypeError if keys is not a list or tuple
raises ValueError if the tables are not ALL THE SAME DIMENSIONS
"""

# default to single select
Expand All @@ -748,12 +749,13 @@ def select_as_multiple(self, keys, where=None, selector=None, columns=None,

# collect the tables
tbls = [self.get_storer(k) for k in keys]
s = self.get_storer(selector)

# validate rows
nrows = None
for t, k in zip(tbls, keys):
for t, k in itertools.chain([(s,selector)], zip(tbls, keys)):
if t is None:
raise TypeError("Invalid table [%s]" % k)
raise KeyError("Invalid table [%s]" % k)
if not t.is_table:
raise TypeError(
"object [%s] is not a table, and cannot be used in all "
Expand All @@ -766,22 +768,17 @@ def select_as_multiple(self, keys, where=None, selector=None, columns=None,
raise ValueError(
"all tables must have exactly the same nrows!")

# select coordinates from the selector table
try:
c = self.select_as_coordinates(
selector, where, start=start, stop=stop)
nrows = len(c)
except Exception:
raise ValueError("invalid selector [%s]" % selector)
# axis is the concatenation axis
axis = list(set([t.non_index_axes[0][0] for t in tbls]))[0]

def func(_start, _stop):

# collect the returns objs
objs = [t.read(where=c[_start:_stop], columns=columns)
for t in tbls]

# axis is the concatenation axis
axis = list(set([t.non_index_axes[0][0] for t in tbls]))[0]
if where is not None:
c = s.read_coordinates(where=where, start=_start, stop=_stop, **kwargs)
else:
c = None
objs = [t.read(where=c, start=_start, stop=_stop,
columns=columns, **kwargs) for t in tbls]

# concat and return
return concat(objs, axis=axis,
Expand Down Expand Up @@ -860,7 +857,7 @@ def remove(self, key, where=None, start=None, stop=None):
raise KeyError('No object named %s in the file' % key)

# remove the node
if where is None:
if where is None and start is None and stop is None:
s.group._f_remove(recursive=True)

# delete from the table
Expand Down Expand Up @@ -2139,11 +2136,9 @@ def write(self, **kwargs):
raise NotImplementedError(
"cannot write on an abstract storer: sublcasses should implement")

def delete(self, where=None, **kwargs):
"""support fully deleting the node in its entirety (only) - where
specification must be None
"""
if where is None:
def delete(self, where=None, start=None, stop=None, **kwargs):
""" support fully deleting the node in its entirety (only) - where specification must be None """
if where is None and start is None and stop is None:
self._handle.removeNode(self.group, recursive=True)
return None

Expand Down Expand Up @@ -3381,9 +3376,15 @@ def read_coordinates(self, where=None, start=None, stop=None, **kwargs):
# create the selection
self.selection = Selection(
self, where=where, start=start, stop=stop, **kwargs)
return Index(self.selection.select_coords())
coords = self.selection.select_coords()
if self.selection.filter is not None:
for field, op, filt in self.selection.filter.format():
data = self.read_column(field, start=coords.min(), stop=coords.max()+1)
coords = coords[op(data.iloc[coords-coords.min()], filt).values]

def read_column(self, column, where=None, **kwargs):
return Index(coords)

def read_column(self, column, where=None, start=None, stop=None, **kwargs):
"""return a single column from the table, generally only indexables
are interesting
"""
Expand Down Expand Up @@ -3411,7 +3412,7 @@ def read_column(self, column, where=None, **kwargs):
# column must be an indexable or a data column
c = getattr(self.table.cols, column)
a.set_info(self.info)
return Series(a.convert(c[:], nan_rep=self.nan_rep,
return Series(a.convert(c[start:stop], nan_rep=self.nan_rep,
encoding=self.encoding).take_data())

raise KeyError("column [%s] not found in the table" % column)
Expand Down Expand Up @@ -3712,12 +3713,19 @@ def write_data_chunk(self, indexes, mask, values):
except Exception as detail:
raise TypeError("tables cannot write this data -> %s" % detail)

def delete(self, where=None, **kwargs):
def delete(self, where=None, start=None, stop=None, **kwargs):

# delete all rows (and return the nrows)
if where is None or not len(where):
nrows = self.nrows
self._handle.removeNode(self.group, recursive=True)
if start is None and stop is None:
nrows = self.nrows
self._handle.removeNode(self.group, recursive=True)
else:
# pytables<3.0 would remove a single row with stop=None
if stop is None:
stop = self.nrows
nrows = self.table.removeRows(start=start, stop=stop)
self.table.flush()
return nrows

# infer the data kind
Expand All @@ -3726,7 +3734,7 @@ def delete(self, where=None, **kwargs):

# create the selection
table = self.table
self.selection = Selection(self, where, **kwargs)
self.selection = Selection(self, where, start=start, stop=stop, **kwargs)
values = self.selection.select_coords()

# delete the rows in reverse order
Expand Down Expand Up @@ -4303,13 +4311,25 @@ def select_coords(self):
"""
generate the selection
"""
if self.condition is None:
return np.arange(self.table.nrows)
start, stop = self.start, self.stop
nrows = self.table.nrows
if start is None:
start = 0
elif start < 0:
start += nrows
if self.stop is None:
stop = nrows
elif stop < 0:
stop += nrows

return self.table.table.getWhereList(self.condition.format(),
start=self.start, stop=self.stop,
sort=True)
if self.condition is not None:
return self.table.table.getWhereList(self.condition.format(),
start=start, stop=stop,
sort=True)
elif self.coordinates is not None:
return self.coordinates

return np.arange(start, stop)

# utilities ###

Expand Down
97 changes: 95 additions & 2 deletions pandas/io/tests/test_pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -2195,6 +2195,69 @@ def test_remove_where(self):
# self.assertRaises(ValueError, store.remove,
# 'wp2', [('column', ['A', 'D'])])

def test_remove_startstop(self):
# Exercise ``store.remove`` with ``start``/``stop`` row bounds on panels
# stored in table format, then re-select each key and compare what is left
# against the matching ``reindex`` of the original panel.
# GH #4835 and #6177
#
# NOTE(review): the ``// 4`` in the expected slices presumably converts a
# stored-row count into a number of major_axis labels (4 rows per label) --
# confirm against the layout produced by ``tm.makePanel()``.
# NOTE(review): every row-count assertion on ``n`` is commented out; the
# reason is not visible here.

with ensure_clean_store(self.path) as store:

wp = tm.makePanel()

# start only: rows from 32 to the end are removed, so only the first
# 32//4 major_axis labels are expected to remain
store.put('wp1', wp, format='t')
n = store.remove('wp1', start=32)
#assert(n == 120-32)
result = store.select('wp1')
expected = wp.reindex(major_axis=wp.major_axis[:32//4])
assert_panel_equal(result, expected)

# negative start: counted from the end; -32//4 == -8, so everything
# except the last 8 major_axis labels is expected to remain
store.put('wp2', wp, format='t')
n = store.remove('wp2', start=-32)
#assert(n == 32)
result = store.select('wp2')
expected = wp.reindex(major_axis=wp.major_axis[:-32//4])
assert_panel_equal(result, expected)

# stop only: rows before 32 are removed, labels from 32//4 onwards remain
store.put('wp3', wp, format='t')
n = store.remove('wp3', stop=32)
#assert(n == 32)
result = store.select('wp3')
expected = wp.reindex(major_axis=wp.major_axis[32//4:])
assert_panel_equal(result, expected)

# negative stop: only the last 8 major_axis labels are expected to remain
store.put('wp4', wp, format='t')
n = store.remove('wp4', stop=-32)
#assert(n == 120-32)
result = store.select('wp4')
expected = wp.reindex(major_axis=wp.major_axis[-32//4:])
assert_panel_equal(result, expected)

# start and stop together: the middle is removed, leaving the first 16//4
# and the last 4 labels (``+`` on the two Index slices presumably combines
# them -- confirm Index ``+`` semantics for this pandas version)
store.put('wp5', wp, format='t')
n = store.remove('wp5', start=16, stop=-16)
#assert(n == 120-32)
result = store.select('wp5')
expected = wp.reindex(major_axis=wp.major_axis[:16//4]+wp.major_axis[-16//4:])
assert_panel_equal(result, expected)

# start == stop: the expected panel is the full, unchanged major_axis
store.put('wp6', wp, format='t')
n = store.remove('wp6', start=16, stop=16)
#assert(n == 0)
result = store.select('wp6')
expected = wp.reindex(major_axis=wp.major_axis)
assert_panel_equal(result, expected)

# with where: ``crit`` matches every third major_axis label among the first
# 30, and ``stop=80`` is passed alongside it; the expected panel drops only
# the labels at positions 0, 3, ..., 18 (``-`` on Index is presumably a
# set difference -- confirm for this pandas version)
date = wp.major_axis.take(np.arange(0,30,3))
crit = Term('major_axis=date')
store.put('wp7', wp, format='t')
n = store.remove('wp7', where=[crit], stop=80)
#assert(n == 28)
result = store.select('wp7')
expected = wp.reindex(major_axis=wp.major_axis-wp.major_axis[np.arange(0,20,3)])
assert_panel_equal(result, expected)


def test_remove_crit(self):

with ensure_clean_store(self.path) as store:
Expand Down Expand Up @@ -3449,6 +3512,25 @@ def f():
result = store.select_column('df3', 'string')
tm.assert_almost_equal(result.values, df3['string'].values)

# start/stop
result = store.select_column('df3', 'string', start=2)
tm.assert_almost_equal(result.values, df3['string'].values[2:])

result = store.select_column('df3', 'string', start=-2)
tm.assert_almost_equal(result.values, df3['string'].values[-2:])

result = store.select_column('df3', 'string', stop=2)
tm.assert_almost_equal(result.values, df3['string'].values[:2])

result = store.select_column('df3', 'string', stop=-2)
tm.assert_almost_equal(result.values, df3['string'].values[:-2])

result = store.select_column('df3', 'string', start=2, stop=-2)
tm.assert_almost_equal(result.values, df3['string'].values[2:-2])

result = store.select_column('df3', 'string', start=-2, stop=2)
tm.assert_almost_equal(result.values, df3['string'].values[-2:2])

def test_coordinates(self):
df = tm.makeTimeDataFrame()

Expand Down Expand Up @@ -3519,6 +3601,12 @@ def test_coordinates(self):
self.assertRaises(ValueError, store.select, 'df',where=np.arange(len(df)),start=5)
self.assertRaises(ValueError, store.select, 'df',where=np.arange(len(df)),start=5,stop=10)

# selection with filter
selection = date_range('20000101',periods=500)
result = store.select('df', where='index in selection')
expected = df[df.index.isin(selection)]
tm.assert_frame_equal(result,expected)

# list
df = DataFrame(np.random.randn(10,2))
store.append('df2',df)
Expand All @@ -3533,6 +3621,11 @@ def test_coordinates(self):
expected = df.loc[where]
tm.assert_frame_equal(result,expected)

# start/stop
result = store.select('df2', start=5, stop=10)
expected = df[5:10]
tm.assert_frame_equal(result,expected)

def test_append_to_multiple(self):
df1 = tm.makeTimeDataFrame()
df2 = tm.makeTimeDataFrame().rename(columns=lambda x: "%s_2" % x)
Expand Down Expand Up @@ -3603,11 +3696,11 @@ def test_select_as_multiple(self):
None, where=['A>0', 'B>0'], selector='df1')
self.assertRaises(Exception, store.select_as_multiple,
[None], where=['A>0', 'B>0'], selector='df1')
self.assertRaises(TypeError, store.select_as_multiple,
self.assertRaises(KeyError, store.select_as_multiple,
['df1','df3'], where=['A>0', 'B>0'], selector='df1')
self.assertRaises(KeyError, store.select_as_multiple,
['df3'], where=['A>0', 'B>0'], selector='df1')
self.assertRaises(ValueError, store.select_as_multiple,
self.assertRaises(KeyError, store.select_as_multiple,
['df1','df2'], where=['A>0', 'B>0'], selector='df4')

# default select
Expand Down