Odd behavior from df.iloc #4017
Labels
Bug
Indexing
Related to indexing on series/frames, not to indexes themselves
Performance
Memory or execution speed performance
Milestone
I encountered some strange behavior from df.iloc today (today's master). Here's an example. First, create a 300,000 element df with ascending integer index and then select 3000 rows from it:
from pandas import *
import numpy as np
df = DataFrame({'A' : [0.1] * 300000, 'B' : [1] * 300000})
idx = array(range(3000)) * 99
%timeit df.A.iloc[idx]
This seems quite reasonable. Now do the same thing, but this time with a df whose index is not ascending:
df2 = DataFrame({'A' : [0.1] * 100000, 'B' : [1] * 100000})
df2 = concat([df, 2*df, 3*df])
%timeit df2.A.iloc[idx]
After waiting several minutes, I get the output:
AssertionError Traceback (most recent call last)
in ()
1 df2 = DataFrame({'A' : [0.1] * 100000, 'B' : [1] * 100000})
2 df2 = concat([df, 2*df, 3*df])
----> 3 get_ipython().magic(u'timeit df2.A.iloc[idx]')
/RHS/packages/anaconda/ipython/IPython/core/interactiveshell.pyc in magic(self, arg_s)
2180 magic_name, _, magic_arg_s = arg_s.partition(' ')
2181 magic_name = magic_name.lstrip(prefilter.ESC_MAGIC)
-> 2182 return self.run_line_magic(magic_name, magic_arg_s)
2183
2184 #-------------------------------------------------------------------------
/RHS/packages/anaconda/ipython/IPython/core/interactiveshell.pyc in run_line_magic(self, magic_name, line)
2101 kwargs['local_ns'] = sys._getframe(stack_depth).f_locals
2102 with self.builtin_trap:
-> 2103 result = fn(*args,**kwargs)
2104 return result
2105
/RHS/packages/anaconda/ipython/IPython/core/magics/execution.pyc in timeit(self, line, cell)
/RHS/packages/anaconda/ipython/IPython/core/magic.pyc in <lambda>(f, *a, **k)
190 # but it's overkill for just that one bit of state.
191 def magic_deco(arg):
--> 192 call = lambda f, *a, **k: f(*a, **k)
193
194 if callable(arg):
/RHS/packages/anaconda/ipython/IPython/core/magics/execution.pyc in timeit(self, line, cell)
885 number = 1
886 for i in range(1, 10):
--> 887 if timer.timeit(number) >= 0.2:
888 break
889 number *= 10
/Users/stanton/anaconda/lib/python2.7/timeit.pyc in timeit(self, number)
193 gc.disable()
194 try:
--> 195 timing = self.inner(it, self.timer)
196 finally:
197 if gcold:
in inner(_it, _timer)
/RHS/packages/anaconda/pandas/pandas/core/indexing.pyc in __getitem__(self, key)
686 return self._getitem_tuple(key)
687 else:
--> 688 return self._getitem_axis(key, axis=0)
689
690 def _getitem_axis(self, key, axis=0):
/RHS/packages/anaconda/pandas/pandas/core/indexing.pyc in _getitem_axis(self, key, axis)
838 raise ValueError("Cannot index by location index with a non-integer key")
839
--> 840 return self._get_loc(key,axis=axis)
841
842 def _convert_to_indexer(self, obj, axis=0):
/RHS/packages/anaconda/pandas/pandas/core/indexing.pyc in _get_loc(self, key, axis)
62
63 def _get_loc(self, key, axis=0):
---> 64 return self.obj._ixs(key, axis=axis)
65
66 def _slice(self, obj, axis=0, raise_on_error=False):
/RHS/packages/anaconda/pandas/pandas/core/series.pyc in _ixs(self, i, axis)
599 label = self.index[i]
600 if isinstance(label, Index):
--> 601 return self.reindex(label)
602 else:
603 return _index.get_value_at(self, i)
/RHS/packages/anaconda/pandas/pandas/core/series.pyc in reindex(self, index, method, level, fill_value, limit, copy)
2660 level=level, limit=limit)
2661 new_values = com.take_1d(self.values, indexer, fill_value=fill_value)
-> 2662 return Series(new_values, index=new_index, name=self.name)
2663
2664 def reindex_axis(self, labels, axis=0, **kwargs):
/RHS/packages/anaconda/pandas/pandas/core/series.pyc in __new__(cls, data, index, dtype, name, copy)
487 else:
488 subarr = subarr.view(Series)
--> 489 subarr.index = index
490 subarr.name = name
491
/RHS/packages/anaconda/pandas/pandas/lib.so in pandas.lib.SeriesIndex.__set__ (pandas/lib.c:29483)()
AssertionError: Index length did not match values
The text was updated successfully, but these errors were encountered: