-
-
Notifications
You must be signed in to change notification settings - Fork 19.3k
Description
Pandas version checks
-
I have checked that this issue has not already been reported.
-
I have confirmed this bug exists on the latest version of pandas.
-
I have confirmed this bug exists on the main branch of pandas.
Reproducible Example
import pandas as pd
ts = pd.Series([0, 1, 2],
index=pd.Index(
["2018-04-09", None, "2018-04-10"], dtype="datetime64[ns]")
)
# this works and prints just the first row
print(ts.first('1D'))
# raises KeyError
print(ts.first('2D'))
Issue Description
When first() has to select rows that span a null (NaT) entry in the index, it raises a KeyError such as KeyError: Timestamp('2018-04-11 00:00:00').
Stack trace
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
File ~/opt/anaconda3/envs/pandas-dev/lib/python3.10/site-packages/pandas/_libs/index.pyx:548, in pandas._libs.index.DatetimeEngine.get_loc()
File pandas/_libs/hashtable_class_helper.pxi:2263, in pandas._libs.hashtable.Int64HashTable.get_item()
File pandas/_libs/hashtable_class_helper.pxi:2273, in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: 1523404800000000000
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
File ~/opt/anaconda3/envs/pandas-dev/lib/python3.10/site-packages/pandas/core/indexes/base.py:3803, in Index.get_loc(self, key, method, tolerance)
3802 try:
-> 3803 return self._engine.get_loc(casted_key)
3804 except KeyError as err:
File ~/opt/anaconda3/envs/pandas-dev/lib/python3.10/site-packages/pandas/_libs/index.pyx:516, in pandas._libs.index.DatetimeEngine.get_loc()
File ~/opt/anaconda3/envs/pandas-dev/lib/python3.10/site-packages/pandas/_libs/index.pyx:550, in pandas._libs.index.DatetimeEngine.get_loc()
KeyError: Timestamp('2018-04-11 00:00:00')
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
File ~/opt/anaconda3/envs/pandas-dev/lib/python3.10/site-packages/pandas/core/indexes/datetimes.py:736, in DatetimeIndex.get_loc(self, key, method, tolerance)
735 try:
--> 736 return Index.get_loc(self, key, method, tolerance)
737 except KeyError as err:
File ~/opt/anaconda3/envs/pandas-dev/lib/python3.10/site-packages/pandas/core/indexes/base.py:3805, in Index.get_loc(self, key, method, tolerance)
3804 except KeyError as err:
-> 3805 raise KeyError(key) from err
3806 except TypeError:
3807 # If we have a listlike key, _check_indexing_error will raise
3808 # InvalidIndexError. Otherwise we fall through and re-raise
3809 # the TypeError.
KeyError: Timestamp('2018-04-11 00:00:00')
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
Cell In [1], line 10
8 print(ts.first('1D'))
9 # raises KeyError
---> 10 print(ts.first('2D'))
File ~/opt/anaconda3/envs/pandas-dev/lib/python3.10/site-packages/pandas/core/generic.py:8946, in NDFrame.first(self, offset)
8943 end = self.index.searchsorted(end_date, side="left")
8944 return self.iloc[:end]
-> 8946 return self.loc[:end]
File ~/opt/anaconda3/envs/pandas-dev/lib/python3.10/site-packages/pandas/core/indexing.py:1073, in _LocationIndexer.__getitem__(self, key)
1070 axis = self.axis or 0
1072 maybe_callable = com.apply_if_callable(key, self.obj)
-> 1073 return self._getitem_axis(maybe_callable, axis=axis)
File ~/opt/anaconda3/envs/pandas-dev/lib/python3.10/site-packages/pandas/core/indexing.py:1290, in _LocIndexer._getitem_axis(self, key, axis)
1288 if isinstance(key, slice):
1289 self._validate_key(key, axis)
-> 1290 return self._get_slice_axis(key, axis=axis)
1291 elif com.is_bool_indexer(key):
1292 return self._getbool_axis(key, axis=axis)
File ~/opt/anaconda3/envs/pandas-dev/lib/python3.10/site-packages/pandas/core/indexing.py:1324, in _LocIndexer._get_slice_axis(self, slice_obj, axis)
1321 return obj.copy(deep=False)
1323 labels = obj._get_axis(axis)
-> 1324 indexer = labels.slice_indexer(slice_obj.start, slice_obj.stop, slice_obj.step)
1326 if isinstance(indexer, slice):
1327 return self.obj._slice(indexer, axis=axis)
File ~/opt/anaconda3/envs/pandas-dev/lib/python3.10/site-packages/pandas/core/indexes/datetimes.py:809, in DatetimeIndex.slice_indexer(self, start, end, step, kind)
801 # GH#33146 if start and end are combinations of str and None and Index is not
802 # monotonic, we can not use Index.slice_indexer because it does not honor the
803 # actual elements, is only searching for start and end
804 if (
805 check_str_or_none(start)
806 or check_str_or_none(end)
807 or self.is_monotonic_increasing
808 ):
--> 809 return Index.slice_indexer(self, start, end, step, kind=kind)
811 mask = np.array(True)
812 deprecation_mask = np.array(True)
File ~/opt/anaconda3/envs/pandas-dev/lib/python3.10/site-packages/pandas/core/indexes/base.py:6602, in Index.slice_indexer(self, start, end, step, kind)
6559 """
6560 Compute the slice indexer for input labels and step.
6561
(...)
6598 slice(1, 3, None)
6599 """
6600 self._deprecated_arg(kind, "kind", "slice_indexer")
-> 6602 start_slice, end_slice = self.slice_locs(start, end, step=step)
6604 # return a slice
6605 if not is_scalar(start_slice):
File ~/opt/anaconda3/envs/pandas-dev/lib/python3.10/site-packages/pandas/core/indexes/base.py:6816, in Index.slice_locs(self, start, end, step, kind)
6814 end_slice = None
6815 if end is not None:
-> 6816 end_slice = self.get_slice_bound(end, "right")
6817 if end_slice is None:
6818 end_slice = len(self)
File ~/opt/anaconda3/envs/pandas-dev/lib/python3.10/site-packages/pandas/core/indexes/base.py:6729, in Index.get_slice_bound(self, label, side, kind)
6726 return self._searchsorted_monotonic(label, side)
6727 except ValueError:
6728 # raise the original KeyError
-> 6729 raise err
6731 if isinstance(slc, np.ndarray):
6732 # get_loc may return a boolean array, which
6733 # is OK as long as they are representable by a slice.
6734 assert is_bool_dtype(slc.dtype)
File ~/opt/anaconda3/envs/pandas-dev/lib/python3.10/site-packages/pandas/core/indexes/base.py:6723, in Index.get_slice_bound(self, label, side, kind)
6721 # we need to look up the label
6722 try:
-> 6723 slc = self.get_loc(label)
6724 except KeyError as err:
6725 try:
File ~/opt/anaconda3/envs/pandas-dev/lib/python3.10/site-packages/pandas/core/indexes/datetimes.py:738, in DatetimeIndex.get_loc(self, key, method, tolerance)
736 return Index.get_loc(self, key, method, tolerance)
737 except KeyError as err:
--> 738 raise KeyError(orig_key) from err
KeyError: Timestamp('2018-04-11 00:00:00')
Expected Behavior
ts.first('2D') should return all rows whose timestamps match the requested offset, excluding rows with a null index entry, instead of raising.
Installed Versions
/Users/maheshvashishtha/opt/anaconda3/envs/pandas-dev/lib/python3.10/site-packages/_distutils_hack/__init__.py:33: UserWarning: Setuptools is replacing distutils.
warnings.warn("Setuptools is replacing distutils.")
INSTALLED VERSIONS
commit : 8dab54d
python : 3.10.4.final.0
python-bits : 64
OS : Darwin
OS-release : 21.5.0
Version : Darwin Kernel Version 21.5.0: Tue Apr 26 21:08:22 PDT 2022; root:xnu-8020.121.3~4/RELEASE_X86_64
machine : x86_64
processor : i386
byteorder : little
LC_ALL : None
LANG : en_US.UTF-8
LOCALE : en_US.UTF-8
pandas : 1.5.2
numpy : 1.23.3
pytz : 2022.2.1
dateutil : 2.8.2
setuptools : 63.4.1
pip : 22.1.2
Cython : 0.29.32
pytest : None
hypothesis : None
sphinx : None
blosc : None
feather : None
xlsxwriter : None
lxml.etree : None
html5lib : None
pymysql : None
psycopg2 : None
jinja2 : None
IPython : 8.5.0
pandas_datareader: None
bs4 : None
bottleneck : None
brotli : None
fastparquet : None
fsspec : None
gcsfs : None
matplotlib : None
numba : None
numexpr : None
odfpy : None
openpyxl : None
pandas_gbq : None
pyarrow : None
pyreadstat : None
pyxlsb : None
s3fs : None
scipy : None
snappy : None
sqlalchemy : None
tables : None
tabulate : None
xarray : None
xlrd : None
xlwt : None
zstandard : None
tzdata : None