From 733982afd84072de8028ee1737848e4368249094 Mon Sep 17 00:00:00 2001 From: Jocelyn Date: Sun, 21 Apr 2024 16:20:44 -0400 Subject: [PATCH 01/32] Added errors parameter to DatetimeArray.strftime function for improved error handling --- pandas/_libs/tslib.pyi | 1 + pandas/_libs/tslib.pyx | 19 +++++++++++++++++++ pandas/_libs/tslibs/timestamps.pyx | 4 ++-- pandas/core/arrays/datetimelike.py | 4 ++-- pandas/core/arrays/datetimes.py | 6 +++--- pandas/core/indexes/datetimes.py | 4 ++-- 6 files changed, 29 insertions(+), 9 deletions(-) diff --git a/pandas/_libs/tslib.pyi b/pandas/_libs/tslib.pyi index 5a340c1d88bc4..6a0cae49ff401 100644 --- a/pandas/_libs/tslib.pyi +++ b/pandas/_libs/tslib.pyi @@ -10,6 +10,7 @@ def format_array_from_datetime( format: str | None = ..., na_rep: str | float = ..., reso: int = ..., # NPY_DATETIMEUNIT + errors: str | None = ..., ) -> npt.NDArray[np.object_]: ... def array_with_unit_to_datetime( values: npt.NDArray[np.object_], diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index ee8ed762fdb6e..08d5922729375 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -120,6 +120,7 @@ def format_array_from_datetime( str format=None, na_rep: str | float = "NaT", NPY_DATETIMEUNIT reso=NPY_FR_ns, + str errors=None, ) -> np.ndarray: """ return a np object array of the string formatted values @@ -211,6 +212,23 @@ def format_array_from_datetime( if format is None: # Use datetime.str, that returns ts.isoformat(sep=' ') res = str(ts) + elif (errors == 'ignore')or(errors=='warn'): + try: + # Note: dispatches to pydatetime + res = ts.strftime(format) + except: + res = None + if (errors=='warn'): + mesg="The following timestamps could be converted to string: [" + mesg+=str(ts)+"] Set errors='raise' to see the details" + #warnings.warn(mesg,StrfimeErrorWarning, + #stacklevel=find_stack_level()) + else: + # Do not catch errors, allow them to raise up through + res = ts.strftime(format) + + + ''' else: # invalid format string @@ -221,6 +239,7 @@ def format_array_from_datetime( except ValueError: # Use datetime.str, that returns ts.isoformat(sep=' ') res = str(ts) + ''' # Note: we can index result directly instead of using PyArray_MultiIter_DATA # like we do for the other functions because result is known C-contiguous diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index d4cd90613ca5b..21a2dd70635d2 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1473,7 +1473,7 @@ class Timestamp(_Timestamp): tz = maybe_get_tz(tz) return cls(datetime.fromtimestamp(ts, tz)) - def strftime(self, format): + def strftime(self, format, errors = 'raise'): """ Return a formatted string of the Timestamp. @@ -1502,7 +1502,7 @@ class Timestamp(_Timestamp): "and `.month`) and construct your string from there." ) from err return _dt.strftime(format) - + def ctime(self): """ Return ctime() style string. diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index f4f076103d8c3..ebf11d8ba2150 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1732,7 +1732,7 @@ class DatelikeOps(DatetimeLikeArrayMixin): URL="https://docs.python.org/3/library/datetime.html" "#strftime-and-strptime-behavior" ) - def strftime(self, date_format: str) -> npt.NDArray[np.object_]: + def strftime(self, date_format: str, errors = 'raise') -> npt.NDArray[np.object_]: """ Convert to Index using specified date_format. @@ -1776,7 +1776,7 @@ def strftime(self, date_format: str) -> npt.NDArray[np.object_]: 'March 10, 2018, 09:00:02 AM'], dtype='object') """ - result = self._format_native_types(date_format=date_format, na_rep=np.nan) + result = self._format_native_types(date_format=date_format, na_rep=np.nan, errors=errors) return result.astype(object, copy=False) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index d446407ec3d01..f2449896355d0 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -753,15 +753,15 @@ def astype(self, dtype, copy: bool = True): # Rendering Methods def _format_native_types( - self, *, na_rep: str | float = "NaT", date_format=None, **kwargs + self, *, na_rep: str | float = "NaT", date_format=None, errors = 'raise', **kwargs ) -> npt.NDArray[np.object_]: if date_format is None and self._is_dates_only: # Only dates and no timezone: provide a default format date_format = "%Y-%m-%d" return tslib.format_array_from_datetime( - self.asi8, tz=self.tz, format=date_format, na_rep=na_rep, reso=self._creso - ) + self.asi8, tz=self.tz, format=date_format, na_rep=na_rep, reso=self._creso, + errors=errors) # ----------------------------------------------------------------- # Comparison Methods diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index cefdc14145d1f..d2f74950daac5 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -262,8 +262,8 @@ def _engine_type(self) -> type[libindex.DatetimeEngine]: # methods that dispatch to DatetimeArray and wrap result @doc(DatetimeArray.strftime) - def strftime(self, date_format) -> Index: - arr = self._data.strftime(date_format) + def strftime(self, date_format, errors = 'raise') -> Index: + arr = self._data.strftime(date_format, errors) return Index(arr, name=self.name, dtype=object) @doc(DatetimeArray.tz_convert) From 48be7e1a0771b8a1365d17a613b69094ca08a9b3 Mon Sep 17 00:00:00 2001 From: Jocelyn Date: Tue, 23 Apr 2024 13:15:33 -0400 Subject: [PATCH 02/32] Added tests for the routine with the new parameter --- pandas/_libs/tslib.pyx | 1 - pandas/core/arrays/datetimelike.py | 2 +- pandas/tests/arrays/test_datetimelike.py | 13 +++++++++++++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 08d5922729375..8da8a059a5608 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -154,7 +154,6 @@ def format_array_from_datetime( ndarray result = cnp.PyArray_EMPTY(values.ndim, values.shape, cnp.NPY_OBJECT, 0) object[::1] res_flat = result.ravel() # should NOT be a copy cnp.flatiter it = cnp.PyArray_IterNew(values) - if tz is None: # if we don't have a format nor tz, then choose # a format based on precision diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index ebf11d8ba2150..11129c57691fb 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -334,7 +334,7 @@ def asi8(self) -> npt.NDArray[np.int64]: # Rendering Methods def _format_native_types( - self, *, na_rep: str | float = "NaT", date_format=None + self, *, na_rep: str | float = "NaT", date_format=None, errors = 'raise' ) -> npt.NDArray[np.object_]: """ Helper method for astype when converting to strings. diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index cfc04b5c91354..0842131942b22 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -893,6 +893,19 @@ def test_strftime(self, arr1d): expected = np.array([ts.strftime("%Y %b") for ts in arr], dtype=object) tm.assert_numpy_array_equal(result, expected) + #additional tests for error parameter below + result = arr.strftime("%Y %b", 'ignore') + expected = np.array([ts.strftime("%Y %b") for ts in arr], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + result = arr.strftime("%Y %b", 'warn') + expected = np.array([ts.strftime("%Y %b") for ts in arr], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + result = arr.strftime("%Y %b", 'raise') + expected = np.array([ts.strftime("%Y %b") for ts in arr], dtype=object) + tm.assert_numpy_array_equal(result, expected) + def test_strftime_nat(self): # GH 29578 arr = DatetimeIndex(["2019-01-01", NaT])._data From 5078dd00941914aeaeeaddde1d395f3b1334cb95 Mon Sep 17 00:00:00 2001 From: Stanley Ou Date: Tue, 23 Apr 2024 21:36:15 -0400 Subject: [PATCH 03/32] Moved error handling from tslib.pyx to timestamps.pyx to resolve discrepancy. --- pandas/_libs/tslib.pyx | 18 ++---------------- pandas/_libs/tslibs/timestamps.pyx | 20 +++++++++++++++++++- 2 files changed, 21 insertions(+), 17 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 8da8a059a5608..d752671893f39 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -211,27 +211,13 @@ def format_array_from_datetime( if format is None: # Use datetime.str, that returns ts.isoformat(sep=' ') res = str(ts) - elif (errors == 'ignore')or(errors=='warn'): - try: - # Note: dispatches to pydatetime - res = ts.strftime(format) - except: - res = None - if (errors=='warn'): - mesg="The following timestamps could be converted to string: [" - mesg+=str(ts)+"] Set errors='raise' to see the details" - #warnings.warn(mesg,StrfimeErrorWarning, - #stacklevel=find_stack_level()) else: - # Do not catch errors, allow them to raise up through - res = ts.strftime(format) - + res = ts.strftime(format, errors) ''' else: - # invalid format string - # requires dates > 1900 + try: # Note: dispatches to pydatetime res = ts.strftime(format) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 21a2dd70635d2..b9775b26f1c08 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1501,7 +1501,25 @@ class Timestamp(_Timestamp): "For now, please call the components you need (such as `.year` " "and `.month`) and construct your string from there." ) from err - return _dt.strftime(format) + + if (errors == 'warn') or (errors == 'ignore'): + try: + # Note: dispatches to pydatetime + return _dt.strftime(format) + + except: # Exception handling according to errors parameter + if (errors == 'warn'): + # TODO + mesg="The following timestamps could be converted to string: [" + mesg+=str(_dt)+"] Set errors='raise' to see the details" + #warnings.warn(mesg,StrfimeErrorWarning, stacklevel=find_stack_level()); + + else: # errors == 'ignore' + # TODO + return None + + else: # errors == 'raise' + _dt.strftime(format) def ctime(self): """ From daa688f73dab86671b6f0fc45d03b78ea36d4e79 Mon Sep 17 00:00:00 2001 From: Stanley Ou Date: Wed, 24 Apr 2024 11:46:04 -0400 Subject: [PATCH 04/32] Added tests in test_datetimelike.py for strftime object vs array consistency. --- pandas/tests/arrays/test_datetimelike.py | 27 ++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 0842131942b22..c8995e9136f99 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -906,6 +906,33 @@ def test_strftime(self, arr1d): expected = np.array([ts.strftime("%Y %b") for ts in arr], dtype=object) tm.assert_numpy_array_equal(result, expected) + def test_strftime_err(self): + arr = DatetimeIndex(np.array(['1820-01-01', '2020-01-02'], 'datetime64[s]')) + + try: + result = arr.strftime("%y", 'raise') + assert False + except ValueError: + assert True + + try: + result = arr[0].strftime("%y", 'raise') + assert False + except ValueError: + assert True + + result = arr.strftime("%y", 'ignore') + expected = pd.Index([None, '20'], dtype = 'object') + tm.assert_numpy_array_equal(result, expected) + + assert result[0] == arr[0].strftime("%y", 'ignore') + + result = arr.strftime("%y", 'warn') + expected = pd.Index([None, '20'], dtype = 'object') + tm.assert_numpy_array_equal(result, expected) + + assert result[0] == arr[0].strftime("%y", 'warn') + def test_strftime_nat(self): # GH 29578 arr = DatetimeIndex(["2019-01-01", NaT])._data From 9a7a96436e3496304ee653e5c2ef3ef91fdb8cda Mon Sep 17 00:00:00 2001 From: Stanley Ou Date: Wed, 24 Apr 2024 12:10:16 -0400 Subject: [PATCH 05/32] Modified testing functions to use pytest --- pandas/tests/arrays/test_datetimelike.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index c8995e9136f99..1b56be6e0aa1b 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -909,24 +909,20 @@ def test_strftime(self, arr1d): def test_strftime_err(self): arr = DatetimeIndex(np.array(['1820-01-01', '2020-01-02'], 'datetime64[s]')) - try: - result = arr.strftime("%y", 'raise') - assert False - except ValueError: - assert True - - try: - result = arr[0].strftime("%y", 'raise') - assert False - except ValueError: - assert True + with pytest.raises(ValueError): + result = arr.strftime("%y", 'raise') + with pytest.raises(ValueError): + result = arr[0].strftime("%y", 'raise') + result = arr.strftime("%y", 'ignore') expected = pd.Index([None, '20'], dtype = 'object') tm.assert_numpy_array_equal(result, expected) assert result[0] == arr[0].strftime("%y", 'ignore') + #with tm.assert_produces_warning(TODO): + result = arr.strftime("%y", 'warn') expected = pd.Index([None, '20'], dtype = 'object') tm.assert_numpy_array_equal(result, expected) From 329c1ef6fb7565c30adb4afb8fa73d6b051038a0 Mon Sep 17 00:00:00 2001 From: pyrevoid15 Date: Wed, 24 Apr 2024 21:19:59 -0400 Subject: [PATCH 06/32] Setting errors parameter in strftime now ignores the NotImplementedError from evaluating datetime --- pandas/_libs/tslibs/timestamps.pyx | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index b9775b26f1c08..7936bc8e06b82 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1494,7 +1494,11 @@ class Timestamp(_Timestamp): _dt = datetime(self.year, self.month, self.day, self.hour, self.minute, self.second, self.microsecond, self.tzinfo, fold=self.fold) + except ValueError as err: + if errors == 'ignore': + return None + raise NotImplementedError( "strftime not yet supported on Timestamps which " "are outside the range of Python's standard library. " @@ -1502,6 +1506,13 @@ class Timestamp(_Timestamp): "and `.month`) and construct your string from there." ) from err + except Exception as err: + if errors == 'ignore': + return None + + raise err + + if (errors == 'warn') or (errors == 'ignore'): try: # Note: dispatches to pydatetime @@ -1515,7 +1526,6 @@ class Timestamp(_Timestamp): #warnings.warn(mesg,StrfimeErrorWarning, stacklevel=find_stack_level()); else: # errors == 'ignore' - # TODO return None else: # errors == 'raise' From d7baa42b3204866e27a5c440a5dd2dce4cf3fb23 Mon Sep 17 00:00:00 2001 From: pyrevoid15 Date: Wed, 24 Apr 2024 21:45:48 -0400 Subject: [PATCH 07/32] Readded return in 'raise' case in strftime --- pandas/_libs/tslibs/timestamps.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 7936bc8e06b82..f35f20cfdb9d3 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1529,7 +1529,7 @@ class Timestamp(_Timestamp): return None else: # errors == 'raise' - _dt.strftime(format) + return _dt.strftime(format) def ctime(self): """ From b306d78abc7ef018b7072813b1111c916310d066 Mon Sep 17 00:00:00 2001 From: Stanley Ou Date: Wed, 24 Apr 2024 21:51:44 -0400 Subject: [PATCH 08/32] Fixed some strftime tests. --- pandas/tests/arrays/test_datetimelike.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 1b56be6e0aa1b..7cb3991ea7f6f 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -917,7 +917,7 @@ def test_strftime_err(self): result = arr.strftime("%y", 'ignore') expected = pd.Index([None, '20'], dtype = 'object') - tm.assert_numpy_array_equal(result, expected) + tm.assert_index_equal(result, expected) assert result[0] == arr[0].strftime("%y", 'ignore') @@ -925,7 +925,7 @@ def test_strftime_err(self): result = arr.strftime("%y", 'warn') expected = pd.Index([None, '20'], dtype = 'object') - tm.assert_numpy_array_equal(result, expected) + tm.assert_index_equal(result, expected) assert result[0] == arr[0].strftime("%y", 'warn') From 3649a67d79686754b228e9ab1f8aec895b84b788 Mon Sep 17 00:00:00 2001 From: Stanley Ou Date: Thu, 25 Apr 2024 16:30:27 -0400 Subject: [PATCH 09/32] Deleted errors parameter for timestamp strftime, moved error handling back to arrays strftime --- pandas/_libs/tslib.pyx | 26 +++++++++--------- pandas/_libs/tslibs/timestamps.pyx | 32 ++-------------------- pandas/tests/arrays/test_datetimelike.py | 34 +++++++++++------------- 3 files changed, 30 insertions(+), 62 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index d752671893f39..3127c035cf98d 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -206,25 +206,25 @@ def format_array_from_datetime( res += f".{dts.us // 1000:03d}" else: - ts = Timestamp._from_value_and_reso(val, reso=reso, tz=tz) if format is None: # Use datetime.str, that returns ts.isoformat(sep=' ') res = str(ts) - else: - res = ts.strftime(format, errors) - - ''' - else: - - + if (errors == "warn") or (errors == "ignore"): try: - # Note: dispatches to pydatetime res = ts.strftime(format) - except ValueError: - # Use datetime.str, that returns ts.isoformat(sep=' ') - res = str(ts) - ''' + except (ValueError, NotImplementedError): + # Catches errors and replaces result with None + # TODO + if (errors == "warn"): + mesg="The following timestamps could be converted to string: [" + mesg+=str(ts)+"] Set errors='raise' to see the details" + # warnings.warn(mesg,StrfimeErrorWarning, + # stacklevel=find_stack_level()); + res = None + + else: + res = ts.strftime(format) # Note: we can index result directly instead of using PyArray_MultiIter_DATA # like we do for the other functions because result is known C-contiguous diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index f35f20cfdb9d3..d4cd90613ca5b 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1473,7 +1473,7 @@ class Timestamp(_Timestamp): tz = maybe_get_tz(tz) return cls(datetime.fromtimestamp(ts, tz)) - def strftime(self, format, errors = 'raise'): + def strftime(self, format): """ Return a formatted string of the Timestamp. @@ -1494,43 +1494,15 @@ class Timestamp(_Timestamp): _dt = datetime(self.year, self.month, self.day, self.hour, self.minute, self.second, self.microsecond, self.tzinfo, fold=self.fold) - except ValueError as err: - if errors == 'ignore': - return None - raise NotImplementedError( "strftime not yet supported on Timestamps which " "are outside the range of Python's standard library. " "For now, please call the components you need (such as `.year` " "and `.month`) and construct your string from there." ) from err + return _dt.strftime(format) - except Exception as err: - if errors == 'ignore': - return None - - raise err - - - if (errors == 'warn') or (errors == 'ignore'): - try: - # Note: dispatches to pydatetime - return _dt.strftime(format) - - except: # Exception handling according to errors parameter - if (errors == 'warn'): - # TODO - mesg="The following timestamps could be converted to string: [" - mesg+=str(_dt)+"] Set errors='raise' to see the details" - #warnings.warn(mesg,StrfimeErrorWarning, stacklevel=find_stack_level()); - - else: # errors == 'ignore' - return None - - else: # errors == 'raise' - return _dt.strftime(format) - def ctime(self): """ Return ctime() style string. diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 7cb3991ea7f6f..794a83f8bd27b 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -893,41 +893,37 @@ def test_strftime(self, arr1d): expected = np.array([ts.strftime("%Y %b") for ts in arr], dtype=object) tm.assert_numpy_array_equal(result, expected) - #additional tests for error parameter below - result = arr.strftime("%Y %b", 'ignore') + # additional tests for error parameter below + result = arr.strftime("%Y %b", "ignore") expected = np.array([ts.strftime("%Y %b") for ts in arr], dtype=object) tm.assert_numpy_array_equal(result, expected) - result = arr.strftime("%Y %b", 'warn') + result = arr.strftime("%Y %b", "warn") expected = np.array([ts.strftime("%Y %b") for ts in arr], dtype=object) tm.assert_numpy_array_equal(result, expected) - result = arr.strftime("%Y %b", 'raise') + result = arr.strftime("%Y %b", "raise") expected = np.array([ts.strftime("%Y %b") for ts in arr], dtype=object) tm.assert_numpy_array_equal(result, expected) def test_strftime_err(self): - arr = DatetimeIndex(np.array(['1820-01-01', '2020-01-02'], 'datetime64[s]')) + arr = DatetimeIndex(np.array(["1820-01-01", "2020-01-02"], "datetime64[s]")) - with pytest.raises(ValueError): - result = arr.strftime("%y", 'raise') + # with pytest.raises(ValueError): + # result = arr.strftime("%y", "raise") - with pytest.raises(ValueError): - result = arr[0].strftime("%y", 'raise') - - result = arr.strftime("%y", 'ignore') - expected = pd.Index([None, '20'], dtype = 'object') - tm.assert_index_equal(result, expected) + # with pytest.raises(ValueError): + # result = arr[0].strftime("%y") - assert result[0] == arr[0].strftime("%y", 'ignore') + result = arr.strftime("%y", "ignore") + expected = pd.Index([None, "20"], dtype="object") + tm.assert_index_equal(result, expected) - #with tm.assert_produces_warning(TODO): + # with tm.assert_produces_warning(TODO): - result = arr.strftime("%y", 'warn') - expected = pd.Index([None, '20'], dtype = 'object') + result = arr.strftime("%y", "warn") + expected = pd.Index([None, "20"], dtype="object") tm.assert_index_equal(result, expected) - - assert result[0] == arr[0].strftime("%y", 'warn') def test_strftime_nat(self): # GH 29578 From 3b786575adcf8d99f2e550292330463d57c38cec Mon Sep 17 00:00:00 2001 From: Jocelyn Date: Thu, 25 Apr 2024 21:39:05 -0400 Subject: [PATCH 10/32] Added comments, fixed bug in tslib.pyx file, and edited testing routine --- pandas/_libs/tslib.pyx | 4 +++- pandas/tests/arrays/test_datetimelike.py | 16 ++++++++++++++-- pandas/tests/arrays/test_datetimes.py | 2 +- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 3127c035cf98d..2af493923e8a5 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -210,7 +210,7 @@ def format_array_from_datetime( if format is None: # Use datetime.str, that returns ts.isoformat(sep=' ') res = str(ts) - if (errors == "warn") or (errors == "ignore"): + elif (errors == "warn") or (errors == "ignore"): try: res = ts.strftime(format) except (ValueError, NotImplementedError): @@ -224,6 +224,8 @@ def format_array_from_datetime( res = None else: + # Do not catch errors, allow them to raise up through + # errors == 'raise' res = ts.strftime(format) # Note: we can index result directly instead of using PyArray_MultiIter_DATA diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 794a83f8bd27b..c287034277813 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -5,6 +5,7 @@ import numpy as np import pytest +import datetime from pandas._libs import ( NaT, @@ -909,6 +910,17 @@ def test_strftime(self, arr1d): def test_strftime_err(self): arr = DatetimeIndex(np.array(["1820-01-01", "2020-01-02"], "datetime64[s]")) + windowFlag = False + try: + datetime.datetime(1820, 1, 1) + except ValueError: + windowFlag = True + if windowFlag: + expected = pd.Index([None, "20"], dtype="object") + else: + expected = pd.Index(["20", "20"], dtype="object") + + # with pytest.raises(ValueError): # result = arr.strftime("%y", "raise") @@ -916,13 +928,13 @@ def test_strftime_err(self): # result = arr[0].strftime("%y") result = arr.strftime("%y", "ignore") - expected = pd.Index([None, "20"], dtype="object") + # expected = pd.Index([None, "20"], dtype="object") tm.assert_index_equal(result, expected) # with tm.assert_produces_warning(TODO): result = arr.strftime("%y", "warn") - expected = pd.Index([None, "20"], dtype="object") + # expected = pd.Index([None, "20"], dtype="object") tm.assert_index_equal(result, expected) def test_strftime_nat(self): diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 8650be62ae7eb..1281cff227787 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -161,7 +161,7 @@ def test_format_native_types(self, unit, dtype, dta_dti): # In this case we should get the same formatted values with our nano # version dti._data as we do with the non-nano dta dta, dti = dta_dti - + res = dta._format_native_types() exp = dti._data._format_native_types() tm.assert_numpy_array_equal(res, exp) From 1cfdd56ea95997529ea4c5833594f4a8b412eda8 Mon Sep 17 00:00:00 2001 From: pyrevoid15 Date: Fri, 26 Apr 2024 01:30:17 -0400 Subject: [PATCH 11/32] Separated ignore case from warn case in strftime. Also added documentation note. --- doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/_libs/tslib.pyx | 17 +++++++++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 19b448a1871c2..ee7cbfcb7ee49 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -35,7 +35,7 @@ Other enhancements - Support passing a :class:`Series` input to :func:`json_normalize` that retains the :class:`Series` :class:`Index` (:issue:`51452`) - Users can globally disable any ``PerformanceWarning`` by setting the option ``mode.performance_warnings`` to ``False`` (:issue:`56920`) - :meth:`Styler.format_index_names` can now be used to format the index and column names (:issue:`48936` and :issue:`47489`) -- +- :func:`DatetimeIndex.strftime` and :func:`DatetimeArray.strftime` now have an optional ``errors`` paramater, which can be set to ``'ignore'`` or ``'warn'`` to intepret bad datetimes as ``None``. The latter also throws a warning (:issue:`58178`) .. --------------------------------------------------------------------------- .. _whatsnew_300.notable_bug_fixes: diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 2af493923e8a5..1742fab0cb771 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -210,17 +210,22 @@ def format_array_from_datetime( if format is None: # Use datetime.str, that returns ts.isoformat(sep=' ') res = str(ts) - elif (errors == "warn") or (errors == "ignore"): + elif (errors == "warn"): try: res = ts.strftime(format) except (ValueError, NotImplementedError): # Catches errors and replaces result with None # TODO - if (errors == "warn"): - mesg="The following timestamps could be converted to string: [" - mesg+=str(ts)+"] Set errors='raise' to see the details" - # warnings.warn(mesg,StrfimeErrorWarning, - # stacklevel=find_stack_level()); + mesg= "The following timestamps could be converted to string:" +\ + f"[{ts}] Set errors='raise' to see the details" + # warnings.warn(mesg,StrfimeErrorWarning, + # stacklevel=find_stack_level()); + + res = None + elif (errors == "ignore"): + try: + res = ts.strftime(format) + except Exception: res = None else: From 2915e3123e8c65fdaf26427dd1027fa0fc5890a9 Mon Sep 17 00:00:00 2001 From: summitwei Date: Fri, 26 Apr 2024 13:25:54 -0400 Subject: [PATCH 12/32] added warning --- pandas/_libs/tslib.pyx | 7 +- .../pip/_vendor/pygments/lexers/python.py | 1188 +++++++++++++++++ 2 files changed, 1191 insertions(+), 4 deletions(-) create mode 100644 ~virtualenvs/panda-dev/lib/python3.10/site-packages/pip/_vendor/pygments/lexers/python.py diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 1742fab0cb771..49b035a5c076a 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -215,11 +215,10 @@ def format_array_from_datetime( res = ts.strftime(format) except (ValueError, NotImplementedError): # Catches errors and replaces result with None - # TODO - mesg= "The following timestamps could be converted to string:" +\ + mesg= "The following timestamp could not be converted to string:" +\ f"[{ts}] Set errors='raise' to see the details" - # warnings.warn(mesg,StrfimeErrorWarning, - # stacklevel=find_stack_level()); + warnings.warn(mesg,StrftimeErrorWarning, + stacklevel=find_stack_level()); res = None elif (errors == "ignore"): diff --git a/~virtualenvs/panda-dev/lib/python3.10/site-packages/pip/_vendor/pygments/lexers/python.py b/~virtualenvs/panda-dev/lib/python3.10/site-packages/pip/_vendor/pygments/lexers/python.py new file mode 100644 index 0000000000000..0f8536fa2e50b --- /dev/null +++ b/~virtualenvs/panda-dev/lib/python3.10/site-packages/pip/_vendor/pygments/lexers/python.py @@ -0,0 +1,1188 @@ +""" + pygments.lexers.python + ~~~~~~~~~~~~~~~~~~~~~~ + + Lexers for Python and related languages. + + :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +import re +import keyword + +from pip._vendor.pygments.lexer import Lexer, RegexLexer, include, bygroups, using, \ + default, words, combined, do_insertions, this +from pip._vendor.pygments.util import get_bool_opt, shebang_matches +from pip._vendor.pygments.token import Text, Comment, Operator, Keyword, Name, String, \ + Number, Punctuation, Generic, Other, Error +from pip._vendor.pygments import unistring as uni + +__all__ = ['PythonLexer', 'PythonConsoleLexer', 'PythonTracebackLexer', + 'Python2Lexer', 'Python2TracebackLexer', + 'CythonLexer', 'DgLexer', 'NumPyLexer'] + +line_re = re.compile('.*?\n') + + +class PythonLexer(RegexLexer): + """ + For `Python `_ source code (version 3.x). + + .. versionadded:: 0.10 + + .. versionchanged:: 2.5 + This is now the default ``PythonLexer``. It is still available as the + alias ``Python3Lexer``. + """ + + name = 'Python' + aliases = ['python', 'py', 'sage', 'python3', 'py3'] + filenames = [ + '*.py', + '*.pyw', + # Jython + '*.jy', + # Sage + '*.sage', + # SCons + '*.sc', + 'SConstruct', + 'SConscript', + # Skylark/Starlark (used by Bazel, Buck, and Pants) + '*.bzl', + 'BUCK', + 'BUILD', + 'BUILD.bazel', + 'WORKSPACE', + # Twisted Application infrastructure + '*.tac', + ] + mimetypes = ['text/x-python', 'application/x-python', + 'text/x-python3', 'application/x-python3'] + + flags = re.MULTILINE | re.UNICODE + + uni_name = "[%s][%s]*" % (uni.xid_start, uni.xid_continue) + + def innerstring_rules(ttype): + return [ + # the old style '%s' % (...) string formatting (still valid in Py3) + (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' + '[hlL]?[E-GXc-giorsaux%]', String.Interpol), + # the new style '{}'.format(...) string formatting + (r'\{' + r'((\w+)((\.\w+)|(\[[^\]]+\]))*)?' # field name + r'(\![sra])?' # conversion + r'(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[E-GXb-gnosx%]?)?' + r'\}', String.Interpol), + + # backslashes, quotes and formatting signs must be parsed one at a time + (r'[^\\\'"%{\n]+', ttype), + (r'[\'"\\]', ttype), + # unhandled string formatting sign + (r'%|(\{{1,2})', ttype) + # newlines are an error (use "nl" state) + ] + + def fstring_rules(ttype): + return [ + # Assuming that a '}' is the closing brace after format specifier. + # Sadly, this means that we won't detect syntax error. But it's + # more important to parse correct syntax correctly, than to + # highlight invalid syntax. + (r'\}', String.Interpol), + (r'\{', String.Interpol, 'expr-inside-fstring'), + # backslashes, quotes and formatting signs must be parsed one at a time + (r'[^\\\'"{}\n]+', ttype), + (r'[\'"\\]', ttype), + # newlines are an error (use "nl" state) + ] + + tokens = { + 'root': [ + (r'\n', Text), + (r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")', + bygroups(Text, String.Affix, String.Doc)), + (r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')", + bygroups(Text, String.Affix, String.Doc)), + (r'\A#!.+$', Comment.Hashbang), + (r'#.*$', Comment.Single), + (r'\\\n', Text), + (r'\\', Text), + include('keywords'), + include('soft-keywords'), + (r'(def)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'funcname'), + (r'(class)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'classname'), + (r'(from)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text), + 'fromimport'), + (r'(import)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text), + 'import'), + include('expr'), + ], + 'expr': [ + # raw f-strings + ('(?i)(rf|fr)(""")', + bygroups(String.Affix, String.Double), + combined('rfstringescape', 'tdqf')), + ("(?i)(rf|fr)(''')", + bygroups(String.Affix, String.Single), + combined('rfstringescape', 'tsqf')), + ('(?i)(rf|fr)(")', + bygroups(String.Affix, String.Double), + combined('rfstringescape', 'dqf')), + ("(?i)(rf|fr)(')", + bygroups(String.Affix, String.Single), + combined('rfstringescape', 'sqf')), + # non-raw f-strings + ('([fF])(""")', bygroups(String.Affix, String.Double), + combined('fstringescape', 'tdqf')), + ("([fF])(''')", bygroups(String.Affix, String.Single), + combined('fstringescape', 'tsqf')), + ('([fF])(")', bygroups(String.Affix, String.Double), + combined('fstringescape', 'dqf')), + ("([fF])(')", bygroups(String.Affix, String.Single), + combined('fstringescape', 'sqf')), + # raw strings + ('(?i)(rb|br|r)(""")', + bygroups(String.Affix, String.Double), 'tdqs'), + ("(?i)(rb|br|r)(''')", + bygroups(String.Affix, String.Single), 'tsqs'), + ('(?i)(rb|br|r)(")', + bygroups(String.Affix, String.Double), 'dqs'), + ("(?i)(rb|br|r)(')", + bygroups(String.Affix, String.Single), 'sqs'), + # non-raw strings + ('([uUbB]?)(""")', bygroups(String.Affix, String.Double), + combined('stringescape', 'tdqs')), + ("([uUbB]?)(''')", bygroups(String.Affix, String.Single), + combined('stringescape', 'tsqs')), + ('([uUbB]?)(")', bygroups(String.Affix, String.Double), + combined('stringescape', 'dqs')), + ("([uUbB]?)(')", bygroups(String.Affix, String.Single), + combined('stringescape', 'sqs')), + (r'[^\S\n]+', Text), + include('numbers'), + (r'!=|==|<<|>>|:=|[-~+/*%=<>&^|.]', Operator), + (r'[]{}:(),;[]', Punctuation), + (r'(in|is|and|or|not)\b', Operator.Word), + include('expr-keywords'), + include('builtins'), + include('magicfuncs'), + include('magicvars'), + include('name'), + ], + 'expr-inside-fstring': [ + (r'[{([]', Punctuation, 'expr-inside-fstring-inner'), + # without format specifier + (r'(=\s*)?' # debug (https://bugs.python.org/issue36817) + r'(\![sraf])?' # conversion + r'\}', String.Interpol, '#pop'), + # with format specifier + # we'll catch the remaining '}' in the outer scope + (r'(=\s*)?' # debug (https://bugs.python.org/issue36817) + r'(\![sraf])?' # conversion + r':', String.Interpol, '#pop'), + (r'\s+', Text), # allow new lines + include('expr'), + ], + 'expr-inside-fstring-inner': [ + (r'[{([]', Punctuation, 'expr-inside-fstring-inner'), + (r'[])}]', Punctuation, '#pop'), + (r'\s+', Text), # allow new lines + include('expr'), + ], + 'expr-keywords': [ + # Based on https://docs.python.org/3/reference/expressions.html + (words(( + 'async for', 'await', 'else', 'for', 'if', 'lambda', + 'yield', 'yield from'), suffix=r'\b'), + Keyword), + (words(('True', 'False', 'None'), suffix=r'\b'), Keyword.Constant), + ], + 'keywords': [ + (words(( + 'assert', 'async', 'await', 'break', 'continue', 'del', 'elif', + 'else', 'except', 'finally', 'for', 'global', 'if', 'lambda', + 'pass', 'raise', 'nonlocal', 'return', 'try', 'while', 'yield', + 'yield from', 'as', 'with'), suffix=r'\b'), + Keyword), + (words(('True', 'False', 'None'), suffix=r'\b'), Keyword.Constant), + ], + 'soft-keywords': [ + # `match`, `case` and `_` soft keywords + (r'(^[ \t]*)' # at beginning of line + possible indentation + r'(match|case)\b' # a possible keyword + r'(?![ \t]*(?:' # not followed by... + r'[:,;=^&|@~)\]}]|(?:' + # characters and keywords that mean this isn't + r'|'.join(keyword.kwlist) + r')\b))', # pattern matching + bygroups(Text, Keyword), 'soft-keywords-inner'), + ], + 'soft-keywords-inner': [ + # optional `_` keyword + (r'(\s+)([^\n_]*)(_\b)', bygroups(Text, using(this), Keyword)), + default('#pop') + ], + 'builtins': [ + (words(( + '__import__', 'abs', 'all', 'any', 'bin', 'bool', 'bytearray', + 'breakpoint', 'bytes', 'chr', 'classmethod', 'compile', 'complex', + 'delattr', 'dict', 'dir', 'divmod', 'enumerate', 'eval', 'filter', + 'float', 'format', 'frozenset', 'getattr', 'globals', 'hasattr', + 'hash', 'hex', 'id', 'input', 'int', 'isinstance', 'issubclass', + 'iter', 'len', 'list', 'locals', 'map', 'max', 'memoryview', + 'min', 'next', 'object', 'oct', 'open', 'ord', 'pow', 'print', + 'property', 'range', 'repr', 'reversed', 'round', 'set', 'setattr', + 'slice', 'sorted', 'staticmethod', 'str', 'sum', 'super', 'tuple', + 'type', 'vars', 'zip'), prefix=r'(?`_ source code. + + .. versionchanged:: 2.5 + This class has been renamed from ``PythonLexer``. ``PythonLexer`` now + refers to the Python 3 variant. File name patterns like ``*.py`` have + been moved to Python 3 as well. + """ + + name = 'Python 2.x' + aliases = ['python2', 'py2'] + filenames = [] # now taken over by PythonLexer (3.x) + mimetypes = ['text/x-python2', 'application/x-python2'] + + def innerstring_rules(ttype): + return [ + # the old style '%s' % (...) string formatting + (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' + '[hlL]?[E-GXc-giorsux%]', String.Interpol), + # backslashes, quotes and formatting signs must be parsed one at a time + (r'[^\\\'"%\n]+', ttype), + (r'[\'"\\]', ttype), + # unhandled string formatting sign + (r'%', ttype), + # newlines are an error (use "nl" state) + ] + + tokens = { + 'root': [ + (r'\n', Text), + (r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")', + bygroups(Text, String.Affix, String.Doc)), + (r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')", + bygroups(Text, String.Affix, String.Doc)), + (r'[^\S\n]+', Text), + (r'\A#!.+$', Comment.Hashbang), + (r'#.*$', Comment.Single), + (r'[]{}:(),;[]', Punctuation), + (r'\\\n', Text), + (r'\\', Text), + (r'(in|is|and|or|not)\b', Operator.Word), + (r'!=|==|<<|>>|[-~+/*%=<>&^|.]', Operator), + include('keywords'), + (r'(def)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'funcname'), + (r'(class)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'classname'), + (r'(from)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text), + 'fromimport'), + (r'(import)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text), + 'import'), + include('builtins'), + include('magicfuncs'), + include('magicvars'), + include('backtick'), + ('([rR]|[uUbB][rR]|[rR][uUbB])(""")', + bygroups(String.Affix, String.Double), 'tdqs'), + ("([rR]|[uUbB][rR]|[rR][uUbB])(''')", + bygroups(String.Affix, String.Single), 'tsqs'), + ('([rR]|[uUbB][rR]|[rR][uUbB])(")', + bygroups(String.Affix, String.Double), 'dqs'), + ("([rR]|[uUbB][rR]|[rR][uUbB])(')", + bygroups(String.Affix, String.Single), 'sqs'), + ('([uUbB]?)(""")', bygroups(String.Affix, String.Double), + combined('stringescape', 'tdqs')), + ("([uUbB]?)(''')", bygroups(String.Affix, String.Single), + combined('stringescape', 'tsqs')), + ('([uUbB]?)(")', bygroups(String.Affix, String.Double), + combined('stringescape', 'dqs')), + ("([uUbB]?)(')", bygroups(String.Affix, String.Single), + combined('stringescape', 'sqs')), + include('name'), + include('numbers'), + ], + 'keywords': [ + (words(( + 'assert', 'break', 'continue', 'del', 'elif', 'else', 'except', + 'exec', 'finally', 'for', 'global', 'if', 'lambda', 'pass', + 'print', 'raise', 'return', 'try', 'while', 'yield', + 'yield from', 'as', 'with'), suffix=r'\b'), + Keyword), + ], + 'builtins': [ + (words(( + '__import__', 'abs', 'all', 'any', 'apply', 'basestring', 'bin', + 'bool', 'buffer', 'bytearray', 'bytes', 'callable', 'chr', 'classmethod', + 'cmp', 'coerce', 'compile', 'complex', 'delattr', 'dict', 'dir', 'divmod', + 'enumerate', 'eval', 'execfile', 'exit', 'file', 'filter', 'float', + 'frozenset', 'getattr', 'globals', 'hasattr', 'hash', 'hex', 'id', + 'input', 'int', 'intern', 'isinstance', 'issubclass', 'iter', 'len', + 'list', 'locals', 'long', 'map', 'max', 'min', 'next', 'object', + 'oct', 'open', 'ord', 'pow', 'property', 'range', 'raw_input', 'reduce', + 'reload', 'repr', 'reversed', 'round', 'set', 'setattr', 'slice', + 'sorted', 'staticmethod', 'str', 'sum', 'super', 'tuple', 'type', + 'unichr', 'unicode', 'vars', 'xrange', 'zip'), + prefix=r'(?>> a = 'foo' + >>> print a + foo + >>> 1 / 0 + Traceback (most recent call last): + File "", line 1, in + ZeroDivisionError: integer division or modulo by zero + + Additional options: + + `python3` + Use Python 3 lexer for code. Default is ``True``. + + .. versionadded:: 1.0 + .. versionchanged:: 2.5 + Now defaults to ``True``. + """ + name = 'Python console session' + aliases = ['pycon'] + mimetypes = ['text/x-python-doctest'] + + def __init__(self, **options): + self.python3 = get_bool_opt(options, 'python3', True) + Lexer.__init__(self, **options) + + def get_tokens_unprocessed(self, text): + if self.python3: + pylexer = PythonLexer(**self.options) + tblexer = PythonTracebackLexer(**self.options) + else: + pylexer = Python2Lexer(**self.options) + tblexer = Python2TracebackLexer(**self.options) + + curcode = '' + insertions = [] + curtb = '' + tbindex = 0 + tb = 0 + for match in line_re.finditer(text): + line = match.group() + if line.startswith('>>> ') or line.startswith('... '): + tb = 0 + insertions.append((len(curcode), + [(0, Generic.Prompt, line[:4])])) + curcode += line[4:] + elif line.rstrip() == '...' and not tb: + # only a new >>> prompt can end an exception block + # otherwise an ellipsis in place of the traceback frames + # will be mishandled + insertions.append((len(curcode), + [(0, Generic.Prompt, '...')])) + curcode += line[3:] + else: + if curcode: + yield from do_insertions( + insertions, pylexer.get_tokens_unprocessed(curcode)) + curcode = '' + insertions = [] + if (line.startswith('Traceback (most recent call last):') or + re.match(' File "[^"]+", line \\d+\\n$', line)): + tb = 1 + curtb = line + tbindex = match.start() + elif line == 'KeyboardInterrupt\n': + yield match.start(), Name.Class, line + elif tb: + curtb += line + if not (line.startswith(' ') or line.strip() == '...'): + tb = 0 + for i, t, v in tblexer.get_tokens_unprocessed(curtb): + yield tbindex+i, t, v + curtb = '' + else: + yield match.start(), Generic.Output, line + if curcode: + yield from do_insertions(insertions, + pylexer.get_tokens_unprocessed(curcode)) + if curtb: + for i, t, v in tblexer.get_tokens_unprocessed(curtb): + yield tbindex+i, t, v + + +class PythonTracebackLexer(RegexLexer): + """ + For Python 3.x tracebacks, with support for chained exceptions. + + .. versionadded:: 1.0 + + .. versionchanged:: 2.5 + This is now the default ``PythonTracebackLexer``. It is still available + as the alias ``Python3TracebackLexer``. + """ + + name = 'Python Traceback' + aliases = ['pytb', 'py3tb'] + filenames = ['*.pytb', '*.py3tb'] + mimetypes = ['text/x-python-traceback', 'text/x-python3-traceback'] + + tokens = { + 'root': [ + (r'\n', Text), + (r'^Traceback \(most recent call last\):\n', Generic.Traceback, 'intb'), + (r'^During handling of the above exception, another ' + r'exception occurred:\n\n', Generic.Traceback), + (r'^The above exception was the direct cause of the ' + r'following exception:\n\n', Generic.Traceback), + (r'^(?= File "[^"]+", line \d+)', Generic.Traceback, 'intb'), + (r'^.*\n', Other), + ], + 'intb': [ + (r'^( File )("[^"]+")(, line )(\d+)(, in )(.+)(\n)', + bygroups(Text, Name.Builtin, Text, Number, Text, Name, Text)), + (r'^( File )("[^"]+")(, line )(\d+)(\n)', + bygroups(Text, Name.Builtin, Text, Number, Text)), + (r'^( )(.+)(\n)', + bygroups(Text, using(PythonLexer), Text), 'markers'), + (r'^([ \t]*)(\.\.\.)(\n)', + bygroups(Text, Comment, Text)), # for doctests... + (r'^([^:]+)(: )(.+)(\n)', + bygroups(Generic.Error, Text, Name, Text), '#pop'), + (r'^([a-zA-Z_][\w.]*)(:?\n)', + bygroups(Generic.Error, Text), '#pop') + ], + 'markers': [ + # Either `PEP 657 ` + # error locations in Python 3.11+, or single-caret markers + # for syntax errors before that. + (r'^( {4,})([~^]+)(\n)', + bygroups(Text, Punctuation.Marker, Text), + '#pop'), + default('#pop'), + ], + } + + +Python3TracebackLexer = PythonTracebackLexer + + +class Python2TracebackLexer(RegexLexer): + """ + For Python tracebacks. + + .. versionadded:: 0.7 + + .. versionchanged:: 2.5 + This class has been renamed from ``PythonTracebackLexer``. + ``PythonTracebackLexer`` now refers to the Python 3 variant. + """ + + name = 'Python 2.x Traceback' + aliases = ['py2tb'] + filenames = ['*.py2tb'] + mimetypes = ['text/x-python2-traceback'] + + tokens = { + 'root': [ + # Cover both (most recent call last) and (innermost last) + # The optional ^C allows us to catch keyboard interrupt signals. + (r'^(\^C)?(Traceback.*\n)', + bygroups(Text, Generic.Traceback), 'intb'), + # SyntaxError starts with this. + (r'^(?= File "[^"]+", line \d+)', Generic.Traceback, 'intb'), + (r'^.*\n', Other), + ], + 'intb': [ + (r'^( File )("[^"]+")(, line )(\d+)(, in )(.+)(\n)', + bygroups(Text, Name.Builtin, Text, Number, Text, Name, Text)), + (r'^( File )("[^"]+")(, line )(\d+)(\n)', + bygroups(Text, Name.Builtin, Text, Number, Text)), + (r'^( )(.+)(\n)', + bygroups(Text, using(Python2Lexer), Text), 'marker'), + (r'^([ \t]*)(\.\.\.)(\n)', + bygroups(Text, Comment, Text)), # for doctests... + (r'^([^:]+)(: )(.+)(\n)', + bygroups(Generic.Error, Text, Name, Text), '#pop'), + (r'^([a-zA-Z_]\w*)(:?\n)', + bygroups(Generic.Error, Text), '#pop') + ], + 'marker': [ + # For syntax errors. + (r'( {4,})(\^)', bygroups(Text, Punctuation.Marker), '#pop'), + default('#pop'), + ], + } + + +class CythonLexer(RegexLexer): + """ + For Pyrex and `Cython `_ source code. + + .. versionadded:: 1.1 + """ + + name = 'Cython' + aliases = ['cython', 'pyx', 'pyrex'] + filenames = ['*.pyx', '*.pxd', '*.pxi'] + mimetypes = ['text/x-cython', 'application/x-cython'] + + tokens = { + 'root': [ + (r'\n', Text), + (r'^(\s*)("""(?:.|\n)*?""")', bygroups(Text, String.Doc)), + (r"^(\s*)('''(?:.|\n)*?''')", bygroups(Text, String.Doc)), + (r'[^\S\n]+', Text), + (r'#.*$', Comment), + (r'[]{}:(),;[]', Punctuation), + (r'\\\n', Text), + (r'\\', Text), + (r'(in|is|and|or|not)\b', Operator.Word), + (r'(<)([a-zA-Z0-9.?]+)(>)', + bygroups(Punctuation, Keyword.Type, Punctuation)), + (r'!=|==|<<|>>|[-~+/*%=<>&^|.?]', Operator), + (r'(from)(\d+)(<=)(\s+)(<)(\d+)(:)', + bygroups(Keyword, Number.Integer, Operator, Name, Operator, + Name, Punctuation)), + include('keywords'), + (r'(def|property)(\s+)', bygroups(Keyword, Text), 'funcname'), + (r'(cp?def)(\s+)', bygroups(Keyword, Text), 'cdef'), + # (should actually start a block with only cdefs) + (r'(cdef)(:)', bygroups(Keyword, Punctuation)), + (r'(class|struct)(\s+)', bygroups(Keyword, Text), 'classname'), + (r'(from)(\s+)', bygroups(Keyword, Text), 'fromimport'), + (r'(c?import)(\s+)', bygroups(Keyword, Text), 'import'), + include('builtins'), + include('backtick'), + ('(?:[rR]|[uU][rR]|[rR][uU])"""', String, 'tdqs'), + ("(?:[rR]|[uU][rR]|[rR][uU])'''", String, 'tsqs'), + ('(?:[rR]|[uU][rR]|[rR][uU])"', String, 'dqs'), + ("(?:[rR]|[uU][rR]|[rR][uU])'", String, 'sqs'), + ('[uU]?"""', String, combined('stringescape', 'tdqs')), + ("[uU]?'''", String, combined('stringescape', 'tsqs')), + ('[uU]?"', String, combined('stringescape', 'dqs')), + ("[uU]?'", String, combined('stringescape', 'sqs')), + include('name'), + include('numbers'), + ], + 'keywords': [ + (words(( + 'assert', 'async', 'await', 'break', 'by', 'continue', 'ctypedef', 'del', 'elif', + 'else', 'except', 'except?', 'exec', 'finally', 'for', 'fused', 'gil', + 'global', 'if', 'include', 'lambda', 'nogil', 'pass', 'print', + 'raise', 'return', 'try', 'while', 'yield', 'as', 'with'), suffix=r'\b'), + Keyword), + (r'(DEF|IF|ELIF|ELSE)\b', Comment.Preproc), + ], + 'builtins': [ + (words(( + '__import__', 'abs', 'all', 'any', 'apply', 'basestring', 'bin', 'bint', + 'bool', 'buffer', 'bytearray', 'bytes', 'callable', 'chr', + 'classmethod', 'cmp', 'coerce', 'compile', 'complex', 'delattr', + 'dict', 'dir', 'divmod', 'enumerate', 'eval', 'execfile', 'exit', + 'file', 'filter', 'float', 'frozenset', 'getattr', 'globals', + 'hasattr', 'hash', 'hex', 'id', 'input', 'int', 'intern', 'isinstance', + 'issubclass', 'iter', 'len', 'list', 'locals', 'long', 'map', 'max', + 'min', 'next', 'object', 'oct', 'open', 'ord', 'pow', 'property', 'Py_ssize_t', + 'range', 'raw_input', 'reduce', 'reload', 'repr', 'reversed', + 'round', 'set', 'setattr', 'slice', 'sorted', 'staticmethod', + 'str', 'sum', 'super', 'tuple', 'type', 'unichr', 'unicode', 'unsigned', + 'vars', 'xrange', 'zip'), prefix=r'(?`_, + a functional and object-oriented programming language + running on the CPython 3 VM. + + .. versionadded:: 1.6 + """ + name = 'dg' + aliases = ['dg'] + filenames = ['*.dg'] + mimetypes = ['text/x-dg'] + + tokens = { + 'root': [ + (r'\s+', Text), + (r'#.*?$', Comment.Single), + + (r'(?i)0b[01]+', Number.Bin), + (r'(?i)0o[0-7]+', Number.Oct), + (r'(?i)0x[0-9a-f]+', Number.Hex), + (r'(?i)[+-]?[0-9]+\.[0-9]+(e[+-]?[0-9]+)?j?', Number.Float), + (r'(?i)[+-]?[0-9]+e[+-]?\d+j?', Number.Float), + (r'(?i)[+-]?[0-9]+j?', Number.Integer), + + (r"(?i)(br|r?b?)'''", String, combined('stringescape', 'tsqs', 'string')), + (r'(?i)(br|r?b?)"""', String, combined('stringescape', 'tdqs', 'string')), + (r"(?i)(br|r?b?)'", String, combined('stringescape', 'sqs', 'string')), + (r'(?i)(br|r?b?)"', String, combined('stringescape', 'dqs', 'string')), + + (r"`\w+'*`", Operator), + (r'\b(and|in|is|or|where)\b', Operator.Word), + (r'[!$%&*+\-./:<-@\\^|~;,]+', Operator), + + (words(( + 'bool', 'bytearray', 'bytes', 'classmethod', 'complex', 'dict', 'dict\'', + 'float', 'frozenset', 'int', 'list', 'list\'', 'memoryview', 'object', + 'property', 'range', 'set', 'set\'', 'slice', 'staticmethod', 'str', + 'super', 'tuple', 'tuple\'', 'type'), + prefix=r'(? Date: Fri, 26 Apr 2024 13:51:20 -0400 Subject: [PATCH 13/32] using userwarning now --- pandas/_libs/tslib.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 49b035a5c076a..b9fa16fbd6b69 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -215,9 +215,9 @@ def format_array_from_datetime( res = ts.strftime(format) except (ValueError, NotImplementedError): # Catches errors and replaces result with None - mesg= "The following timestamp could not be converted to string:" +\ - f"[{ts}] Set errors='raise' to see the details" - warnings.warn(mesg,StrftimeErrorWarning, + mesg= "The following timestamp could not be converted to string: " +\ + f"[{ts}]. Set errors='raise' to see the details" + warnings.warn(mesg, UserWarning, stacklevel=find_stack_level()); res = None From fc0f003be3f3364020d94a492bb350f894e15101 Mon Sep 17 00:00:00 2001 From: summitwei Date: Fri, 26 Apr 2024 13:52:23 -0400 Subject: [PATCH 14/32] fixed virtualenvs lexer --- .../site-packages/pip/_vendor/pygments/lexers/python.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/~virtualenvs/panda-dev/lib/python3.10/site-packages/pip/_vendor/pygments/lexers/python.py b/~virtualenvs/panda-dev/lib/python3.10/site-packages/pip/_vendor/pygments/lexers/python.py index 0f8536fa2e50b..c9d3f55291953 100644 --- a/~virtualenvs/panda-dev/lib/python3.10/site-packages/pip/_vendor/pygments/lexers/python.py +++ b/~virtualenvs/panda-dev/lib/python3.10/site-packages/pip/_vendor/pygments/lexers/python.py @@ -246,7 +246,7 @@ def fstring_rules(ttype): 'KeyboardInterrupt', 'LookupError', 'MemoryError', 'NameError', 'NotImplementedError', 'OSError', 'OverflowError', 'PendingDeprecationWarning', 'ReferenceError', 'ResourceWarning', - 'RuntimeError', 'RuntimeWarning', 'StopIteration', 'StrftimeErrorWarning', + 'RuntimeError', 'RuntimeWarning', 'StopIteration', 'SyntaxError', 'SyntaxWarning', 'SystemError', 'SystemExit', 'TabError', 'TypeError', 'UnboundLocalError', 'UnicodeDecodeError', 'UnicodeEncodeError', 'UnicodeError', 'UnicodeTranslateError', @@ -508,7 +508,7 @@ def innerstring_rules(ttype): 'MemoryError', 'NameError', 'NotImplementedError', 'OSError', 'OverflowError', 'OverflowWarning', 'PendingDeprecationWarning', 'ReferenceError', 'RuntimeError', - 'RuntimeWarning', 'StandardError', 'StopIteration', 'StrftimeErrorWarning', + 'RuntimeWarning', 'StandardError', 'StopIteration', 'SyntaxError', 'SyntaxWarning', 'SystemError', 'SystemExit', 'TabError', 'TypeError', 'UnboundLocalError', 'UnicodeDecodeError', 'UnicodeEncodeError', 'UnicodeError', 'UnicodeTranslateError', @@ -899,7 +899,7 @@ class CythonLexer(RegexLexer): 'MemoryError', 'NameError', 'NotImplemented', 'NotImplementedError', 'OSError', 'OverflowError', 'OverflowWarning', 'PendingDeprecationWarning', 'ReferenceError', 'RuntimeError', 'RuntimeWarning', 'StandardError', - 'StopIteration', 'StrftimeErrorWarning', 'SyntaxError', + 'StopIteration', 'SyntaxError', 'SyntaxWarning', 'SystemError', 'SystemExit', 'TabError', 'TypeError', 'UnboundLocalError', 'UnicodeDecodeError', 'UnicodeEncodeError', 'UnicodeError', 'UnicodeTranslateError', From 135e1cf7ab9df9cc193a235ccbffd0b3c617cf6d Mon Sep 17 00:00:00 2001 From: summitwei Date: Fri, 26 Apr 2024 14:01:14 -0400 Subject: [PATCH 15/32] added custom warning --- pandas/_libs/tslib.pyx | 2 +- pandas/errors/__init__.py | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index b9fa16fbd6b69..daef7e0f33ef7 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -217,7 +217,7 @@ def format_array_from_datetime( # Catches errors and replaces result with None mesg= "The following timestamp could not be converted to string: " +\ f"[{ts}]. Set errors='raise' to see the details" - warnings.warn(mesg, UserWarning, + warnings.warn(mesg, StrftimeErrorWarning, stacklevel=find_stack_level()); res = None diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 402bbdb872a18..2f72db22c3469 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -686,6 +686,16 @@ class CategoricalConversionWarning(Warning): ... # CategoricalConversionWarning: One or more series with value labels... """ +class StrftimeErrorWarning(Warning): + """ + Warning is raised by ``Timestamp.strftime`` when a ValueError or + NotImplementedError occurs, under the ``errors='warn'`` parameter value. + + Examples + -------- + >>> dta = pd.DatetimeIndex(np.array(['-0020-01-01', '2020-01-02'], 'datetime64[s]')) + >>> dta.strftime("%y", errors='warn') + """ class LossySetitemError(Exception): """ From 07c586969829c36be0149de24b6555708e452160 Mon Sep 17 00:00:00 2001 From: summitwei Date: Fri, 26 Apr 2024 14:07:34 -0400 Subject: [PATCH 16/32] fixed custom warning stuff --- pandas/_libs/tslib.pyx | 2 ++ pandas/errors/__init__.py | 1 + 2 files changed, 3 insertions(+) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index daef7e0f33ef7..47e0d3b93c34c 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -86,6 +86,8 @@ from pandas._libs.tslibs.timestamps import Timestamp from pandas._libs.missing cimport checknull_with_nat_and_na from pandas._libs.tslibs.tzconversion cimport tz_localize_to_utc_single +from pandas.errors import StrftimeErrorWarning + def _test_parse_iso8601(ts: str): """ diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 2f72db22c3469..fc383d37d7563 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -758,6 +758,7 @@ class InvalidComparison(Exception): "PossiblePrecisionLoss", "PyperclipException", "PyperclipWindowsException", + "StrftimeErrorWarning", "SpecificationError", "UndefinedVariableError", "UnsortedIndexError", From fb9cfe0e17ecc458630d2c6eb19f2b860b638a6c Mon Sep 17 00:00:00 2001 From: summitwei Date: Fri, 26 Apr 2024 14:25:57 -0400 Subject: [PATCH 17/32] added StrftimeErrorWarning into test_errors.py --- pandas/tests/test_errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/test_errors.py b/pandas/tests/test_errors.py index c5c4b234eb129..53758e4522457 100644 --- a/pandas/tests/test_errors.py +++ b/pandas/tests/test_errors.py @@ -38,6 +38,7 @@ "PossiblePrecisionLoss", "PyperclipException", "SpecificationError", + "StrftimeErrorWarning" "UnsortedIndexError", "UnsupportedFunctionCall", "ValueLabelTypeMismatch", From 802211825372abdb3822d354b899f1724074c8a6 Mon Sep 17 00:00:00 2001 From: summitwei Date: Fri, 26 Apr 2024 14:38:03 -0400 Subject: [PATCH 18/32] fixed linting issues --- doc/source/reference/testing.rst | 1 + pandas/errors/__init__.py | 6 ++++-- pandas/tests/arrays/test_datetimelike.py | 6 +++--- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/doc/source/reference/testing.rst b/doc/source/reference/testing.rst index 1f164d1aa98b4..343200c9a774f 100644 --- a/doc/source/reference/testing.rst +++ b/doc/source/reference/testing.rst @@ -58,6 +58,7 @@ Exceptions and warnings errors.PossiblePrecisionLoss errors.PyperclipException errors.PyperclipWindowsException + errors.StrftimeErrorWarning errors.SpecificationError errors.UndefinedVariableError errors.UnsortedIndexError diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index fc383d37d7563..bcbc2dfd97ca5 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -686,6 +686,7 @@ class CategoricalConversionWarning(Warning): ... # CategoricalConversionWarning: One or more series with value labels... """ + class StrftimeErrorWarning(Warning): """ Warning is raised by ``Timestamp.strftime`` when a ValueError or @@ -693,10 +694,11 @@ class StrftimeErrorWarning(Warning): Examples -------- - >>> dta = pd.DatetimeIndex(np.array(['-0020-01-01', '2020-01-02'], 'datetime64[s]')) - >>> dta.strftime("%y", errors='warn') + >>> dta = pd.DatetimeIndex(np.array(["-0020-01-01", "2020-01-02"], "datetime64[s]")) + >>> dta.strftime("%y", errors="warn") """ + class LossySetitemError(Exception): """ Raised when trying to do a __setitem__ on an np.ndarray that is not lossless. diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index c287034277813..0a0c0b1ef71d1 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -1,11 +1,11 @@ from __future__ import annotations +import datetime import re import warnings import numpy as np import pytest -import datetime from pandas._libs import ( NaT, @@ -920,7 +920,6 @@ def test_strftime_err(self): else: expected = pd.Index(["20", "20"], dtype="object") - # with pytest.raises(ValueError): # result = arr.strftime("%y", "raise") @@ -931,7 +930,8 @@ def test_strftime_err(self): # expected = pd.Index([None, "20"], dtype="object") tm.assert_index_equal(result, expected) - # with tm.assert_produces_warning(TODO): + # with tm.assert_produces_warning(StrftimeErrorWarning): + # result = arr.strftime("%y", "warn") result = arr.strftime("%y", "warn") # expected = pd.Index([None, "20"], dtype="object") From 1698a64a911f03300641c1ce95c4adeb30ee6a52 Mon Sep 17 00:00:00 2001 From: summitwei Date: Fri, 26 Apr 2024 15:14:36 -0400 Subject: [PATCH 19/32] fixed test_strftime_err --- pandas/tests/arrays/test_datetimelike.py | 18 +++++++++++++++--- pandas/tests/test_errors.py | 2 +- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 0a0c0b1ef71d1..4ee4b014fdacc 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -14,6 +14,7 @@ ) from pandas._libs.tslibs import to_offset from pandas.compat.numpy import np_version_gt2 +from pandas.errors import StrftimeErrorWarning from pandas.core.dtypes.dtypes import PeriodDtype @@ -920,6 +921,10 @@ def test_strftime_err(self): else: expected = pd.Index(["20", "20"], dtype="object") + if windowFlag: + with tm.assert_produces_warning(StrftimeErrorWarning): + result = arr.strftime("%y", "warn") + # with pytest.raises(ValueError): # result = arr.strftime("%y", "raise") @@ -930,13 +935,20 @@ def test_strftime_err(self): # expected = pd.Index([None, "20"], dtype="object") tm.assert_index_equal(result, expected) - # with tm.assert_produces_warning(StrftimeErrorWarning): - # result = arr.strftime("%y", "warn") - result = arr.strftime("%y", "warn") # expected = pd.Index([None, "20"], dtype="object") tm.assert_index_equal(result, expected) + arr2 = DatetimeIndex(np.array(["-0020-01-01", "2020-01-02"], "datetime64[s]")) + expected = pd.Index([None, "20"], dtype="object") + + with pytest.raises(NotImplementedError): + result = arr2.strftime("%y", "raise") + with tm.assert_produces_warning(StrftimeErrorWarning): + result = arr2.strftime("%y", "warn") + result = arr2.strftime("%y", "ignore") + tm.assert_index_equal(result, expected) + def test_strftime_nat(self): # GH 29578 arr = DatetimeIndex(["2019-01-01", NaT])._data diff --git a/pandas/tests/test_errors.py b/pandas/tests/test_errors.py index 53758e4522457..7fbb9752acf36 100644 --- a/pandas/tests/test_errors.py +++ b/pandas/tests/test_errors.py @@ -38,7 +38,7 @@ "PossiblePrecisionLoss", "PyperclipException", "SpecificationError", - "StrftimeErrorWarning" + "StrftimeErrorWarning", "UnsortedIndexError", "UnsupportedFunctionCall", "ValueLabelTypeMismatch", From d836207621f7853483f6717549b87dee708a4e20 Mon Sep 17 00:00:00 2001 From: summitwei Date: Fri, 26 Apr 2024 15:16:22 -0400 Subject: [PATCH 20/32] removed lexer file --- .../pip/_vendor/pygments/lexers/python.py | 1188 ----------------- 1 file changed, 1188 deletions(-) delete mode 100644 ~virtualenvs/panda-dev/lib/python3.10/site-packages/pip/_vendor/pygments/lexers/python.py diff --git a/~virtualenvs/panda-dev/lib/python3.10/site-packages/pip/_vendor/pygments/lexers/python.py b/~virtualenvs/panda-dev/lib/python3.10/site-packages/pip/_vendor/pygments/lexers/python.py deleted file mode 100644 index c9d3f55291953..0000000000000 --- a/~virtualenvs/panda-dev/lib/python3.10/site-packages/pip/_vendor/pygments/lexers/python.py +++ /dev/null @@ -1,1188 +0,0 @@ -""" - pygments.lexers.python - ~~~~~~~~~~~~~~~~~~~~~~ - - Lexers for Python and related languages. - - :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS. - :license: BSD, see LICENSE for details. -""" - -import re -import keyword - -from pip._vendor.pygments.lexer import Lexer, RegexLexer, include, bygroups, using, \ - default, words, combined, do_insertions, this -from pip._vendor.pygments.util import get_bool_opt, shebang_matches -from pip._vendor.pygments.token import Text, Comment, Operator, Keyword, Name, String, \ - Number, Punctuation, Generic, Other, Error -from pip._vendor.pygments import unistring as uni - -__all__ = ['PythonLexer', 'PythonConsoleLexer', 'PythonTracebackLexer', - 'Python2Lexer', 'Python2TracebackLexer', - 'CythonLexer', 'DgLexer', 'NumPyLexer'] - -line_re = re.compile('.*?\n') - - -class PythonLexer(RegexLexer): - """ - For `Python `_ source code (version 3.x). - - .. versionadded:: 0.10 - - .. versionchanged:: 2.5 - This is now the default ``PythonLexer``. It is still available as the - alias ``Python3Lexer``. - """ - - name = 'Python' - aliases = ['python', 'py', 'sage', 'python3', 'py3'] - filenames = [ - '*.py', - '*.pyw', - # Jython - '*.jy', - # Sage - '*.sage', - # SCons - '*.sc', - 'SConstruct', - 'SConscript', - # Skylark/Starlark (used by Bazel, Buck, and Pants) - '*.bzl', - 'BUCK', - 'BUILD', - 'BUILD.bazel', - 'WORKSPACE', - # Twisted Application infrastructure - '*.tac', - ] - mimetypes = ['text/x-python', 'application/x-python', - 'text/x-python3', 'application/x-python3'] - - flags = re.MULTILINE | re.UNICODE - - uni_name = "[%s][%s]*" % (uni.xid_start, uni.xid_continue) - - def innerstring_rules(ttype): - return [ - # the old style '%s' % (...) string formatting (still valid in Py3) - (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' - '[hlL]?[E-GXc-giorsaux%]', String.Interpol), - # the new style '{}'.format(...) string formatting - (r'\{' - r'((\w+)((\.\w+)|(\[[^\]]+\]))*)?' # field name - r'(\![sra])?' # conversion - r'(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[E-GXb-gnosx%]?)?' - r'\}', String.Interpol), - - # backslashes, quotes and formatting signs must be parsed one at a time - (r'[^\\\'"%{\n]+', ttype), - (r'[\'"\\]', ttype), - # unhandled string formatting sign - (r'%|(\{{1,2})', ttype) - # newlines are an error (use "nl" state) - ] - - def fstring_rules(ttype): - return [ - # Assuming that a '}' is the closing brace after format specifier. - # Sadly, this means that we won't detect syntax error. But it's - # more important to parse correct syntax correctly, than to - # highlight invalid syntax. - (r'\}', String.Interpol), - (r'\{', String.Interpol, 'expr-inside-fstring'), - # backslashes, quotes and formatting signs must be parsed one at a time - (r'[^\\\'"{}\n]+', ttype), - (r'[\'"\\]', ttype), - # newlines are an error (use "nl" state) - ] - - tokens = { - 'root': [ - (r'\n', Text), - (r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")', - bygroups(Text, String.Affix, String.Doc)), - (r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')", - bygroups(Text, String.Affix, String.Doc)), - (r'\A#!.+$', Comment.Hashbang), - (r'#.*$', Comment.Single), - (r'\\\n', Text), - (r'\\', Text), - include('keywords'), - include('soft-keywords'), - (r'(def)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'funcname'), - (r'(class)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'classname'), - (r'(from)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text), - 'fromimport'), - (r'(import)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text), - 'import'), - include('expr'), - ], - 'expr': [ - # raw f-strings - ('(?i)(rf|fr)(""")', - bygroups(String.Affix, String.Double), - combined('rfstringescape', 'tdqf')), - ("(?i)(rf|fr)(''')", - bygroups(String.Affix, String.Single), - combined('rfstringescape', 'tsqf')), - ('(?i)(rf|fr)(")', - bygroups(String.Affix, String.Double), - combined('rfstringescape', 'dqf')), - ("(?i)(rf|fr)(')", - bygroups(String.Affix, String.Single), - combined('rfstringescape', 'sqf')), - # non-raw f-strings - ('([fF])(""")', bygroups(String.Affix, String.Double), - combined('fstringescape', 'tdqf')), - ("([fF])(''')", bygroups(String.Affix, String.Single), - combined('fstringescape', 'tsqf')), - ('([fF])(")', bygroups(String.Affix, String.Double), - combined('fstringescape', 'dqf')), - ("([fF])(')", bygroups(String.Affix, String.Single), - combined('fstringescape', 'sqf')), - # raw strings - ('(?i)(rb|br|r)(""")', - bygroups(String.Affix, String.Double), 'tdqs'), - ("(?i)(rb|br|r)(''')", - bygroups(String.Affix, String.Single), 'tsqs'), - ('(?i)(rb|br|r)(")', - bygroups(String.Affix, String.Double), 'dqs'), - ("(?i)(rb|br|r)(')", - bygroups(String.Affix, String.Single), 'sqs'), - # non-raw strings - ('([uUbB]?)(""")', bygroups(String.Affix, String.Double), - combined('stringescape', 'tdqs')), - ("([uUbB]?)(''')", bygroups(String.Affix, String.Single), - combined('stringescape', 'tsqs')), - ('([uUbB]?)(")', bygroups(String.Affix, String.Double), - combined('stringescape', 'dqs')), - ("([uUbB]?)(')", bygroups(String.Affix, String.Single), - combined('stringescape', 'sqs')), - (r'[^\S\n]+', Text), - include('numbers'), - (r'!=|==|<<|>>|:=|[-~+/*%=<>&^|.]', Operator), - (r'[]{}:(),;[]', Punctuation), - (r'(in|is|and|or|not)\b', Operator.Word), - include('expr-keywords'), - include('builtins'), - include('magicfuncs'), - include('magicvars'), - include('name'), - ], - 'expr-inside-fstring': [ - (r'[{([]', Punctuation, 'expr-inside-fstring-inner'), - # without format specifier - (r'(=\s*)?' # debug (https://bugs.python.org/issue36817) - r'(\![sraf])?' # conversion - r'\}', String.Interpol, '#pop'), - # with format specifier - # we'll catch the remaining '}' in the outer scope - (r'(=\s*)?' # debug (https://bugs.python.org/issue36817) - r'(\![sraf])?' # conversion - r':', String.Interpol, '#pop'), - (r'\s+', Text), # allow new lines - include('expr'), - ], - 'expr-inside-fstring-inner': [ - (r'[{([]', Punctuation, 'expr-inside-fstring-inner'), - (r'[])}]', Punctuation, '#pop'), - (r'\s+', Text), # allow new lines - include('expr'), - ], - 'expr-keywords': [ - # Based on https://docs.python.org/3/reference/expressions.html - (words(( - 'async for', 'await', 'else', 'for', 'if', 'lambda', - 'yield', 'yield from'), suffix=r'\b'), - Keyword), - (words(('True', 'False', 'None'), suffix=r'\b'), Keyword.Constant), - ], - 'keywords': [ - (words(( - 'assert', 'async', 'await', 'break', 'continue', 'del', 'elif', - 'else', 'except', 'finally', 'for', 'global', 'if', 'lambda', - 'pass', 'raise', 'nonlocal', 'return', 'try', 'while', 'yield', - 'yield from', 'as', 'with'), suffix=r'\b'), - Keyword), - (words(('True', 'False', 'None'), suffix=r'\b'), Keyword.Constant), - ], - 'soft-keywords': [ - # `match`, `case` and `_` soft keywords - (r'(^[ \t]*)' # at beginning of line + possible indentation - r'(match|case)\b' # a possible keyword - r'(?![ \t]*(?:' # not followed by... - r'[:,;=^&|@~)\]}]|(?:' + # characters and keywords that mean this isn't - r'|'.join(keyword.kwlist) + r')\b))', # pattern matching - bygroups(Text, Keyword), 'soft-keywords-inner'), - ], - 'soft-keywords-inner': [ - # optional `_` keyword - (r'(\s+)([^\n_]*)(_\b)', bygroups(Text, using(this), Keyword)), - default('#pop') - ], - 'builtins': [ - (words(( - '__import__', 'abs', 'all', 'any', 'bin', 'bool', 'bytearray', - 'breakpoint', 'bytes', 'chr', 'classmethod', 'compile', 'complex', - 'delattr', 'dict', 'dir', 'divmod', 'enumerate', 'eval', 'filter', - 'float', 'format', 'frozenset', 'getattr', 'globals', 'hasattr', - 'hash', 'hex', 'id', 'input', 'int', 'isinstance', 'issubclass', - 'iter', 'len', 'list', 'locals', 'map', 'max', 'memoryview', - 'min', 'next', 'object', 'oct', 'open', 'ord', 'pow', 'print', - 'property', 'range', 'repr', 'reversed', 'round', 'set', 'setattr', - 'slice', 'sorted', 'staticmethod', 'str', 'sum', 'super', 'tuple', - 'type', 'vars', 'zip'), prefix=r'(?`_ source code. - - .. versionchanged:: 2.5 - This class has been renamed from ``PythonLexer``. ``PythonLexer`` now - refers to the Python 3 variant. File name patterns like ``*.py`` have - been moved to Python 3 as well. - """ - - name = 'Python 2.x' - aliases = ['python2', 'py2'] - filenames = [] # now taken over by PythonLexer (3.x) - mimetypes = ['text/x-python2', 'application/x-python2'] - - def innerstring_rules(ttype): - return [ - # the old style '%s' % (...) string formatting - (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' - '[hlL]?[E-GXc-giorsux%]', String.Interpol), - # backslashes, quotes and formatting signs must be parsed one at a time - (r'[^\\\'"%\n]+', ttype), - (r'[\'"\\]', ttype), - # unhandled string formatting sign - (r'%', ttype), - # newlines are an error (use "nl" state) - ] - - tokens = { - 'root': [ - (r'\n', Text), - (r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")', - bygroups(Text, String.Affix, String.Doc)), - (r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')", - bygroups(Text, String.Affix, String.Doc)), - (r'[^\S\n]+', Text), - (r'\A#!.+$', Comment.Hashbang), - (r'#.*$', Comment.Single), - (r'[]{}:(),;[]', Punctuation), - (r'\\\n', Text), - (r'\\', Text), - (r'(in|is|and|or|not)\b', Operator.Word), - (r'!=|==|<<|>>|[-~+/*%=<>&^|.]', Operator), - include('keywords'), - (r'(def)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'funcname'), - (r'(class)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'classname'), - (r'(from)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text), - 'fromimport'), - (r'(import)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text), - 'import'), - include('builtins'), - include('magicfuncs'), - include('magicvars'), - include('backtick'), - ('([rR]|[uUbB][rR]|[rR][uUbB])(""")', - bygroups(String.Affix, String.Double), 'tdqs'), - ("([rR]|[uUbB][rR]|[rR][uUbB])(''')", - bygroups(String.Affix, String.Single), 'tsqs'), - ('([rR]|[uUbB][rR]|[rR][uUbB])(")', - bygroups(String.Affix, String.Double), 'dqs'), - ("([rR]|[uUbB][rR]|[rR][uUbB])(')", - bygroups(String.Affix, String.Single), 'sqs'), - ('([uUbB]?)(""")', bygroups(String.Affix, String.Double), - combined('stringescape', 'tdqs')), - ("([uUbB]?)(''')", bygroups(String.Affix, String.Single), - combined('stringescape', 'tsqs')), - ('([uUbB]?)(")', bygroups(String.Affix, String.Double), - combined('stringescape', 'dqs')), - ("([uUbB]?)(')", bygroups(String.Affix, String.Single), - combined('stringescape', 'sqs')), - include('name'), - include('numbers'), - ], - 'keywords': [ - (words(( - 'assert', 'break', 'continue', 'del', 'elif', 'else', 'except', - 'exec', 'finally', 'for', 'global', 'if', 'lambda', 'pass', - 'print', 'raise', 'return', 'try', 'while', 'yield', - 'yield from', 'as', 'with'), suffix=r'\b'), - Keyword), - ], - 'builtins': [ - (words(( - '__import__', 'abs', 'all', 'any', 'apply', 'basestring', 'bin', - 'bool', 'buffer', 'bytearray', 'bytes', 'callable', 'chr', 'classmethod', - 'cmp', 'coerce', 'compile', 'complex', 'delattr', 'dict', 'dir', 'divmod', - 'enumerate', 'eval', 'execfile', 'exit', 'file', 'filter', 'float', - 'frozenset', 'getattr', 'globals', 'hasattr', 'hash', 'hex', 'id', - 'input', 'int', 'intern', 'isinstance', 'issubclass', 'iter', 'len', - 'list', 'locals', 'long', 'map', 'max', 'min', 'next', 'object', - 'oct', 'open', 'ord', 'pow', 'property', 'range', 'raw_input', 'reduce', - 'reload', 'repr', 'reversed', 'round', 'set', 'setattr', 'slice', - 'sorted', 'staticmethod', 'str', 'sum', 'super', 'tuple', 'type', - 'unichr', 'unicode', 'vars', 'xrange', 'zip'), - prefix=r'(?>> a = 'foo' - >>> print a - foo - >>> 1 / 0 - Traceback (most recent call last): - File "", line 1, in - ZeroDivisionError: integer division or modulo by zero - - Additional options: - - `python3` - Use Python 3 lexer for code. Default is ``True``. - - .. versionadded:: 1.0 - .. versionchanged:: 2.5 - Now defaults to ``True``. - """ - name = 'Python console session' - aliases = ['pycon'] - mimetypes = ['text/x-python-doctest'] - - def __init__(self, **options): - self.python3 = get_bool_opt(options, 'python3', True) - Lexer.__init__(self, **options) - - def get_tokens_unprocessed(self, text): - if self.python3: - pylexer = PythonLexer(**self.options) - tblexer = PythonTracebackLexer(**self.options) - else: - pylexer = Python2Lexer(**self.options) - tblexer = Python2TracebackLexer(**self.options) - - curcode = '' - insertions = [] - curtb = '' - tbindex = 0 - tb = 0 - for match in line_re.finditer(text): - line = match.group() - if line.startswith('>>> ') or line.startswith('... '): - tb = 0 - insertions.append((len(curcode), - [(0, Generic.Prompt, line[:4])])) - curcode += line[4:] - elif line.rstrip() == '...' and not tb: - # only a new >>> prompt can end an exception block - # otherwise an ellipsis in place of the traceback frames - # will be mishandled - insertions.append((len(curcode), - [(0, Generic.Prompt, '...')])) - curcode += line[3:] - else: - if curcode: - yield from do_insertions( - insertions, pylexer.get_tokens_unprocessed(curcode)) - curcode = '' - insertions = [] - if (line.startswith('Traceback (most recent call last):') or - re.match(' File "[^"]+", line \\d+\\n$', line)): - tb = 1 - curtb = line - tbindex = match.start() - elif line == 'KeyboardInterrupt\n': - yield match.start(), Name.Class, line - elif tb: - curtb += line - if not (line.startswith(' ') or line.strip() == '...'): - tb = 0 - for i, t, v in tblexer.get_tokens_unprocessed(curtb): - yield tbindex+i, t, v - curtb = '' - else: - yield match.start(), Generic.Output, line - if curcode: - yield from do_insertions(insertions, - pylexer.get_tokens_unprocessed(curcode)) - if curtb: - for i, t, v in tblexer.get_tokens_unprocessed(curtb): - yield tbindex+i, t, v - - -class PythonTracebackLexer(RegexLexer): - """ - For Python 3.x tracebacks, with support for chained exceptions. - - .. versionadded:: 1.0 - - .. versionchanged:: 2.5 - This is now the default ``PythonTracebackLexer``. It is still available - as the alias ``Python3TracebackLexer``. - """ - - name = 'Python Traceback' - aliases = ['pytb', 'py3tb'] - filenames = ['*.pytb', '*.py3tb'] - mimetypes = ['text/x-python-traceback', 'text/x-python3-traceback'] - - tokens = { - 'root': [ - (r'\n', Text), - (r'^Traceback \(most recent call last\):\n', Generic.Traceback, 'intb'), - (r'^During handling of the above exception, another ' - r'exception occurred:\n\n', Generic.Traceback), - (r'^The above exception was the direct cause of the ' - r'following exception:\n\n', Generic.Traceback), - (r'^(?= File "[^"]+", line \d+)', Generic.Traceback, 'intb'), - (r'^.*\n', Other), - ], - 'intb': [ - (r'^( File )("[^"]+")(, line )(\d+)(, in )(.+)(\n)', - bygroups(Text, Name.Builtin, Text, Number, Text, Name, Text)), - (r'^( File )("[^"]+")(, line )(\d+)(\n)', - bygroups(Text, Name.Builtin, Text, Number, Text)), - (r'^( )(.+)(\n)', - bygroups(Text, using(PythonLexer), Text), 'markers'), - (r'^([ \t]*)(\.\.\.)(\n)', - bygroups(Text, Comment, Text)), # for doctests... - (r'^([^:]+)(: )(.+)(\n)', - bygroups(Generic.Error, Text, Name, Text), '#pop'), - (r'^([a-zA-Z_][\w.]*)(:?\n)', - bygroups(Generic.Error, Text), '#pop') - ], - 'markers': [ - # Either `PEP 657 ` - # error locations in Python 3.11+, or single-caret markers - # for syntax errors before that. - (r'^( {4,})([~^]+)(\n)', - bygroups(Text, Punctuation.Marker, Text), - '#pop'), - default('#pop'), - ], - } - - -Python3TracebackLexer = PythonTracebackLexer - - -class Python2TracebackLexer(RegexLexer): - """ - For Python tracebacks. - - .. versionadded:: 0.7 - - .. versionchanged:: 2.5 - This class has been renamed from ``PythonTracebackLexer``. - ``PythonTracebackLexer`` now refers to the Python 3 variant. - """ - - name = 'Python 2.x Traceback' - aliases = ['py2tb'] - filenames = ['*.py2tb'] - mimetypes = ['text/x-python2-traceback'] - - tokens = { - 'root': [ - # Cover both (most recent call last) and (innermost last) - # The optional ^C allows us to catch keyboard interrupt signals. - (r'^(\^C)?(Traceback.*\n)', - bygroups(Text, Generic.Traceback), 'intb'), - # SyntaxError starts with this. - (r'^(?= File "[^"]+", line \d+)', Generic.Traceback, 'intb'), - (r'^.*\n', Other), - ], - 'intb': [ - (r'^( File )("[^"]+")(, line )(\d+)(, in )(.+)(\n)', - bygroups(Text, Name.Builtin, Text, Number, Text, Name, Text)), - (r'^( File )("[^"]+")(, line )(\d+)(\n)', - bygroups(Text, Name.Builtin, Text, Number, Text)), - (r'^( )(.+)(\n)', - bygroups(Text, using(Python2Lexer), Text), 'marker'), - (r'^([ \t]*)(\.\.\.)(\n)', - bygroups(Text, Comment, Text)), # for doctests... - (r'^([^:]+)(: )(.+)(\n)', - bygroups(Generic.Error, Text, Name, Text), '#pop'), - (r'^([a-zA-Z_]\w*)(:?\n)', - bygroups(Generic.Error, Text), '#pop') - ], - 'marker': [ - # For syntax errors. - (r'( {4,})(\^)', bygroups(Text, Punctuation.Marker), '#pop'), - default('#pop'), - ], - } - - -class CythonLexer(RegexLexer): - """ - For Pyrex and `Cython `_ source code. - - .. versionadded:: 1.1 - """ - - name = 'Cython' - aliases = ['cython', 'pyx', 'pyrex'] - filenames = ['*.pyx', '*.pxd', '*.pxi'] - mimetypes = ['text/x-cython', 'application/x-cython'] - - tokens = { - 'root': [ - (r'\n', Text), - (r'^(\s*)("""(?:.|\n)*?""")', bygroups(Text, String.Doc)), - (r"^(\s*)('''(?:.|\n)*?''')", bygroups(Text, String.Doc)), - (r'[^\S\n]+', Text), - (r'#.*$', Comment), - (r'[]{}:(),;[]', Punctuation), - (r'\\\n', Text), - (r'\\', Text), - (r'(in|is|and|or|not)\b', Operator.Word), - (r'(<)([a-zA-Z0-9.?]+)(>)', - bygroups(Punctuation, Keyword.Type, Punctuation)), - (r'!=|==|<<|>>|[-~+/*%=<>&^|.?]', Operator), - (r'(from)(\d+)(<=)(\s+)(<)(\d+)(:)', - bygroups(Keyword, Number.Integer, Operator, Name, Operator, - Name, Punctuation)), - include('keywords'), - (r'(def|property)(\s+)', bygroups(Keyword, Text), 'funcname'), - (r'(cp?def)(\s+)', bygroups(Keyword, Text), 'cdef'), - # (should actually start a block with only cdefs) - (r'(cdef)(:)', bygroups(Keyword, Punctuation)), - (r'(class|struct)(\s+)', bygroups(Keyword, Text), 'classname'), - (r'(from)(\s+)', bygroups(Keyword, Text), 'fromimport'), - (r'(c?import)(\s+)', bygroups(Keyword, Text), 'import'), - include('builtins'), - include('backtick'), - ('(?:[rR]|[uU][rR]|[rR][uU])"""', String, 'tdqs'), - ("(?:[rR]|[uU][rR]|[rR][uU])'''", String, 'tsqs'), - ('(?:[rR]|[uU][rR]|[rR][uU])"', String, 'dqs'), - ("(?:[rR]|[uU][rR]|[rR][uU])'", String, 'sqs'), - ('[uU]?"""', String, combined('stringescape', 'tdqs')), - ("[uU]?'''", String, combined('stringescape', 'tsqs')), - ('[uU]?"', String, combined('stringescape', 'dqs')), - ("[uU]?'", String, combined('stringescape', 'sqs')), - include('name'), - include('numbers'), - ], - 'keywords': [ - (words(( - 'assert', 'async', 'await', 'break', 'by', 'continue', 'ctypedef', 'del', 'elif', - 'else', 'except', 'except?', 'exec', 'finally', 'for', 'fused', 'gil', - 'global', 'if', 'include', 'lambda', 'nogil', 'pass', 'print', - 'raise', 'return', 'try', 'while', 'yield', 'as', 'with'), suffix=r'\b'), - Keyword), - (r'(DEF|IF|ELIF|ELSE)\b', Comment.Preproc), - ], - 'builtins': [ - (words(( - '__import__', 'abs', 'all', 'any', 'apply', 'basestring', 'bin', 'bint', - 'bool', 'buffer', 'bytearray', 'bytes', 'callable', 'chr', - 'classmethod', 'cmp', 'coerce', 'compile', 'complex', 'delattr', - 'dict', 'dir', 'divmod', 'enumerate', 'eval', 'execfile', 'exit', - 'file', 'filter', 'float', 'frozenset', 'getattr', 'globals', - 'hasattr', 'hash', 'hex', 'id', 'input', 'int', 'intern', 'isinstance', - 'issubclass', 'iter', 'len', 'list', 'locals', 'long', 'map', 'max', - 'min', 'next', 'object', 'oct', 'open', 'ord', 'pow', 'property', 'Py_ssize_t', - 'range', 'raw_input', 'reduce', 'reload', 'repr', 'reversed', - 'round', 'set', 'setattr', 'slice', 'sorted', 'staticmethod', - 'str', 'sum', 'super', 'tuple', 'type', 'unichr', 'unicode', 'unsigned', - 'vars', 'xrange', 'zip'), prefix=r'(?`_, - a functional and object-oriented programming language - running on the CPython 3 VM. - - .. versionadded:: 1.6 - """ - name = 'dg' - aliases = ['dg'] - filenames = ['*.dg'] - mimetypes = ['text/x-dg'] - - tokens = { - 'root': [ - (r'\s+', Text), - (r'#.*?$', Comment.Single), - - (r'(?i)0b[01]+', Number.Bin), - (r'(?i)0o[0-7]+', Number.Oct), - (r'(?i)0x[0-9a-f]+', Number.Hex), - (r'(?i)[+-]?[0-9]+\.[0-9]+(e[+-]?[0-9]+)?j?', Number.Float), - (r'(?i)[+-]?[0-9]+e[+-]?\d+j?', Number.Float), - (r'(?i)[+-]?[0-9]+j?', Number.Integer), - - (r"(?i)(br|r?b?)'''", String, combined('stringescape', 'tsqs', 'string')), - (r'(?i)(br|r?b?)"""', String, combined('stringescape', 'tdqs', 'string')), - (r"(?i)(br|r?b?)'", String, combined('stringescape', 'sqs', 'string')), - (r'(?i)(br|r?b?)"', String, combined('stringescape', 'dqs', 'string')), - - (r"`\w+'*`", Operator), - (r'\b(and|in|is|or|where)\b', Operator.Word), - (r'[!$%&*+\-./:<-@\\^|~;,]+', Operator), - - (words(( - 'bool', 'bytearray', 'bytes', 'classmethod', 'complex', 'dict', 'dict\'', - 'float', 'frozenset', 'int', 'list', 'list\'', 'memoryview', 'object', - 'property', 'range', 'set', 'set\'', 'slice', 'staticmethod', 'str', - 'super', 'tuple', 'tuple\'', 'type'), - prefix=r'(? Date: Fri, 26 Apr 2024 15:17:51 -0400 Subject: [PATCH 21/32] i know how the alphabet works trust me --- doc/source/reference/testing.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/reference/testing.rst b/doc/source/reference/testing.rst index 343200c9a774f..7513ab6afa6f2 100644 --- a/doc/source/reference/testing.rst +++ b/doc/source/reference/testing.rst @@ -58,8 +58,8 @@ Exceptions and warnings errors.PossiblePrecisionLoss errors.PyperclipException errors.PyperclipWindowsException - errors.StrftimeErrorWarning errors.SpecificationError + errors.StrftimeErrorWarning errors.UndefinedVariableError errors.UnsortedIndexError errors.UnsupportedFunctionCall From ee882c251e869a9d3553ee9e1f6b172bce14e57d Mon Sep 17 00:00:00 2001 From: summitwei Date: Fri, 26 Apr 2024 15:18:31 -0400 Subject: [PATCH 22/32] okay i lied before but now i really know how the alphabet works --- pandas/errors/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index bcbc2dfd97ca5..9db159c6b3e34 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -760,8 +760,8 @@ class InvalidComparison(Exception): "PossiblePrecisionLoss", "PyperclipException", "PyperclipWindowsException", - "StrftimeErrorWarning", "SpecificationError", + "StrftimeErrorWarning", "UndefinedVariableError", "UnsortedIndexError", "UnsupportedFunctionCall", From 7de84d515ee42158af77248b3cee91a67be0b9a9 Mon Sep 17 00:00:00 2001 From: pyrevoid15 Date: Fri, 26 Apr 2024 15:35:12 -0400 Subject: [PATCH 23/32] Simplified windowFlag section --- pandas/tests/arrays/test_datetimelike.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 4ee4b014fdacc..cd98c0907bebc 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -913,14 +913,12 @@ def test_strftime_err(self): windowFlag = False try: - datetime.datetime(1820, 1, 1) + _ = datetime.datetime(1820, 1, 1) + expected = pd.Index(["20", "20"], dtype="object") except ValueError: windowFlag = True - if windowFlag: expected = pd.Index([None, "20"], dtype="object") - else: - expected = pd.Index(["20", "20"], dtype="object") - + if windowFlag: with tm.assert_produces_warning(StrftimeErrorWarning): result = arr.strftime("%y", "warn") From cba0d57746c69a91522fd63b4499214377e304a0 Mon Sep 17 00:00:00 2001 From: Stanley Ou Date: Fri, 26 Apr 2024 15:50:18 -0400 Subject: [PATCH 24/32] Fixed tests for error handling, fixed windowFlag --- pandas/tests/arrays/test_datetimelike.py | 35 ++++++++++++++++-------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 4ee4b014fdacc..22168c9d6dcd8 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -1,6 +1,5 @@ from __future__ import annotations -import datetime import re import warnings @@ -913,9 +912,11 @@ def test_strftime_err(self): windowFlag = False try: - datetime.datetime(1820, 1, 1) + _ = arr[0].strftime("%y") except ValueError: windowFlag = True + expected = pd.Index([None, "20"], dtype="object") + if windowFlag: expected = pd.Index([None, "20"], dtype="object") else: @@ -925,19 +926,29 @@ def test_strftime_err(self): with tm.assert_produces_warning(StrftimeErrorWarning): result = arr.strftime("%y", "warn") - # with pytest.raises(ValueError): - # result = arr.strftime("%y", "raise") + with pytest.raises( + ValueError, match="format %y requires year >= 1900 on Windows" + ): + result = arr.strftime("%y", "raise") - # with pytest.raises(ValueError): - # result = arr[0].strftime("%y") + with pytest.raises( + ValueError, match="format %y requires year >= 1900 on Windows" + ): + result = arr[0].strftime("%y") - result = arr.strftime("%y", "ignore") - # expected = pd.Index([None, "20"], dtype="object") - tm.assert_index_equal(result, expected) + result = arr.strftime("%y", "ignore") + tm.assert_index_equal(result, expected) - result = arr.strftime("%y", "warn") - # expected = pd.Index([None, "20"], dtype="object") - tm.assert_index_equal(result, expected) + else: + # "1820-01-01" should be converted properly if not on Windows. + result = arr.strftime("%y", "warn") + tm.assert_index_equal(result, expected) + + result = arr.strftime("%y", "raise") + tm.assert_index_equal(result, expected) + + result = arr.strftime("%y", "ignore") + tm.assert_index_equal(result, expected) arr2 = DatetimeIndex(np.array(["-0020-01-01", "2020-01-02"], "datetime64[s]")) expected = pd.Index([None, "20"], dtype="object") From 123d5d46b0ea6a1511a621a0254c2bc1f369d2e7 Mon Sep 17 00:00:00 2001 From: Stanley Ou Date: Fri, 26 Apr 2024 16:23:05 -0400 Subject: [PATCH 25/32] Added type annotations for 'errors' parameter --- pandas/_libs/tslib.pyx | 11 ++++++----- pandas/core/arrays/datetimelike.py | 13 ++++++++++--- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 47e0d3b93c34c..6c9e1164b352e 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -122,7 +122,7 @@ def format_array_from_datetime( str format=None, na_rep: str | float = "NaT", NPY_DATETIMEUNIT reso=NPY_FR_ns, - str errors=None, + str errors="raise", ) -> np.ndarray: """ return a np object array of the string formatted values @@ -136,6 +136,7 @@ def format_array_from_datetime( na_rep : optional, default is None a nat format reso : NPY_DATETIMEUNIT, default NPY_FR_ns + errors : str, "raise" (default), "ignore", or "warn" Returns ------- @@ -217,10 +218,10 @@ def format_array_from_datetime( res = ts.strftime(format) except (ValueError, NotImplementedError): # Catches errors and replaces result with None - mesg= "The following timestamp could not be converted to string: " +\ - f"[{ts}]. Set errors='raise' to see the details" - warnings.warn(mesg, StrftimeErrorWarning, - stacklevel=find_stack_level()); + mesg= "The following timestamp could not be converted to string:" +\ + f" [{ts}]. Set errors='raise' to see the details" + warnings.warn( + mesg, StrftimeErrorWarning, stacklevel=find_stack_level()) res = None elif (errors == "ignore"): diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 11129c57691fb..8f64319ca53da 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -334,7 +334,7 @@ def asi8(self) -> npt.NDArray[np.int64]: # Rendering Methods def _format_native_types( - self, *, na_rep: str | float = "NaT", date_format=None, errors = 'raise' + self, *, na_rep: str | float = "NaT", date_format=None, errors="raise" ) -> npt.NDArray[np.object_]: """ Helper method for astype when converting to strings. @@ -1732,7 +1732,7 @@ class DatelikeOps(DatetimeLikeArrayMixin): URL="https://docs.python.org/3/library/datetime.html" "#strftime-and-strptime-behavior" ) - def strftime(self, date_format: str, errors = 'raise') -> npt.NDArray[np.object_]: + def strftime(self, date_format: str, errors="raise") -> npt.NDArray[np.object_]: """ Convert to Index using specified date_format. @@ -1753,6 +1753,11 @@ def strftime(self, date_format: str, errors = 'raise') -> npt.NDArray[np.object_ ---------- date_format : str Date format string (e.g. "%%Y-%%m-%%d"). + errors : 'ignore', 'warn', default 'raise' + - 'raise' (default) does not catch any underlying error and raise them as is + - 'ignore' catches all errors and silently replace the output with None + - 'warn' has the same behaviour as 'ignore' and issue a StrftimeErrorWarning + warning message Returns ------- @@ -1776,7 +1781,9 @@ def strftime(self, date_format: str, errors = 'raise') -> npt.NDArray[np.object_ 'March 10, 2018, 09:00:02 AM'], dtype='object') """ - result = self._format_native_types(date_format=date_format, na_rep=np.nan, errors=errors) + result = self._format_native_types( + date_format=date_format, na_rep=np.nan, errors=errors + ) return result.astype(object, copy=False) From 2006da9e53e6130ef15ed658a6d7199973f93ff8 Mon Sep 17 00:00:00 2001 From: Stanley Ou Date: Fri, 26 Apr 2024 17:15:07 -0400 Subject: [PATCH 26/32] added imports to fix "warnings.warning" undefined --- pandas/_libs/tslib.pyx | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index c4fed7c69fa06..3eca35b57b0ec 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -21,8 +21,12 @@ from numpy cimport ( ndarray, ) +import warnings + import numpy as np +from pandas.util._exceptions import find_stack_level + cnp.import_array() from pandas._libs.tslibs.dtypes cimport ( From 1b3c29b6d7bdaca93d0c137e062dfe21cd846670 Mon Sep 17 00:00:00 2001 From: Stanley Ou Date: Fri, 26 Apr 2024 17:30:56 -0400 Subject: [PATCH 27/32] Fixed linting issues --- doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/core/arrays/datetimes.py | 11 ++++++++--- pandas/core/indexes/datetimes.py | 2 +- pandas/tests/arrays/test_datetimes.py | 2 +- 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 79f876f95d87d..3662c71432242 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -39,10 +39,10 @@ Other enhancements - Users can globally disable any ``PerformanceWarning`` by setting the option ``mode.performance_warnings`` to ``False`` (:issue:`56920`) - :meth:`Styler.format_index_names` can now be used to format the index and column names (:issue:`48936` and :issue:`47489`) - :class:`.errors.DtypeWarning` improved to include column names when mixed data types are detected (:issue:`58174`) +- :func:`DatetimeIndex.strftime` and :func:`DatetimeArray.strftime` now have an optional ``errors`` parameter, which can be set to ``'ignore'`` or ``'warn'`` to interpret bad datetimes as ``None``. The latter also throws a warning (:issue:`58178`) - :meth:`DataFrame.cummin`, :meth:`DataFrame.cummax`, :meth:`DataFrame.cumprod` and :meth:`DataFrame.cumsum` methods now have a ``numeric_only`` parameter (:issue:`53072`) - :meth:`DataFrame.fillna` and :meth:`Series.fillna` can now accept ``value=None``; for non-object dtype the corresponding NA value will be used (:issue:`57723`) - :meth:`Series.cummin` and :meth:`Series.cummax` now supports :class:`CategoricalDtype` (:issue:`52335`) -- :func:`DatetimeIndex.strftime` and :func:`DatetimeArray.strftime` now have an optional ``errors`` paramater, which can be set to ``'ignore'`` or ``'warn'`` to intepret bad datetimes as ``None``. The latter also throws a warning (:issue:`58178`) .. --------------------------------------------------------------------------- .. _whatsnew_300.notable_bug_fixes: diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 3ab8053ca185b..de358607bde3e 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -760,15 +760,20 @@ def astype(self, dtype, copy: bool = True): # Rendering Methods def _format_native_types( - self, *, na_rep: str | float = "NaT", date_format=None, errors = 'raise', **kwargs + self, *, na_rep: str | float = "NaT", date_format=None, errors="raise", **kwargs ) -> npt.NDArray[np.object_]: if date_format is None and self._is_dates_only: # Only dates and no timezone: provide a default format date_format = "%Y-%m-%d" return tslib.format_array_from_datetime( - self.asi8, tz=self.tz, format=date_format, na_rep=na_rep, reso=self._creso, - errors=errors) + self.asi8, + tz=self.tz, + format=date_format, + na_rep=na_rep, + reso=self._creso, + errors=errors, + ) # ----------------------------------------------------------------- # Comparison Methods diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index f1b171a5e57f7..21abc84b89fae 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -262,7 +262,7 @@ def _engine_type(self) -> type[libindex.DatetimeEngine]: # methods that dispatch to DatetimeArray and wrap result @doc(DatetimeArray.strftime) - def strftime(self, date_format, errors = 'raise') -> Index: + def strftime(self, date_format, errors="raise") -> Index: arr = self._data.strftime(date_format, errors) return Index(arr, name=self.name, dtype=object) diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 1281cff227787..8650be62ae7eb 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -161,7 +161,7 @@ def test_format_native_types(self, unit, dtype, dta_dti): # In this case we should get the same formatted values with our nano # version dti._data as we do with the non-nano dta dta, dti = dta_dti - + res = dta._format_native_types() exp = dti._data._format_native_types() tm.assert_numpy_array_equal(res, exp) From 97435d1f55eff1c77f38163906419345ea301e0c Mon Sep 17 00:00:00 2001 From: Stanley Ou Date: Fri, 26 Apr 2024 18:00:11 -0400 Subject: [PATCH 28/32] Added period to errors parameter descriptions. --- pandas/core/arrays/datetimelike.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index b6c382985e151..bb6ea43b5d0a2 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1764,10 +1764,10 @@ def strftime(self, date_format: str, errors="raise") -> npt.NDArray[np.object_]: date_format : str Date format string (e.g. "%%Y-%%m-%%d"). errors : 'ignore', 'warn', default 'raise' - - 'raise' (default) does not catch any underlying error and raise them as is - - 'ignore' catches all errors and silently replace the output with None + - 'raise' (default) does not catch any underlying error and raises them. + - 'ignore' catches all errors and silently replace the output with None. - 'warn' has the same behaviour as 'ignore' and issue a StrftimeErrorWarning - warning message + warning message. Returns ------- From b365efbb49d82c8997266456d397e262de2b3790 Mon Sep 17 00:00:00 2001 From: Stanley Ou Date: Fri, 26 Apr 2024 18:36:35 -0400 Subject: [PATCH 29/32] StrftimeErrorWarning summary now one line, added see also --- pandas/errors/__init__.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 4d9eff4671cdb..8d5a8a433b047 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -689,8 +689,11 @@ class CategoricalConversionWarning(Warning): class StrftimeErrorWarning(Warning): """ - Warning is raised by ``Timestamp.strftime`` when a ValueError or - NotImplementedError occurs, under the ``errors='warn'`` parameter value. + Raised by ``Series.dt.strftime`` when an error occurs, when errors is set to 'warn'. + + See Also + -------- + Series.dt.strftime Examples -------- From 782453491cacaed23748fe81f5bc9f57778bdc63 Mon Sep 17 00:00:00 2001 From: Stanley Ou Date: Fri, 26 Apr 2024 19:13:14 -0400 Subject: [PATCH 30/32] see also description --- pandas/errors/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 8d5a8a433b047..60af1b06d68b1 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -693,7 +693,7 @@ class StrftimeErrorWarning(Warning): See Also -------- - Series.dt.strftime + Series.dt.strftime : Convert to Index using specified date_format. Examples -------- From b644b864ab5100908779fb3df9e43e091f17adb1 Mon Sep 17 00:00:00 2001 From: Stanley Ou Date: Fri, 26 Apr 2024 19:44:30 -0400 Subject: [PATCH 31/32] added # doctest: +SKIP to StrftimeErrorWarning init --- pandas/errors/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 60af1b06d68b1..091482e05e90f 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -698,7 +698,7 @@ class StrftimeErrorWarning(Warning): Examples -------- >>> dta = pd.DatetimeIndex(np.array(["-0020-01-01", "2020-01-02"], "datetime64[s]")) - >>> dta.strftime("%y", errors="warn") + >>> dta.strftime("%y", errors="warn") # doctest: +SKIP """ From 281c243aee833f668afd48642588080a1227e4a6 Mon Sep 17 00:00:00 2001 From: Stanley Ou Date: Sat, 27 Apr 2024 01:57:05 -0400 Subject: [PATCH 32/32] strftime parameter description formatting --- pandas/core/arrays/datetimelike.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index bb6ea43b5d0a2..c10c03056fdd6 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1764,10 +1764,11 @@ def strftime(self, date_format: str, errors="raise") -> npt.NDArray[np.object_]: date_format : str Date format string (e.g. "%%Y-%%m-%%d"). errors : 'ignore', 'warn', default 'raise' + - 'raise' (default) does not catch any underlying error and raises them. - 'ignore' catches all errors and silently replace the output with None. - 'warn' has the same behaviour as 'ignore' and issue a StrftimeErrorWarning - warning message. + warning message. Returns -------