From b8f14c4a6815a66ac701ba755b607f7cefe34b2d Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli Date: Tue, 27 Jun 2023 19:26:34 +0100 Subject: [PATCH] Backport PR #53795 on branch 2.0.x (BUG: fixes weekday for dates before 1752) (#53884) Backport PR #53795: BUG: fixes weekday for dates before 1752 Co-authored-by: Conrad Mcgee Stocks --- doc/source/whatsnew/v2.0.3.rst | 1 + pandas/_libs/tslibs/ccalendar.pyx | 47 ++++++++++++++----- .../tests/scalar/timestamp/test_timestamp.py | 37 +++++++++++++++ 3 files changed, 72 insertions(+), 13 deletions(-) diff --git a/doc/source/whatsnew/v2.0.3.rst b/doc/source/whatsnew/v2.0.3.rst index a1acc9d92c694..8cfced2ee385a 100644 --- a/doc/source/whatsnew/v2.0.3.rst +++ b/doc/source/whatsnew/v2.0.3.rst @@ -13,6 +13,7 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ +- Bug in :meth:`Timestamp.weekday` was returning incorrect results before ``'0000-02-29'`` (:issue:`53738`) - Fixed performance regression in merging on datetime-like columns (:issue:`53231`) - Fixed regression when :meth:`DataFrame.to_string` creates extra space for string dtypes (:issue:`52690`) - diff --git a/pandas/_libs/tslibs/ccalendar.pyx b/pandas/_libs/tslibs/ccalendar.pyx index 19c732e2a313b..8b5b079649c2e 100644 --- a/pandas/_libs/tslibs/ccalendar.pyx +++ b/pandas/_libs/tslibs/ccalendar.pyx @@ -2,7 +2,6 @@ """ Cython implementations of functions resembling the stdlib calendar module """ - cimport cython from numpy cimport ( int32_t, @@ -19,7 +18,7 @@ cdef int32_t* days_per_month_array = [ 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] -cdef int* sakamoto_arr = [0, 3, 2, 5, 0, 3, 5, 1, 4, 6, 2, 4] +cdef int* em = [0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334] # The first 13 entries give the month days elapsed as of the first of month N # (or the total number of days in the year for N=13) in non-leap years. 
@@ -76,11 +75,22 @@ cpdef int32_t get_days_in_month(int year, Py_ssize_t month) nogil: @cython.wraparound(False) @cython.boundscheck(False) -@cython.cdivision -cdef int dayofweek(int y, int m, int d) nogil: +@cython.cdivision(True) +cdef long quot(long a , long b) noexcept nogil: + cdef long x + x = a/b + if (a < 0): + x -= (a % b != 0) + return x + + +@cython.wraparound(False) +@cython.boundscheck(False) +@cython.cdivision(True) +cdef int dayofweek(int y, int m, int d) noexcept nogil: """ Find the day of week for the date described by the Y/M/D triple y, m, d - using Sakamoto's method, from wikipedia. + using Gauss' method, from wikipedia. 0 represents Monday. See [1]_. @@ -103,16 +113,27 @@ cdef int dayofweek(int y, int m, int d) nogil: [1] https://docs.python.org/3/library/calendar.html#calendar.weekday [2] https://en.wikipedia.org/wiki/\ - Determination_of_the_day_of_the_week#Sakamoto.27s_methods + Determination_of_the_day_of_the_week#Gauss's_algorithm """ + # Note: this particular implementation comes from + # http://berndt-schwerdtfeger.de/wp-content/uploads/pdf/cal.pdf cdef: - int day - - y -= m < 3 - day = (y + y / 4 - y / 100 + y / 400 + sakamoto_arr[m - 1] + d) % 7 - # convert to python day - return (day + 6) % 7 - + long c + int g + int f + int e + + if (m < 3): + y -= 1 + + c = quot(y, 100) + g = y - c * 100 + f = 5 * (c - quot(c, 4) * 4) + e = em[m] + + if (m > 2): + e -= 1 + return (-1 + d + e + f + g + g/4) % 7 cdef bint is_leapyear(int64_t year) nogil: """ diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index afb4dd7422114..fb7868fd63fc3 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -14,6 +14,10 @@ tzlocal, tzutc, ) +from hypothesis import ( + given, + strategies as st, +) import numpy as np import pytest import pytz @@ -223,6 +227,39 @@ def test_resolution(self): assert dt.as_unit("ms").resolution == 
Timedelta(milliseconds=1) assert dt.as_unit("s").resolution == Timedelta(seconds=1) + @pytest.mark.parametrize( + "date_string, expected", + [ + ("0000-2-29", 1), + ("0000-3-1", 2), + ("1582-10-14", 3), + ("-0040-1-1", 4), + ("2023-06-18", 6), + ], + ) + def test_dow_historic(self, date_string, expected): + # GH 53738 + ts = Timestamp(date_string) + dow = ts.weekday() + assert dow == expected + + @given( + ts=st.datetimes(), + sign=st.sampled_from(["-", ""]), + ) + def test_dow_parametric(self, ts, sign): + # GH 53738 + ts = ( + f"{sign}{str(ts.year).zfill(4)}" + f"-{str(ts.month).zfill(2)}" + f"-{str(ts.day).zfill(2)}" + ) + result = Timestamp(ts).weekday() + expected = ( + (np.datetime64(ts) - np.datetime64("1970-01-01")).astype("int64") - 4 + ) % 7 + assert result == expected + class TestTimestamp: def test_default_to_stdlib_utc(self):