Skip to content

Commit

Permalink
Merge branch 'main' into ci/cache/daily
Browse files Browse the repository at this point in the history
  • Loading branch information
mroeschke committed May 15, 2024
2 parents 17c4a96 + 0fc0336 commit 655e6ef
Show file tree
Hide file tree
Showing 26 changed files with 215 additions and 354 deletions.
32 changes: 7 additions & 25 deletions ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.Series.cat.rename_categories PR01,PR02" \
-i "pandas.Series.cat.reorder_categories PR01,PR02" \
-i "pandas.Series.cat.set_categories PR01,PR02" \
-i "pandas.Series.div PR07" \
-i "pandas.Series.dt.as_unit PR01,PR02" \
-i "pandas.Series.dt.ceil PR01,PR02" \
-i "pandas.Series.dt.components SA01" \
Expand All @@ -166,33 +165,23 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.Series.dt.tz_convert PR01,PR02" \
-i "pandas.Series.dt.tz_localize PR01,PR02" \
-i "pandas.Series.dt.unit GL08" \
-i "pandas.Series.eq PR07,SA01" \
-i "pandas.Series.floordiv PR07" \
-i "pandas.Series.ge PR07,SA01" \
-i "pandas.Series.gt PR07,SA01" \
-i "pandas.Series.eq SA01" \
-i "pandas.Series.ge SA01" \
-i "pandas.Series.gt SA01" \
-i "pandas.Series.kurt RT03,SA01" \
-i "pandas.Series.kurtosis RT03,SA01" \
-i "pandas.Series.le PR07,SA01" \
-i "pandas.Series.le SA01" \
-i "pandas.Series.list.__getitem__ SA01" \
-i "pandas.Series.list.flatten SA01" \
-i "pandas.Series.list.len SA01" \
-i "pandas.Series.lt PR07,SA01" \
-i "pandas.Series.ne PR07,SA01" \
-i "pandas.Series.lt SA01" \
-i "pandas.Series.ne SA01" \
-i "pandas.Series.pad PR01,SA01" \
-i "pandas.Series.plot PR02,SA01" \
-i "pandas.Series.pop RT03,SA01" \
-i "pandas.Series.pow PR07" \
-i "pandas.Series.prod RT03" \
-i "pandas.Series.product RT03" \
-i "pandas.Series.radd PR07" \
-i "pandas.Series.rdiv PR07" \
-i "pandas.Series.reorder_levels RT03,SA01" \
-i "pandas.Series.rfloordiv PR07" \
-i "pandas.Series.rmod PR07" \
-i "pandas.Series.rmul PR07" \
-i "pandas.Series.rpow PR07" \
-i "pandas.Series.rsub PR07" \
-i "pandas.Series.rtruediv PR07" \
-i "pandas.Series.sem PR01,RT03,SA01" \
-i "pandas.Series.skew RT03,SA01" \
-i "pandas.Series.sparse PR01,SA01" \
Expand Down Expand Up @@ -232,14 +221,12 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.Series.str.wrap RT03,SA01" \
-i "pandas.Series.str.zfill RT03" \
-i "pandas.Series.struct.dtypes SA01" \
-i "pandas.Series.sub PR07" \
-i "pandas.Series.sum RT03" \
-i "pandas.Series.swaplevel SA01" \
-i "pandas.Series.to_dict SA01" \
-i "pandas.Series.to_frame SA01" \
-i "pandas.Series.to_markdown SA01" \
-i "pandas.Series.to_string SA01" \
-i "pandas.Series.truediv PR07" \
-i "pandas.Series.update PR07,SA01" \
-i "pandas.Series.var PR01,RT03,SA01" \
-i "pandas.Timedelta PR07,SA01" \
Expand Down Expand Up @@ -290,7 +277,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.Timestamp.quarter SA01" \
-i "pandas.Timestamp.replace PR07,SA01" \
-i "pandas.Timestamp.resolution PR02,PR07,SA01" \
-i "pandas.Timestamp.round SA01" \
-i "pandas.Timestamp.second GL08" \
-i "pandas.Timestamp.strptime PR01,SA01" \
-i "pandas.Timestamp.time SA01" \
Expand All @@ -301,7 +287,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.Timestamp.to_julian_date SA01" \
-i "pandas.Timestamp.to_numpy PR01" \
-i "pandas.Timestamp.to_period PR01,SA01" \
-i "pandas.Timestamp.to_pydatetime PR01,SA01" \
-i "pandas.Timestamp.today SA01" \
-i "pandas.Timestamp.toordinal SA01" \
-i "pandas.Timestamp.tz SA01" \
Expand Down Expand Up @@ -548,7 +533,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.tseries.api.guess_datetime_format SA01" \
-i "pandas.tseries.offsets.BDay PR02,SA01" \
-i "pandas.tseries.offsets.BMonthBegin PR02" \
-i "pandas.tseries.offsets.BMonthEnd PR02" \
-i "pandas.tseries.offsets.BQuarterBegin PR02" \
-i "pandas.tseries.offsets.BQuarterBegin.freqstr SA01" \
-i "pandas.tseries.offsets.BQuarterBegin.is_on_offset GL08" \
Expand Down Expand Up @@ -609,7 +593,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.tseries.offsets.BusinessMonthBegin.nanos GL08" \
-i "pandas.tseries.offsets.BusinessMonthBegin.normalize GL08" \
-i "pandas.tseries.offsets.BusinessMonthBegin.rule_code GL08" \
-i "pandas.tseries.offsets.BusinessMonthEnd PR02" \
-i "pandas.tseries.offsets.BusinessMonthEnd.freqstr SA01" \
-i "pandas.tseries.offsets.BusinessMonthEnd.is_on_offset GL08" \
-i "pandas.tseries.offsets.BusinessMonthEnd.n GL08" \
Expand Down Expand Up @@ -754,7 +737,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.tseries.offsets.MonthBegin.nanos GL08" \
-i "pandas.tseries.offsets.MonthBegin.normalize GL08" \
-i "pandas.tseries.offsets.MonthBegin.rule_code GL08" \
-i "pandas.tseries.offsets.MonthEnd PR02" \
-i "pandas.tseries.offsets.MonthEnd.freqstr SA01" \
-i "pandas.tseries.offsets.MonthEnd.is_on_offset GL08" \
-i "pandas.tseries.offsets.MonthEnd.n GL08" \
Expand Down Expand Up @@ -799,7 +781,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.tseries.offsets.SemiMonthBegin.nanos GL08" \
-i "pandas.tseries.offsets.SemiMonthBegin.normalize GL08" \
-i "pandas.tseries.offsets.SemiMonthBegin.rule_code GL08" \
-i "pandas.tseries.offsets.SemiMonthEnd PR02,SA01" \
-i "pandas.tseries.offsets.SemiMonthEnd SA01" \
-i "pandas.tseries.offsets.SemiMonthEnd.day_of_month GL08" \
-i "pandas.tseries.offsets.SemiMonthEnd.freqstr SA01" \
-i "pandas.tseries.offsets.SemiMonthEnd.is_on_offset GL08" \
Expand Down
2 changes: 1 addition & 1 deletion doc/source/user_guide/indexing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1711,6 +1711,6 @@ Why does assignment fail when using chained indexing?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

:ref:`Copy-on-Write <copy_on_write>` is the new default with pandas 3.0.
This means than chained indexing will never work.
This means that chained indexing will never work.
See :ref:`this section <copy_on_write_chained_assignment>`
for more context.
2 changes: 0 additions & 2 deletions doc/source/user_guide/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1511,7 +1511,6 @@ Currently, options unsupported by the C and pyarrow engines include:

* ``sep`` other than a single character (e.g. regex separators)
* ``skipfooter``
* ``sep=None`` with ``delim_whitespace=False``

Specifying any of the above options will produce a ``ParserWarning`` unless the
python engine is selected explicitly using ``engine='python'``.
Expand All @@ -1526,7 +1525,6 @@ Options that are unsupported by the pyarrow engine which are not covered by the
* ``memory_map``
* ``dialect``
* ``on_bad_lines``
* ``delim_whitespace``
* ``quoting``
* ``lineterminator``
* ``converters``
Expand Down
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,9 @@ Removal of prior version deprecations/changes
- Removed deprecated "method" and "limit" keywords from :meth:`Series.replace` and :meth:`DataFrame.replace` (:issue:`53492`)
- Removed extension test classes ``BaseNoReduceTests``, ``BaseNumericReduceTests``, ``BaseBooleanReduceTests`` (:issue:`54663`)
- Removed the "closed" and "normalize" keywords in :meth:`DatetimeIndex.__new__` (:issue:`52628`)
- Removed the deprecated ``delim_whitespace`` keyword in :func:`read_csv` and :func:`read_table`, use ``sep=r"\s+"`` instead (:issue:`55569`)
- Require :meth:`SparseDtype.fill_value` to be a valid value for the :meth:`SparseDtype.subtype` (:issue:`53043`)
- Stopped performing dtype inference when setting an :class:`Index` into a :class:`DataFrame` (:issue:`56102`)
- Stopped performing dtype inference in :meth:`Index.insert` with object-dtype index; this often affects the index/columns that result when setting new entries into an empty :class:`Series` or :class:`DataFrame` (:issue:`51363`)
- Removed the "closed" and "unit" keywords in :meth:`TimedeltaIndex.__new__` (:issue:`52628`, :issue:`55499`)
- All arguments in :meth:`Index.sort_values` are now keyword only (:issue:`56493`)
Expand Down
39 changes: 38 additions & 1 deletion pandas/_libs/tslibs/nattype.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -882,7 +882,30 @@ class NaTType(_NaT):
"""
Convert a Timestamp object to a native Python datetime object.
If warn=True, issue a warning if nanoseconds is nonzero.
This method is useful when you need to use a pandas Timestamp
object in contexts where native Python datetime objects are expected
or required. The conversion discards the nanoseconds component, and a
warning can be issued in such cases if desired.
Parameters
----------
warn : bool, default True
If True, issues a warning when the timestamp includes nonzero
nanoseconds, as these will be discarded during the conversion.
Returns
-------
datetime.datetime or NaT
Returns a datetime.datetime object representing the timestamp,
with year, month, day, hour, minute, second, and microsecond components.
If the timestamp is NaT (Not a Time), returns NaT.
See Also
--------
datetime.datetime : The standard Python datetime class that this method
returns.
Timestamp.timestamp : Convert a Timestamp object to POSIX timestamp.
Timestamp.to_datetime64 : Convert a Timestamp object to numpy.datetime64.
Examples
--------
Expand Down Expand Up @@ -947,6 +970,12 @@ class NaTType(_NaT):
"""
Round the Timestamp to the specified resolution.
This method rounds the given Timestamp to the nearest multiple of the
specified frequency. It is particularly useful in data analysis to
normalize timestamps to regular frequency intervals. For instance,
rounding to the nearest minute, hour, or day can help in time series
comparisons or resampling operations.
Parameters
----------
freq : str
Expand Down Expand Up @@ -981,6 +1010,14 @@ timedelta}, default 'raise'
------
ValueError if the freq cannot be converted
See Also
--------
DatetimeIndex.round : Round each value of a DatetimeIndex to the specified
frequency (the standard library datetime class has no round method).
Timestamp.floor : Round the Timestamp downward to the nearest multiple
of the specified frequency.
Timestamp.ceil : Round the Timestamp upward to the nearest multiple of
the specified frequency.
Notes
-----
If the Timestamp has a timezone, rounding will take place relative to the
Expand Down
6 changes: 3 additions & 3 deletions pandas/_libs/tslibs/offsets.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2939,7 +2939,7 @@ cdef class MonthEnd(MonthOffset):
MonthEnd goes to the next date which is an end of the month.
Parameters
Attributes
----------
n : int, default 1
The number of months represented.
Expand Down Expand Up @@ -3014,7 +3014,7 @@ cdef class BusinessMonthEnd(MonthOffset):
BusinessMonthEnd goes to the next date which is the last business day of the month.
Parameters
Attributes
----------
n : int, default 1
The number of months represented.
Expand Down Expand Up @@ -3222,7 +3222,7 @@ cdef class SemiMonthEnd(SemiMonthOffset):
"""
Two DateOffsets per month, repeating on the last day of the month and on day_of_month.
Parameters
Attributes
----------
n : int, default 1
The number of months represented.
Expand Down
39 changes: 38 additions & 1 deletion pandas/_libs/tslibs/timestamps.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1196,7 +1196,30 @@ cdef class _Timestamp(ABCTimestamp):
"""
Convert a Timestamp object to a native Python datetime object.
If warn=True, issue a warning if nanoseconds is nonzero.
This method is useful when you need to use a pandas Timestamp
object in contexts where native Python datetime objects are expected
or required. The conversion discards the nanoseconds component, and a
warning can be issued in such cases if desired.
Parameters
----------
warn : bool, default True
If True, issues a warning when the timestamp includes nonzero
nanoseconds, as these will be discarded during the conversion.
Returns
-------
datetime.datetime or NaT
Returns a datetime.datetime object representing the timestamp,
with year, month, day, hour, minute, second, and microsecond components.
If the timestamp is NaT (Not a Time), returns NaT.
See Also
--------
datetime.datetime : The standard Python datetime class that this method
returns.
Timestamp.timestamp : Convert a Timestamp object to POSIX timestamp.
Timestamp.to_datetime64 : Convert a Timestamp object to numpy.datetime64.
Examples
--------
Expand Down Expand Up @@ -2015,6 +2038,12 @@ class Timestamp(_Timestamp):
"""
Round the Timestamp to the specified resolution.
This method rounds the given Timestamp to the nearest multiple of the
specified frequency. It is particularly useful in data analysis to
normalize timestamps to regular frequency intervals. For instance,
rounding to the nearest minute, hour, or day can help in time series
comparisons or resampling operations.
Parameters
----------
freq : str
Expand Down Expand Up @@ -2049,6 +2078,14 @@ timedelta}, default 'raise'
------
ValueError if the freq cannot be converted
See Also
--------
DatetimeIndex.round : Round each value of a DatetimeIndex to the specified
frequency (the standard library datetime class has no round method).
Timestamp.floor : Round the Timestamp downward to the nearest multiple
of the specified frequency.
Timestamp.ceil : Round the Timestamp upward to the nearest multiple of
the specified frequency.
Notes
-----
If the Timestamp has a timezone, rounding will take place relative to the
Expand Down
14 changes: 2 additions & 12 deletions pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@
ensure_object,
is_list_like,
is_object_dtype,
is_string_dtype,
pandas_dtype,
)
from pandas.core.dtypes.dtypes import NumpyEADtype
Expand Down Expand Up @@ -555,9 +554,7 @@ def sanitize_array(
# Avoid ending up with a NumpyExtensionArray
dtype = dtype.numpy_dtype

object_index = False
if isinstance(data, ABCIndex) and data.dtype == object and dtype is None:
object_index = True
data_was_index = isinstance(data, ABCIndex)

# extract ndarray or ExtensionArray, ensure we have no NumpyExtensionArray
data = extract_array(data, extract_numpy=True, extract_range=True)
Expand Down Expand Up @@ -610,15 +607,8 @@ def sanitize_array(

if dtype is None:
subarr = data
if data.dtype == object:
if data.dtype == object and not data_was_index:
subarr = maybe_infer_to_datetimelike(data)
if (
object_index
and using_pyarrow_string_dtype()
and is_string_dtype(subarr)
):
# Avoid inference when string option is set
subarr = data
elif data.dtype.kind == "U" and using_pyarrow_string_dtype():
from pandas.core.arrays.string_ import StringDtype

Expand Down
17 changes: 1 addition & 16 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5059,22 +5059,7 @@ def _sanitize_column(self, value) -> tuple[ArrayLike, BlockValuesRefs | None]:

if is_list_like(value):
com.require_length_match(value, self.index)
arr = sanitize_array(value, self.index, copy=True, allow_2d=True)
if (
isinstance(value, Index)
and value.dtype == "object"
and arr.dtype != value.dtype
): #
# TODO: Remove kludge in sanitize_array for string mode when enforcing
# this deprecation
warnings.warn(
"Setting an Index with object dtype into a DataFrame will stop "
"inferring another dtype in a future version. Cast the Index "
"explicitly before setting it into the DataFrame.",
FutureWarning,
stacklevel=find_stack_level(),
)
return arr, None
return sanitize_array(value, self.index, copy=True, allow_2d=True), None

@property
def _series(self):
Expand Down
1 change: 1 addition & 0 deletions pandas/core/ops/docstrings.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,7 @@ def make_flex_doc(op_name: str, typ: str) -> str:
Parameters
----------
other : Series or scalar value
The second operand in this operation.
level : int or name
Broadcast across a level, matching Index values on the
passed MultiIndex level.
Expand Down
Loading

0 comments on commit 655e6ef

Please sign in to comment.