Skip to content

Commit

Permalink
PERF: op(frame, series) when series is not EA (pandas-dev#33600)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and rhshadrach committed May 10, 2020
1 parent ef3f430 commit d455c21
Show file tree
Hide file tree
Showing 9 changed files with 73 additions and 55 deletions.
10 changes: 7 additions & 3 deletions asv_bench/benchmarks/arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def time_series_op_with_fill_value_no_nas(self):
self.ser.add(self.ser, fill_value=4)


class MixedFrameWithSeriesAxis0:
class MixedFrameWithSeriesAxis:
params = [
[
"eq",
Expand All @@ -78,7 +78,7 @@ class MixedFrameWithSeriesAxis0:
"gt",
"add",
"sub",
"div",
"truediv",
"floordiv",
"mul",
"pow",
Expand All @@ -87,15 +87,19 @@ class MixedFrameWithSeriesAxis0:
param_names = ["opname"]

# Benchmark fixture: builds a mixed-dtype frame (an integer block from
# np.arange plus a float column "C") and caches the operands on self.
# `opname` is the benchmark parameter; it is not used while building the data.
def setup(self, opname):
# NOTE(review): two consecutive assignments to `arr` appear here; this view
# looks like a rendered diff, so the 100-row reshape is presumably the
# removed line and the 1000-row reshape the added one - confirm.
arr = np.arange(10 ** 6).reshape(100, -1)
arr = np.arange(10 ** 6).reshape(1000, -1)
df = DataFrame(arr)
# Adding a float column makes the frame mixed-dtype.
df["C"] = 1.0
self.df = df
# Column 0 of the frame.
self.ser = df[0]
# First row of the frame (positional lookup).
self.row = df.iloc[0]

# Benchmark: look up the DataFrame method named `opname` and call it with the
# cached Series, passing axis=0. The result is discarded.
def time_frame_op_with_series_axis0(self, opname):
getattr(self.df, opname)(self.ser, axis=0)

# Benchmark: apply the `operator`-module function named `opname` to the frame
# and the cached Series. The result is discarded.
# NOTE(review): this passes self.ser (column 0), while setup also stores
# self.row, which none of the benchmarks visible here use - confirm which
# operand was intended.
def time_frame_op_with_series_axis1(self, opname):
getattr(operator, opname)(self.df, self.ser)


class Ops:

Expand Down
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/stat_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ class FrameOps:
param_names = ["op", "dtype", "axis"]

def setup(self, op, dtype, axis):
if op == "mad" and dtype == "Int64" and axis == 1:
# GH-33036
if op == "mad" and dtype == "Int64":
# GH-33036, GH#33600
raise NotImplementedError
values = np.random.randn(100000, 4)
if dtype == "Int64":
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,7 @@ Performance improvements
- Performance improvement in :class:`Timedelta` constructor (:issue:`30543`)
- Performance improvement in :class:`Timestamp` constructor (:issue:`30543`)
- Performance improvement in flex arithmetic ops between :class:`DataFrame` and :class:`Series` with ``axis=0`` (:issue:`31296`)
- Performance improvement in arithmetic ops between :class:`DataFrame` and :class:`Series` with ``axis=1`` (:issue:`33600`)
- The internal index method :meth:`~Index._shallow_copy` now copies cached attributes over to the new index,
avoiding creating these again on the new index. This can speed up many operations that depend on creating copies of
existing indexes (:issue:`28584`, :issue:`32640`, :issue:`32669`)
Expand Down
21 changes: 16 additions & 5 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -518,11 +518,22 @@ def __truediv__(self, other):
return self._data / other

elif is_object_dtype(other.dtype):
# Note: we do not do type inference on the result, so either
# an object array or numeric-dtyped (if numpy does inference)
# will be returned. GH#23829
result = [self[n] / other[n] for n in range(len(self))]
result = np.array(result)
# We operate on raveled arrays to avoid problems in inference
# on NaT
srav = self.ravel()
orav = other.ravel()
result = [srav[n] / orav[n] for n in range(len(srav))]
result = np.array(result).reshape(self.shape)

# We need to do dtype inference in order to keep DataFrame ops
# behavior consistent with Series behavior
inferred = lib.infer_dtype(result)
if inferred == "timedelta":
flat = result.ravel()
result = type(self)._from_sequence(flat).reshape(result.shape)
elif inferred == "floating":
result = result.astype(float)

return result

else:
Expand Down
10 changes: 10 additions & 0 deletions pandas/core/ops/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,16 @@ def _combine_series_frame(left, right, func, axis: int, str_rep: str):
new_data = dispatch_to_series(left, right, func)

else:
rvalues = right._values
if isinstance(rvalues, np.ndarray):
# We can operate block-wise
rvalues = rvalues.reshape(1, -1)
rvalues = np.broadcast_to(rvalues, left.shape)

array_op = get_array_op(func, str_rep=str_rep)
bm = left._mgr.apply(array_op, right=rvalues.T, align_keys=["right"])
return type(left)(bm)

new_data = dispatch_to_series(left, right, func, axis="columns")

return left._construct_result(new_data)
Expand Down
10 changes: 8 additions & 2 deletions pandas/tests/arithmetic/test_datetime64.py
Original file line number Diff line number Diff line change
Expand Up @@ -1473,7 +1473,10 @@ def test_dt64arr_add_sub_offset_ndarray(self, tz_naive_fixture, box_with_array):

other = np.array([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)])

warn = None if box_with_array is pd.DataFrame else PerformanceWarning
warn = PerformanceWarning
if box_with_array is pd.DataFrame and tz is not None:
warn = None

with tm.assert_produces_warning(warn):
res = dtarr + other
expected = DatetimeIndex(
Expand Down Expand Up @@ -2438,7 +2441,10 @@ def test_dti_addsub_object_arraylike(
expected = pd.DatetimeIndex(["2017-01-31", "2017-01-06"], tz=tz_naive_fixture)
expected = tm.box_expected(expected, xbox)

warn = None if box_with_array is pd.DataFrame else PerformanceWarning
warn = PerformanceWarning
if box_with_array is pd.DataFrame and tz is not None:
warn = None

with tm.assert_produces_warning(warn):
result = dtarr + other
tm.assert_equal(result, expected)
Expand Down
36 changes: 14 additions & 22 deletions pandas/tests/arithmetic/test_timedelta64.py
Original file line number Diff line number Diff line change
Expand Up @@ -1327,14 +1327,11 @@ def test_td64arr_add_offset_index(self, names, box):
tdi = tm.box_expected(tdi, box)
expected = tm.box_expected(expected, box)

# The DataFrame operation is transposed and so operates as separate
# scalar operations, which do not issue a PerformanceWarning
warn = PerformanceWarning if box is not pd.DataFrame else None
with tm.assert_produces_warning(warn):
with tm.assert_produces_warning(PerformanceWarning):
res = tdi + other
tm.assert_equal(res, expected)

with tm.assert_produces_warning(warn):
with tm.assert_produces_warning(PerformanceWarning):
res2 = other + tdi
tm.assert_equal(res2, expected)

Expand All @@ -1353,14 +1350,11 @@ def test_td64arr_add_offset_array(self, box_with_array):
tdi = tm.box_expected(tdi, box)
expected = tm.box_expected(expected, box)

# The DataFrame operation is transposed and so operates as separate
# scalar operations, which do not issue a PerformanceWarning
warn = PerformanceWarning if box is not pd.DataFrame else None
with tm.assert_produces_warning(warn):
with tm.assert_produces_warning(PerformanceWarning):
res = tdi + other
tm.assert_equal(res, expected)

with tm.assert_produces_warning(warn):
with tm.assert_produces_warning(PerformanceWarning):
res2 = other + tdi
tm.assert_equal(res2, expected)

Expand Down Expand Up @@ -1389,10 +1383,7 @@ def test_td64arr_sub_offset_index(self, names, box_with_array):
tdi = tm.box_expected(tdi, box)
expected = tm.box_expected(expected, xbox)

# The DataFrame operation is transposed and so operates as separate
# scalar operations, which do not issue a PerformanceWarning
warn = PerformanceWarning if box is not pd.DataFrame else None
with tm.assert_produces_warning(warn):
with tm.assert_produces_warning(PerformanceWarning):
res = tdi - other
tm.assert_equal(res, expected)

Expand All @@ -1408,10 +1399,7 @@ def test_td64arr_sub_offset_array(self, box_with_array):
tdi = tm.box_expected(tdi, box_with_array)
expected = tm.box_expected(expected, box_with_array)

# The DataFrame operation is transposed and so operates as separate
# scalar operations, which do not issue a PerformanceWarning
warn = None if box_with_array is pd.DataFrame else PerformanceWarning
with tm.assert_produces_warning(warn):
with tm.assert_produces_warning(PerformanceWarning):
res = tdi - other
tm.assert_equal(res, expected)

Expand Down Expand Up @@ -1482,28 +1470,31 @@ def test_td64arr_add_sub_object_array(self, box_with_array):
[pd.Timedelta(days=1), pd.offsets.Day(2), pd.Timestamp("2000-01-04")]
)

warn = PerformanceWarning if box_with_array is not pd.DataFrame else None
with tm.assert_produces_warning(warn):
with tm.assert_produces_warning(PerformanceWarning):
result = tdarr + other

expected = pd.Index(
[pd.Timedelta(days=2), pd.Timedelta(days=4), pd.Timestamp("2000-01-07")]
)
expected = tm.box_expected(expected, box_with_array)
if box_with_array is pd.DataFrame:
expected = expected.astype(object)
tm.assert_equal(result, expected)

msg = "unsupported operand type|cannot subtract a datelike"
with pytest.raises(TypeError, match=msg):
with tm.assert_produces_warning(warn):
with tm.assert_produces_warning(PerformanceWarning):
tdarr - other

with tm.assert_produces_warning(warn):
with tm.assert_produces_warning(PerformanceWarning):
result = other - tdarr

expected = pd.Index(
[pd.Timedelta(0), pd.Timedelta(0), pd.Timestamp("2000-01-01")]
)
expected = tm.box_expected(expected, box_with_array)
if box_with_array is pd.DataFrame:
expected = expected.astype(object)
tm.assert_equal(result, expected)


Expand Down Expand Up @@ -2043,6 +2034,7 @@ def test_td64arr_div_numeric_array(self, box_with_array, vector, any_real_dtype)
expected = [tdser.iloc[0, n] / vector[n] for n in range(len(vector))]
else:
expected = [tdser[n] / vector[n] for n in range(len(tdser))]
expected = pd.Index(expected) # do dtype inference
expected = tm.box_expected(expected, xbox)
tm.assert_equal(result, expected)

Expand Down
9 changes: 1 addition & 8 deletions pandas/tests/frame/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -613,13 +613,6 @@ def test_df_arith_2d_array_rowlike_broadcasts(self, all_arithmetic_operators):

expected = pd.DataFrame(exvals, columns=df.columns, index=df.index)

if opname in ["__rmod__", "__rfloordiv__"]:
# exvals will have dtypes [f8, i8, i8], so expected will be
# all-f8, but the DataFrame operation will return mixed dtypes.
# Use exvals[-1].dtype instead of "i8" for compat with 32-bit
# systems/pythons.
expected[False] = expected[False].astype(exvals[-1].dtype)

result = getattr(df, opname)(rowlike)
tm.assert_frame_equal(result, expected)

Expand Down Expand Up @@ -1042,7 +1035,7 @@ def test_combine_series(

# no upcast needed
added = mixed_float_frame + series
_check_mixed_float(added)
assert np.all(added.dtypes == series.dtype)

# vs mix (upcast) as needed
added = mixed_float_frame + series.astype("float32")
Expand Down
27 changes: 14 additions & 13 deletions pandas/tests/series/test_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,23 +266,24 @@ def test_scalar_na_logical_ops_corners(self):
result = s & list(s)
tm.assert_series_equal(result, expected)

def test_scalar_na_logical_ops_corners_aligns(self):
s = Series([2, 3, 4, 5, 6, 7, 8, 9, datetime(2005, 1, 1)])
s[::2] = np.nan
d = DataFrame({"A": s})
# TODO: Fix this exception - needs to be fixed! (see GH5035)
# (previously this was a TypeError because series returned
# NotImplemented)

# this is an alignment issue; these are equivalent
# https://github.com/pandas-dev/pandas/issues/5284
expected = DataFrame(False, index=range(9), columns=["A"] + list(range(9)))

with pytest.raises(TypeError):
d.__and__(s, axis="columns")
with pytest.raises(TypeError):
d.__and__(s, axis=1)
result = d.__and__(s, axis="columns")
tm.assert_frame_equal(result, expected)

with pytest.raises(TypeError):
s & d
with pytest.raises(TypeError):
d & s
result = d.__and__(s, axis=1)
tm.assert_frame_equal(result, expected)

result = s & d
tm.assert_frame_equal(result, expected)

result = d & s
tm.assert_frame_equal(result, expected)

expected = (s & s).to_frame("A")
result = d.__and__(s, axis="index")
Expand Down

0 comments on commit d455c21

Please sign in to comment.