Skip to content

ENH: Rename DataFrame._append to _append_internal #62137

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3721,7 +3721,7 @@ def memory_usage(self, index: bool = True, deep: bool = False) -> Series:
index_memory_usage = self._constructor_sliced(
self.index.memory_usage(deep=deep), index=["Index"]
)
result = index_memory_usage._append(result)
result = index_memory_usage._append_internal(result)
return result

def transpose(
Expand Down Expand Up @@ -10882,7 +10882,7 @@ def infer(x):
# ----------------------------------------------------------------------
# Merging / joining methods

def _append(
def _append_internal(
self,
other,
ignore_index: bool = False,
Expand Down Expand Up @@ -11745,7 +11745,7 @@ def c(x):
idx_diff = result_index.difference(correl.index)

if len(idx_diff) > 0:
correl = correl._append(
correl = correl._append_internal(
Series([np.nan] * len(idx_diff), index=idx_diff)
)

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2323,7 +2323,7 @@ def _setitem_with_indexer_missing(self, indexer, value):
df = df.infer_objects()
self.obj._mgr = df._mgr
else:
self.obj._mgr = self.obj._append(value)._mgr
self.obj._mgr = self.obj._append_internal(value)._mgr

def _ensure_iterable_column_indexer(self, column_indexer):
"""
Expand Down
10 changes: 6 additions & 4 deletions pandas/core/reshape/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,7 +450,9 @@ def _add_margins(
if not values and isinstance(table, ABCSeries):
# If there are no values and the table is a series, then there is only
# one column in the data. Compute grand margin and return it.
return table._append(table._constructor({key: grand_margin[margins_name]}))
return table._append_internal(
table._constructor({key: grand_margin[margins_name]})
)

elif values:
marginal_result_set = _generate_marginal_results(
Expand Down Expand Up @@ -502,7 +504,7 @@ def _add_margins(
margin_dummy[cols] = margin_dummy[cols].apply(
maybe_downcast_to_dtype, args=(dtype,)
)
result = result._append(margin_dummy)
result = result._append_internal(margin_dummy)
result.index.names = row_names

return result
Expand Down Expand Up @@ -1185,7 +1187,7 @@ def _normalize(

elif normalize == "index":
index_margin = index_margin / index_margin.sum()
table = table._append(index_margin, ignore_index=True)
table = table._append_internal(index_margin, ignore_index=True)
table = table.fillna(0)
table.index = table_index

Expand All @@ -1194,7 +1196,7 @@ def _normalize(
index_margin = index_margin / index_margin.sum()
index_margin.loc[margins_name] = 1
table = concat([table, column_margin], axis=1)
table = table._append(index_margin, ignore_index=True)
table = table._append_internal(index_margin, ignore_index=True)

table = table.fillna(0)
table.index = table_index
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2991,7 +2991,7 @@ def searchsorted( # type: ignore[override]
# -------------------------------------------------------------------
# Combination

def _append(
def _append_internal(
self, to_append, ignore_index: bool = False, verify_integrity: bool = False
):
from pandas.core.reshape.concat import concat
Expand Down
60 changes: 30 additions & 30 deletions pandas/tests/reshape/concat/test_append.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,23 +28,23 @@ def test_append(self, sort, float_frame):
begin_frame = float_frame.reindex(begin_index)
end_frame = float_frame.reindex(end_index)

appended = begin_frame._append(end_frame)
appended = begin_frame._append_internal(end_frame)
tm.assert_almost_equal(appended["A"], float_frame["A"])

del end_frame["A"]
partial_appended = begin_frame._append(end_frame, sort=sort)
partial_appended = begin_frame._append_internal(end_frame, sort=sort)
assert "A" in partial_appended

partial_appended = end_frame._append(begin_frame, sort=sort)
partial_appended = end_frame._append_internal(begin_frame, sort=sort)
assert "A" in partial_appended

# mixed type handling
appended = mixed_frame[:5]._append(mixed_frame[5:])
appended = mixed_frame[:5]._append_internal(mixed_frame[5:])
tm.assert_frame_equal(appended, mixed_frame)

# what to test here
mixed_appended = mixed_frame[:5]._append(float_frame[5:], sort=sort)
mixed_appended2 = float_frame[:5]._append(mixed_frame[5:], sort=sort)
mixed_appended = mixed_frame[:5]._append_internal(float_frame[5:], sort=sort)
mixed_appended2 = float_frame[:5]._append_internal(mixed_frame[5:], sort=sort)

# all equal except 'foo' column
tm.assert_frame_equal(
Expand All @@ -55,18 +55,18 @@ def test_append(self, sort, float_frame):
def test_append_empty(self, float_frame):
empty = DataFrame()

appended = float_frame._append(empty)
appended = float_frame._append_internal(empty)
tm.assert_frame_equal(float_frame, appended)
assert appended is not float_frame

appended = empty._append(float_frame)
appended = empty._append_internal(float_frame)
tm.assert_frame_equal(float_frame, appended)
assert appended is not float_frame

def test_append_overlap_raises(self, float_frame):
msg = "Indexes have overlapping values"
with pytest.raises(ValueError, match=msg):
float_frame._append(float_frame, verify_integrity=True)
float_frame._append_internal(float_frame, verify_integrity=True)

def test_append_new_columns(self):
# see gh-6129: new columns
Expand All @@ -79,13 +79,13 @@ def test_append_new_columns(self):
"c": {"z": 7},
}
)
result = df._append(row)
result = df._append_internal(row)
tm.assert_frame_equal(result, expected)

def test_append_length0_frame(self, sort):
df = DataFrame(columns=["A", "B", "C"])
df3 = DataFrame(index=[0, 1], columns=["A", "B"])
df5 = df._append(df3, sort=sort)
df5 = df._append_internal(df3, sort=sort)

expected = DataFrame(index=[0, 1], columns=["A", "B", "C"])
tm.assert_frame_equal(df5, expected)
Expand All @@ -100,7 +100,7 @@ def test_append_records(self):
df1 = DataFrame(arr1)
df2 = DataFrame(arr2)

result = df1._append(df2, ignore_index=True)
result = df1._append_internal(df2, ignore_index=True)
expected = DataFrame(np.concatenate((arr1, arr2)))
tm.assert_frame_equal(result, expected)

Expand All @@ -109,7 +109,7 @@ def test_append_sorts(self, sort):
df1 = DataFrame({"a": [1, 2], "b": [1, 2]}, columns=["b", "a"])
df2 = DataFrame({"a": [1, 2], "c": [3, 4]}, index=[2, 3])

result = df1._append(df2, sort=sort)
result = df1._append_internal(df2, sort=sort)

# for None / True
expected = DataFrame(
Expand All @@ -133,7 +133,7 @@ def test_append_different_columns(self, sort):
a = df[:5].loc[:, ["bools", "ints", "floats"]]
b = df[5:].loc[:, ["strings", "ints", "floats"]]

appended = a._append(b, sort=sort)
appended = a._append_internal(b, sort=sort)
assert isna(appended["strings"][0:4]).all()
assert isna(appended["bools"][5:]).all()

Expand All @@ -145,12 +145,12 @@ def test_append_many(self, sort, float_frame):
float_frame[15:],
]

result = chunks[0]._append(chunks[1:])
result = chunks[0]._append_internal(chunks[1:])
tm.assert_frame_equal(result, float_frame)

chunks[-1] = chunks[-1].copy()
chunks[-1]["foo"] = "bar"
result = chunks[0]._append(chunks[1:], sort=sort)
result = chunks[0]._append_internal(chunks[1:], sort=sort)
tm.assert_frame_equal(result.loc[:, float_frame.columns], float_frame)
assert (result["foo"][15:] == "bar").all()
assert result["foo"][:15].isna().all()
Expand All @@ -162,7 +162,7 @@ def test_append_preserve_index_name(self):
df2 = DataFrame(data=[[1, 4, 7], [2, 5, 8], [3, 6, 9]], columns=["A", "B", "C"])
df2 = df2.set_index(["A"])

result = df1._append(df2)
result = df1._append_internal(df2)
assert result.index.name == "A"

indexes_can_append = [
Expand Down Expand Up @@ -193,7 +193,7 @@ def test_append_same_columns_type(self, index):
df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=index)
ser_index = index[:2]
ser = Series([7, 8], index=ser_index, name=2)
result = df._append(ser)
result = df._append_internal(ser)
expected = DataFrame(
[[1, 2, 3.0], [4, 5, 6], [7, 8, np.nan]], index=[0, 1, 2], columns=index
)
Expand All @@ -208,7 +208,7 @@ def test_append_same_columns_type(self, index):
index = index[:2]
df = DataFrame([[1, 2], [4, 5]], columns=index)
ser = Series([7, 8, 9], index=ser_index, name=2)
result = df._append(ser)
result = df._append_internal(ser)
expected = DataFrame(
[[1, 2, np.nan], [4, 5, np.nan], [7, 8, 9]],
index=[0, 1, 2],
Expand All @@ -229,7 +229,7 @@ def test_append_different_columns_types(self, df_columns, series_index):
df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=df_columns)
ser = Series([7, 8, 9], index=series_index, name=2)

result = df._append(ser)
result = df._append_internal(ser)
idx_diff = ser.index.difference(df_columns)
combined_columns = Index(df_columns.tolist()).append(idx_diff)
expected = DataFrame(
Expand Down Expand Up @@ -285,7 +285,7 @@ def test_append_dtype_coerce(self, sort):
axis=1,
sort=sort,
)
result = df1._append(df2, ignore_index=True, sort=sort)
result = df1._append_internal(df2, ignore_index=True, sort=sort)
if sort:
expected = expected[["end_time", "start_time"]]
else:
Expand All @@ -297,7 +297,7 @@ def test_append_missing_column_proper_upcast(self, sort):
df1 = DataFrame({"A": np.array([1, 2, 3, 4], dtype="i8")})
df2 = DataFrame({"B": np.array([True, False, True, False], dtype=bool)})

appended = df1._append(df2, ignore_index=True, sort=sort)
appended = df1._append_internal(df2, ignore_index=True, sort=sort)
assert appended["A"].dtype == "f8"
assert appended["B"].dtype == "O"

Expand All @@ -306,7 +306,7 @@ def test_append_empty_frame_to_series_with_dateutil_tz(self):
date = Timestamp("2018-10-24 07:30:00", tz=dateutil.tz.tzutc())
ser = Series({"a": 1.0, "b": 2.0, "date": date})
df = DataFrame(columns=["c", "d"])
result_a = df._append(ser, ignore_index=True)
result_a = df._append_internal(ser, ignore_index=True)
expected = DataFrame(
[[np.nan, np.nan, 1.0, 2.0, date]], columns=["c", "d", "a", "b", "date"]
)
Expand All @@ -320,30 +320,30 @@ def test_append_empty_frame_to_series_with_dateutil_tz(self):
)
expected["c"] = expected["c"].astype(object)
expected["d"] = expected["d"].astype(object)
result_b = result_a._append(ser, ignore_index=True)
result_b = result_a._append_internal(ser, ignore_index=True)
tm.assert_frame_equal(result_b, expected)

result = df._append([ser, ser], ignore_index=True)
result = df._append_internal([ser, ser], ignore_index=True)
tm.assert_frame_equal(result, expected)

def test_append_empty_tz_frame_with_datetime64ns(self):
# https://github.com/pandas-dev/pandas/issues/35460
df = DataFrame(columns=["a"]).astype("datetime64[ns, UTC]")

# pd.NaT gets inferred as tz-naive, so append result is tz-naive
result = df._append({"a": pd.NaT}, ignore_index=True)
result = df._append_internal({"a": pd.NaT}, ignore_index=True)
expected = DataFrame({"a": [pd.NaT]}, dtype=object)
tm.assert_frame_equal(result, expected)

# also test with typed value to append
df = DataFrame(columns=["a"]).astype("datetime64[ns, UTC]")
other = Series({"a": pd.NaT}, dtype="datetime64[ns]")
result = df._append(other, ignore_index=True)
result = df._append_internal(other, ignore_index=True)
tm.assert_frame_equal(result, expected)

# mismatched tz
other = Series({"a": pd.NaT}, dtype="datetime64[ns, US/Pacific]")
result = df._append(other, ignore_index=True)
result = df._append_internal(other, ignore_index=True)
expected = DataFrame({"a": [pd.NaT]}).astype(object)
tm.assert_frame_equal(result, expected)

Expand All @@ -356,7 +356,7 @@ def test_append_empty_frame_with_timedelta64ns_nat(self, dtype_str, val):
df = DataFrame(columns=["a"]).astype(dtype_str)

other = DataFrame({"a": [np.timedelta64(val, "ns")]})
result = df._append(other, ignore_index=True)
result = df._append_internal(other, ignore_index=True)

expected = other.astype(object)
tm.assert_frame_equal(result, expected)
Expand All @@ -370,7 +370,7 @@ def test_append_frame_with_timedelta64ns_nat(self, dtype_str, val):
df = DataFrame({"a": pd.array([1], dtype=dtype_str)})

other = DataFrame({"a": [np.timedelta64(val, "ns")]})
result = df._append(other, ignore_index=True)
result = df._append_internal(other, ignore_index=True)

expected = DataFrame({"a": [df.iloc[0, 0], other.iloc[0, 0]]}, dtype=object)
tm.assert_frame_equal(result, expected)
Loading
Loading