From 10f998d374bbe90373f699adba6517cd71505afc Mon Sep 17 00:00:00 2001 From: kantologist Date: Sat, 10 Mar 2018 17:57:21 +0100 Subject: [PATCH 1/8] DOC: Improved the docstring of DataFrame.update() --- pandas/core/frame.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a66d00fff9714..54f78f1cc6ddd 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4207,17 +4207,23 @@ def update(self, other, join='left', overwrite=True, filter_func=None, raise_conflict=False): """ Modify DataFrame in place using non-NA values from passed - DataFrame. Aligns on indices + DataFrame. + + Aligns on indices. Parameters ---------- other : DataFrame, or object coercible into a DataFrame + Index should be similar to one of the columns in this one. If a + Series is passed, its name attribute must be set, and that will be + used as the column name in the resulting joined DataFrame. join : {'left'}, default 'left' + Indicates which column values overwrite. overwrite : boolean, default True - If True then overwrite values for common keys in the calling frame + If True then overwrite values for common keys in the calling frame. filter_func : callable(1d-array) -> 1d-array, default None Can choose to replace values other than NA. Return True for values - that should be updated + that should be updated. raise_conflict : boolean If True, will raise an error if the DataFrame and other both contain data in the same place. 
@@ -4276,6 +4282,14 @@ def update(self, other, join='left', overwrite=True, filter_func=None, 0 1 4.0 1 2 500.0 2 3 6.0 + + See also + -------- + DataFrame.merge : For column(s)-on-columns(s) operations + + Returns + ------- + updated : DataFrame """ import pandas.core.computation.expressions as expressions # TODO: Support other joins From 714ead4fc25d387c087a8dabf0880c55979b2ad4 Mon Sep 17 00:00:00 2001 From: kantologist Date: Sun, 11 Mar 2018 00:27:04 +0100 Subject: [PATCH 2/8] DOC: Improved the docstring of DataFrame.update() --- pandas/core/frame.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 54f78f1cc6ddd..aa7319db25342 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4206,8 +4206,7 @@ def combiner(x, y, needs_i8_conversion=False): def update(self, other, join='left', overwrite=True, filter_func=None, raise_conflict=False): """ - Modify DataFrame in place using non-NA values from passed - DataFrame. + Modify in place using non-NA values from other DataFrame. Aligns on indices. @@ -4228,6 +4227,14 @@ def update(self, other, join='left', overwrite=True, filter_func=None, If True, will raise an error if the DataFrame and other both contain data in the same place. 
+ Returns + ------- + updated : DataFrame + + See Also + -------- + DataFrame.merge : For column(s)-on-columns(s) operations + Examples -------- >>> df = pd.DataFrame({'A': [1, 2, 3], @@ -4282,14 +4289,6 @@ def update(self, other, join='left', overwrite=True, filter_func=None, 0 1 4.0 1 2 500.0 2 3 6.0 - - See also - -------- - DataFrame.merge : For column(s)-on-columns(s) operations - - Returns - ------- - updated : DataFrame """ import pandas.core.computation.expressions as expressions # TODO: Support other joins From ee07bc68704068e2ff4ec662f55bc85438ff4579 Mon Sep 17 00:00:00 2001 From: kantologist Date: Sun, 11 Mar 2018 23:26:18 +0100 Subject: [PATCH 3/8] DOC: Improved the docstring of DataFrame.join() --- pandas/core/frame.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index aa7319db25342..2c3f6dfe0ced0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4217,7 +4217,8 @@ def update(self, other, join='left', overwrite=True, filter_func=None, Series is passed, its name attribute must be set, and that will be used as the column name in the resulting joined DataFrame. join : {'left'}, default 'left' - Indicates which column values overwrite. + Only left join is implemented, + keeping the index and columns of the original object. overwrite : boolean, default True If True then overwrite values for common keys in the calling frame. filter_func : callable(1d-array) -> 1d-array, default None @@ -4248,6 +4249,8 @@ def update(self, other, join='left', overwrite=True, filter_func=None, 1 2 5 2 3 6 + The DataFrame's length does not increase as a result of the update. + >>> df = pd.DataFrame({'A': ['a', 'b', 'c'], ... 'B': ['x', 'y', 'z']}) >>> new_df = pd.DataFrame({'B': ['d', 'e', 'f', 'g', 'h', 'i']}) @@ -4258,6 +4261,8 @@ def update(self, other, join='left', overwrite=True, filter_func=None, 1 b e 2 c f + For Series, its name attribute must be set. 
+ >>> df = pd.DataFrame({'A': ['a', 'b', 'c'], ... 'B': ['x', 'y', 'z']}) >>> new_column = pd.Series(['d', 'e'], name='B', index=[0, 2]) From 2ce79821f913045d94b3c9e8a418c300d601a42d Mon Sep 17 00:00:00 2001 From: kantologist Date: Sun, 11 Mar 2018 23:33:35 +0100 Subject: [PATCH 4/8] DOC: Improved the docstring of DataFrame.join() --- pandas/core/frame.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c85460754c51c..df4e9d75dd575 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4308,6 +4308,7 @@ def update(self, other, join='left', overwrite=True, filter_func=None, keeping the index and columns of the original object. overwrite : boolean, default True If True then overwrite values for common keys in the calling frame. + If False then only NA values in the calling object are updated. filter_func : callable(1d-array) -> 1d-array, default None Can choose to replace values other than NA. Return True for values that should be updated. From ff0b3fafe74d05f5713c5d586f4bc3900a3d961f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 12 Mar 2018 09:46:31 -0500 Subject: [PATCH 5/8] Update returns. Added Raises Reformatted join. Added dict.update to see also --- pandas/core/frame.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index df4e9d75dd575..64998c538cf4d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4293,9 +4293,9 @@ def combiner(x, y, needs_i8_conversion=False): def update(self, other, join='left', overwrite=True, filter_func=None, raise_conflict=False): """ - Modify in place using non-NA values from other DataFrame. + Modify in place using non-NA values from another DataFrame. - Aligns on indices. + Aligns on indices. There is no return value. 
Parameters ---------- @@ -4304,25 +4304,30 @@ def update(self, other, join='left', overwrite=True, filter_func=None, Series is passed, its name attribute must be set, and that will be used as the column name in the resulting joined DataFrame. join : {'left'}, default 'left' - Only left join is implemented, - keeping the index and columns of the original object. + Only left join is implemented, keeping the index and columns of the + original object. overwrite : boolean, default True - If True then overwrite values for common keys in the calling frame. - If False then only NA values in the calling object are updated. + How to handle non-NA values for overlapping keys. + + * True : overwrite values in `self` with values from `other`. + * False : only update values that are NA in `self`. + filter_func : callable(1d-array) -> 1d-array, default None Can choose to replace values other than NA. Return True for values that should be updated. raise_conflict : boolean - If True, will raise an error if the DataFrame and other both - contain data in the same place. + If True, will raise a `ValueError` if the DataFrame and `other` + both contain non-NA data in the same place. - Returns - ------- - updated : DataFrame + Raises + ------ + ValueError + When `raise_conflict` is True and there's overlapping non-NA data. See Also -------- - DataFrame.merge : For column(s)-on-columns(s) operations + dict.update : Similar method for dictionaries. + DataFrame.merge : For column(s)-on-columns(s) operations. 
Examples -------- From 88386312b10a8d24a5df3f7604cb137fd068db58 Mon Sep 17 00:00:00 2001 From: kantologist Date: Mon, 12 Mar 2018 16:33:57 +0100 Subject: [PATCH 6/8] DOC: Improved the docstring of DataFrame.update() --- pandas/core/frame.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5fb94000d1aee..1b8adfe11b270 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4321,9 +4321,9 @@ def combiner(x, y, needs_i8_conversion=False): def update(self, other, join='left', overwrite=True, filter_func=None, raise_conflict=False): """ - Modify in place using non-NA values from other DataFrame. + Modify in place using non-NA values from another DataFrame. - Aligns on indices. + Aligns on indices. There is no return value. Parameters ---------- @@ -4335,22 +4335,27 @@ def update(self, other, join='left', overwrite=True, filter_func=None, Only left join is implemented, keeping the index and columns of the original object. overwrite : boolean, default True - If True then overwrite values for common keys in the calling frame. - If False then only NA values in the calling object are updated. + How to handle non-NA values for overlapping keys. + + * True : overwrite values in `self` with values from `other`. + * False : only update values that are NA in `self`. + filter_func : callable(1d-array) -> 1d-array, default None Can choose to replace values other than NA. Return True for values that should be updated. raise_conflict : boolean - If True, will raise an error if the DataFrame and other both - contain data in the same place. + If True, will raise a `ValueError` if the DataFrame and `other` both + contain non-NA data in the same place. - Returns - ------- - updated : DataFrame + Raises + ------ + ValueError + When `raise_conflict` is True and there's overlapping non-NA data. 
See Also -------- - DataFrame.merge : For column(s)-on-columns(s) operations + dict.update : Similar method for dictionaries. + DataFrame.merge : For column(s)-on-columns(s) operations. Examples -------- From 4c2e352d0227342a35c96e003e00eb47d2a9f6fa Mon Sep 17 00:00:00 2001 From: kantologist Date: Tue, 13 Mar 2018 14:52:01 +0100 Subject: [PATCH 7/8] DOC: Improve the docstring of DataFrame.update() --- pandas/core/frame.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 42ac16acd76d7..c8386c81dc798 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4328,8 +4328,9 @@ def update(self, other, join='left', overwrite=True, filter_func=None, Parameters ---------- other : DataFrame, or object coercible into a DataFrame - Index should be similar to one of the columns in this one. If a - Series is passed, its name attribute must be set, and that will be + Should have at least one matching index/column label + with the original DataFrame. If a Series is passed, + its name attribute must be set, and that will be used as the column name in the resulting joined DataFrame. join : {'left'}, default 'left' Only left join is implemented, keeping the index and columns of the @@ -4337,10 +4338,13 @@ def update(self, other, join='left', overwrite=True, filter_func=None, overwrite : boolean, default True How to handle non-NA values for overlapping keys. - * True : overwrite values in `self` with values from `other`. - * False : only update values that are NA in `self`. + * True : overwrite original DataFrame's values + with values from `other`. + * False : only update values that are NA in + the original DataFrame. - filter_func : callable(1d-array) -> 1d-array, default None + filter_func : callable(1d-array) -> 1d-array,\ + "default None" -> "optional" Can choose to replace values other than NA. Return True for values that should be updated. 
raise_conflict : boolean @@ -4370,7 +4374,8 @@ def update(self, other, join='left', overwrite=True, filter_func=None, 1 2 5 2 3 6 - The DataFrame's length does not increase as a result of the update. + The DataFrame's length does not increase as a result of the update, + only values at matching index/column labels are updated. >>> df = pd.DataFrame({'A': ['a', 'b', 'c'], ... 'B': ['x', 'y', 'z']}) From ce18a2607ddf17455e09ffd2500c8da3a5648e04 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 13 Mar 2018 22:40:09 +0100 Subject: [PATCH 8/8] Update frame.py --- pandas/core/frame.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4bf56b0cbf0ac..8af56e5e1070d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4331,24 +4331,23 @@ def update(self, other, join='left', overwrite=True, filter_func=None, Should have at least one matching index/column label with the original DataFrame. If a Series is passed, its name attribute must be set, and that will be - used as the column name in the resulting joined DataFrame. + used as the column name to align with the original DataFrame. join : {'left'}, default 'left' Only left join is implemented, keeping the index and columns of the original object. - overwrite : boolean, default True - How to handle non-NA values for overlapping keys. + overwrite : bool, default True + How to handle non-NA values for overlapping keys: - * True : overwrite original DataFrame's values - with values from `other`. - * False : only update values that are NA in - the original DataFrame. + * True: overwrite original DataFrame's values + with values from `other`. + * False: only update values that are NA in + the original DataFrame. - filter_func : callable(1d-array) -> 1d-array,\ - "default None" -> "optional" + filter_func : callable(1d-array) -> boolean 1d-array, optional Can choose to replace values other than NA. 
Return True for values that should be updated. - raise_conflict : boolean - If True, will raise a `ValueError` if the DataFrame and `other` + raise_conflict : bool, default False + If True, will raise a ValueError if the DataFrame and `other` both contain non-NA data in the same place. Raises @@ -4408,7 +4407,7 @@ def update(self, other, join='left', overwrite=True, filter_func=None, 1 b d 2 c e - If ``other`` contains NaNs the corresponding values are not updated + If `other` contains NaNs the corresponding values are not updated in the original dataframe. >>> df = pd.DataFrame({'A': [1, 2, 3],