In [1]:
%run nb_helpers.py
from datar.all import *

# Render the API reference for the five row-wise verbs demonstrated below.
nb_header(
    rows_insert, rows_update, rows_patch, rows_upsert, rows_delete,
    book="rows",
)

### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ rows_insert</div>

##### Adds new rows to a data frame

The `in_place` argument is not supported, since only data frames are handled here.  

##### Args:
&emsp;&emsp;`x`: The seed data frame  
&emsp;&emsp;`y`: The data frame with rows to be inserted into `x`.  
&emsp;&emsp;&emsp;&emsp;- Key values in `y` must not occur in `x`

&emsp;&emsp;&emsp;&emsp;- `y` must have the same columns as `x`, or a subset of them

&emsp;&emsp;`by`: A string or a list of strings giving the key columns.  
&emsp;&emsp;&emsp;&emsp;The key values must uniquely identify each row  
&emsp;&emsp;&emsp;&emsp;(i.e. each combination of key values occurs at most once),  
&emsp;&emsp;&emsp;&emsp;and the key columns must exist in both x and y.  
&emsp;&emsp;&emsp;&emsp;By default, we use the first column in y, since the first column  
&emsp;&emsp;&emsp;&emsp;is a reasonable place to put an identifier variable.  

&emsp;&emsp;`copy`: If `False`, do not copy data unnecessarily.  
&emsp;&emsp;&emsp;&emsp;Original API does not support this. This argument will be  
&emsp;&emsp;&emsp;&emsp;passed to `pandas.concat()` as the `copy` argument.  

##### Returns:
&emsp;&emsp;A data frame with `y` inserted into `x`  


### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ rows_update</div>

##### Modifies existing rows in a data frame

See Also:  
&emsp;&emsp;[`rows_insert`](datar.dplyr.rows.rows_insert)  

##### Args:
&emsp;&emsp;`x`: The seed data frame  
&emsp;&emsp;`y`: The data frame with the new values for existing rows of `x`.  
&emsp;&emsp;&emsp;&emsp;- Key values in `y` must occur in `x`

&emsp;&emsp;&emsp;&emsp;- `y` must have the same columns as `x`, or a subset of them

&emsp;&emsp;`by`: A string or a list of strings giving the key columns.  
&emsp;&emsp;&emsp;&emsp;The key values must uniquely identify each row  
&emsp;&emsp;&emsp;&emsp;(i.e. each combination of key values occurs at most once),  
&emsp;&emsp;&emsp;&emsp;and the key columns must exist in both x and y.  
&emsp;&emsp;&emsp;&emsp;By default, we use the first column in y, since the first column  
&emsp;&emsp;&emsp;&emsp;is a reasonable place to put an identifier variable.  

&emsp;&emsp;`copy`: Whether `x` should be copied and updated or updated directly  

##### Returns:
&emsp;&emsp;`x` with the rows matching the keys in `y` updated  


### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ rows_patch</div>

##### Works like `rows_update()` but only overwrites `NA` values.

See Also:  
&emsp;&emsp;[`rows_insert`](datar.dplyr.rows.rows_insert)  

##### Args:
&emsp;&emsp;`x`: The seed data frame  
&emsp;&emsp;`y`: The data frame with the values used to fill `NA`s in existing rows of `x`.  
&emsp;&emsp;&emsp;&emsp;- Key values in `y` must occur in `x`

&emsp;&emsp;&emsp;&emsp;- `y` must have the same columns as `x`, or a subset of them

&emsp;&emsp;`by`: A string or a list of strings giving the key columns.  
&emsp;&emsp;&emsp;&emsp;The key values must uniquely identify each row  
&emsp;&emsp;&emsp;&emsp;(i.e. each combination of key values occurs at most once),  
&emsp;&emsp;&emsp;&emsp;and the key columns must exist in both x and y.  
&emsp;&emsp;&emsp;&emsp;By default, we use the first column in y, since the first column  
&emsp;&emsp;&emsp;&emsp;is a reasonable place to put an identifier variable.  

&emsp;&emsp;`copy`: Whether `x` should be copied and updated or updated directly  

##### Returns:
&emsp;&emsp;`x` with the `NA` values in the rows matching the keys in `y` filled in  


### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ rows_upsert</div>

##### Inserts or updates depending on whether or not the key value in `y` already exists in `x`.

See Also:  
&emsp;&emsp;[`rows_insert`](datar.dplyr.rows.rows_insert)  

##### Args:
&emsp;&emsp;`x`: The seed data frame  
&emsp;&emsp;`y`: The data frame with rows to be inserted into or updated in `x`.  
&emsp;&emsp;&emsp;&emsp;- Key values in `y` may or may not occur in `x`: rows with matching keys are updated, the others are inserted

&emsp;&emsp;&emsp;&emsp;- `y` must have the same columns as `x`, or a subset of them

&emsp;&emsp;`by`: A string or a list of strings giving the key columns.  
&emsp;&emsp;&emsp;&emsp;The key values must uniquely identify each row  
&emsp;&emsp;&emsp;&emsp;(i.e. each combination of key values occurs at most once),  
&emsp;&emsp;&emsp;&emsp;and the key columns must exist in both x and y.  
&emsp;&emsp;&emsp;&emsp;By default, we use the first column in y, since the first column  
&emsp;&emsp;&emsp;&emsp;is a reasonable place to put an identifier variable.  

&emsp;&emsp;`copy`: If `False`, do not copy data unnecessarily.  
&emsp;&emsp;&emsp;&emsp;Original API does not support this. This argument will be  
&emsp;&emsp;&emsp;&emsp;passed to `pandas.concat()` as the `copy` argument.  

##### Returns:
&emsp;&emsp;`x` with the rows for existing keys updated and the rows for new keys inserted  


### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ rows_delete</div>

##### Deletes rows; key values in `y` must exist in `x`.

See Also:  
&emsp;&emsp;[`rows_insert`](datar.dplyr.rows.rows_insert)  

##### Args:
&emsp;&emsp;`x`: The seed data frame  
&emsp;&emsp;`y`: The data frame with the keys of the rows to be deleted from `x`.  
&emsp;&emsp;&emsp;&emsp;- Key values in `y` must occur in `x`

&emsp;&emsp;&emsp;&emsp;- `y` must have the same columns as `x`, or a subset of them

&emsp;&emsp;`by`: A string or a list of strings giving the key columns.  
&emsp;&emsp;&emsp;&emsp;The key values must uniquely identify each row  
&emsp;&emsp;&emsp;&emsp;(i.e. each combination of key values occurs at most once),  
&emsp;&emsp;&emsp;&emsp;and the key columns must exist in both x and y.  
&emsp;&emsp;&emsp;&emsp;By default, we use the first column in y, since the first column  
&emsp;&emsp;&emsp;&emsp;is a reasonable place to put an identifier variable.  

&emsp;&emsp;`copy`: Whether the rows should be deleted from a copy of `x` or from `x` directly  

##### Returns:
&emsp;&emsp;`x` with the rows matching the keys in `y` deleted  


In [2]:
# Seed frame reused by every example below: integer key `a`, string column `b`
# whose last value is missing, and float column `c`.
data = tibble(
    a=seq(1, 3),
    b=c(letters[[0, 1]], NA),
    c=[0.5, 1.5, 2.5],
)
data

Unnamed: 0,a,b,c
,<int64>,<object>,<float64>
0.0,1,a,0.5
1.0,2,b,1.5
2.0,3,,2.5


In [3]:
# Insert a row whose key (a=4) is not yet in `data`. No `by` is given, so the
# first column of the new rows ("a") is used as the key; `c` is not supplied
# and comes out missing in the inserted row.
rows_insert(
    data,
    tibble(a=4, b="z"),
)

[2022-03-18 17:35:21][datar][   INFO] Matching, by='a'


Unnamed: 0,a,b,c
,<int64>,<object>,<float64>
0.0,1,a,0.5
1.0,2,b,1.5
2.0,3,,2.5
3.0,4,z,


In [4]:
# Key a=3 already exists in `data`, so the insert is rejected with a
# ValueError; try_catch() reports the error in the cell output.
with try_catch():
    rows_insert(
        data,
        tibble(a=3, b="z"),
    )

[2022-03-18 17:35:22][datar][   INFO] Matching, by='a'


[ValueError] Attempting to insert duplicate rows.


In [5]:
# Overwrite `b` for the existing keys a=2 and a=3; the key column defaults to
# the first column of the second frame ("a").
rows_update(
    data,
    tibble(a=[2, 3], b="z"),
)

[2022-03-18 17:35:22][datar][   INFO] Matching, by='a'


Unnamed: 0,a,b,c
,<int64>,<object>,<float64>
0.0,1,a,0.5
1.0,2,z,1.5
2.0,3,z,2.5


In [6]:
# Same update, but the key is no longer the first column of the second frame,
# so it is named explicitly with `by`.
rows_update(
    data,
    tibble(b="z", a=[2, 3]),
    by="a",
)

Unnamed: 0,a,b,c
,<int64>,<object>,<float64>
0.0,1,a,0.5
1.0,2,z,1.5
2.0,3,z,2.5


In [7]:
# Patch only fills missing values: row a=2 keeps its existing "b", while the
# missing `b` of row a=3 becomes "z".
rows_patch(
    data,
    tibble(a=[2, 3], b="z"),
)

[2022-03-18 17:35:23][datar][   INFO] Matching, by='a'


Unnamed: 0,a,b,c
,<int64>,<object>,<float64>
0.0,1,a,0.5
1.0,2,b,1.5
2.0,3,z,2.5


In [8]:
# Upsert: keys a=2 and a=3 already exist and are updated, key a=4 is new and
# is appended as an extra row.
rows_upsert(
    data,
    tibble(a=seq(2, 4), b="z"),
)

[2022-03-18 17:35:24][datar][   INFO] Matching, by='a'


Unnamed: 0,a,b,c
,<int64>,<object>,<float64>
0.0,1,a,0.5
1.0,2,z,1.5
2.0,3,z,2.5
3.0,4,z,


In [9]:
# Remove the rows whose key `a` is 2 or 3; only the a=1 row remains.
rows_delete(
    data,
    tibble(a=[2, 3]),
)

[2022-03-18 17:35:25][datar][   INFO] Matching, by='a'


Unnamed: 0,a,b,c
,<int64>,<object>,<float64>
0.0,1,a,0.5


In [10]:
# With the default key ("a"), the extra column `b` in the second frame is
# ignored (see the log message), so the result matches the previous cell.
rows_delete(
    data,
    tibble(a=[2, 3], b="b"),
)

[2022-03-18 17:35:25][datar][   INFO] Matching, by='a'
[2022-03-18 17:35:25][datar][   INFO] Ignoring extra columns: ['b']


Unnamed: 0,a,b,c
,<int64>,<object>,<float64>
0.0,1,a,0.5


In [11]:
# Matching on both `a` and `b`: `data` has no row with a=3 and b="b" (its `b`
# is missing there), so the delete fails with a ValueError that try_catch()
# reports in the cell output.
with try_catch():
    rows_delete(
        data,
        tibble(a=[2, 3], b="b"),
        by=c("a", "b"),
    )

[ValueError] Attempting to delete missing rows.
