In [1]:
%run nb_helpers.py
from datar.all import *

# Show the API documentation for each rows_* verb demonstrated below.
nb_header(rows_insert, rows_update, rows_patch, rows_upsert, rows_delete, book="rows")

### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ rows_insert</div>

##### Insert rows from y into x

See original API  
https://dplyr.tidyverse.org/reference/rows.html  

##### Args:
&emsp;&emsp;`x`: A data frame  
&emsp;&emsp;`y`: A data frame  
&emsp;&emsp;`by`: An unnamed character vector giving the key columns.  
&emsp;&emsp;&emsp;&emsp;The key columns must exist in both x and y.  
&emsp;&emsp;&emsp;&emsp;Keys typically uniquely identify each row, but this is only  
&emsp;&emsp;&emsp;&emsp;enforced for the key values of y  
&emsp;&emsp;&emsp;&emsp;By default, we use the first column in y, since the first column is  
&emsp;&emsp;&emsp;&emsp;a reasonable place to put an identifier variable.  

&emsp;&emsp;`conflict`: How to handle conflicts  
&emsp;&emsp;&emsp;&emsp;- "error": Throw an error

&emsp;&emsp;&emsp;&emsp;- "ignore": Ignore conflicts

&emsp;&emsp;`copy`: If x and y are not from the same data source, and copy is TRUE,  
&emsp;&emsp;&emsp;&emsp;then y will be copied into the same src as x.  
&emsp;&emsp;&emsp;&emsp;This allows you to join tables across srcs, but it is a potentially  
&emsp;&emsp;&emsp;&emsp;expensive operation so you must opt into it.  

&emsp;&emsp;`in_place`: Should x be modified in place?  
&emsp;&emsp;&emsp;&emsp;This may not be supported, depending on the backend implementation.  

##### Returns:
&emsp;&emsp;A data frame with all existing rows and potentially new rows  


### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ rows_update</div>

##### Update rows in x with values from y

See original API  
https://dplyr.tidyverse.org/reference/rows.html  

##### Args:
&emsp;&emsp;`x`: A data frame  
&emsp;&emsp;`y`: A data frame  
&emsp;&emsp;`by`: An unnamed character vector giving the key columns.  
&emsp;&emsp;&emsp;&emsp;The key columns must exist in both x and y.  
&emsp;&emsp;&emsp;&emsp;Keys typically uniquely identify each row, but this is only  
&emsp;&emsp;&emsp;&emsp;enforced for the key values of y  
&emsp;&emsp;&emsp;&emsp;By default, we use the first column in y, since the first column is  
&emsp;&emsp;&emsp;&emsp;a reasonable place to put an identifier variable.  

&emsp;&emsp;`unmatched`: how should keys in y that are unmatched by the keys  
&emsp;&emsp;&emsp;&emsp;in x be handled?  
&emsp;&emsp;&emsp;&emsp;One of -  
&emsp;&emsp;&emsp;&emsp;"error", the default, will error if there are any keys in y that  
&emsp;&emsp;&emsp;&emsp;are unmatched by the keys in x.  
&emsp;&emsp;&emsp;&emsp;"ignore" will ignore rows in y with keys that are unmatched  
&emsp;&emsp;&emsp;&emsp;by the keys in x.  

&emsp;&emsp;`copy`: If x and y are not from the same data source, and copy is TRUE,  
&emsp;&emsp;&emsp;&emsp;then y will be copied into the same src as x.  
&emsp;&emsp;&emsp;&emsp;This allows you to join tables across srcs, but it is a potentially  
&emsp;&emsp;&emsp;&emsp;expensive operation so you must opt into it.  

&emsp;&emsp;`in_place`: Should x be modified in place?  
&emsp;&emsp;&emsp;&emsp;This may not be supported, depending on the backend implementation.  

##### Returns:
&emsp;&emsp;A data frame with existing rows updated and the number of rows preserved  


### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ rows_patch</div>

##### Patch rows in x with values from y

See original API  
https://dplyr.tidyverse.org/reference/rows.html  

##### Args:
&emsp;&emsp;`x`: A data frame  
&emsp;&emsp;`y`: A data frame  
&emsp;&emsp;`by`: An unnamed character vector giving the key columns.  
&emsp;&emsp;&emsp;&emsp;The key columns must exist in both x and y.  
&emsp;&emsp;&emsp;&emsp;Keys typically uniquely identify each row, but this is only  
&emsp;&emsp;&emsp;&emsp;enforced for the key values of y  
&emsp;&emsp;&emsp;&emsp;By default, we use the first column in y, since the first column is  
&emsp;&emsp;&emsp;&emsp;a reasonable place to put an identifier variable.  

&emsp;&emsp;`unmatched`: how should keys in y that are unmatched by the keys  
&emsp;&emsp;&emsp;&emsp;in x be handled?  
&emsp;&emsp;&emsp;&emsp;One of -  
&emsp;&emsp;&emsp;&emsp;"error", the default, will error if there are any keys in y that  
&emsp;&emsp;&emsp;&emsp;are unmatched by the keys in x.  
&emsp;&emsp;&emsp;&emsp;"ignore" will ignore rows in y with keys that are unmatched  
&emsp;&emsp;&emsp;&emsp;by the keys in x.  

&emsp;&emsp;`copy`: If x and y are not from the same data source, and copy is TRUE,  
&emsp;&emsp;&emsp;&emsp;then y will be copied into the same src as x.  
&emsp;&emsp;&emsp;&emsp;This allows you to join tables across srcs, but it is a potentially  
&emsp;&emsp;&emsp;&emsp;expensive operation so you must opt into it.  

&emsp;&emsp;`in_place`: Should x be modified in place?  
&emsp;&emsp;&emsp;&emsp;This may not be supported, depending on the backend implementation.  

##### Returns:
&emsp;&emsp;A data frame with NA values overwritten and the number of rows preserved  


### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ rows_upsert</div>

##### Upsert rows in x with values from y

See original API  
https://dplyr.tidyverse.org/reference/rows.html  

##### Args:
&emsp;&emsp;`x`: A data frame  
&emsp;&emsp;`y`: A data frame  
&emsp;&emsp;`by`: An unnamed character vector giving the key columns.  
&emsp;&emsp;&emsp;&emsp;The key columns must exist in both x and y.  
&emsp;&emsp;&emsp;&emsp;Keys typically uniquely identify each row, but this is only  
&emsp;&emsp;&emsp;&emsp;enforced for the key values of y  
&emsp;&emsp;&emsp;&emsp;By default, we use the first column in y, since the first column is  
&emsp;&emsp;&emsp;&emsp;a reasonable place to put an identifier variable.  

&emsp;&emsp;`copy`: If x and y are not from the same data source, and copy is TRUE,  
&emsp;&emsp;&emsp;&emsp;then y will be copied into the same src as x.  
&emsp;&emsp;&emsp;&emsp;This allows you to join tables across srcs, but it is a potentially  
&emsp;&emsp;&emsp;&emsp;expensive operation so you must opt into it.  

&emsp;&emsp;`in_place`: Should x be modified in place?  
&emsp;&emsp;&emsp;&emsp;This may not be supported, depending on the backend implementation.  

##### Returns:
&emsp;&emsp;A data frame with rows inserted or updated, depending on whether or not  
&emsp;&emsp;the key value in y already exists in x. Key values in y must be unique.  


### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ rows_delete</div>

##### Delete rows in x that match keys in y

See original API  
https://dplyr.tidyverse.org/reference/rows.html  

##### Args:
&emsp;&emsp;`x`: A data frame  
&emsp;&emsp;`y`: A data frame  
&emsp;&emsp;`by`: An unnamed character vector giving the key columns.  
&emsp;&emsp;&emsp;&emsp;The key columns must exist in both x and y.  
&emsp;&emsp;&emsp;&emsp;Keys typically uniquely identify each row, but this is only  
&emsp;&emsp;&emsp;&emsp;enforced for the key values of y  
&emsp;&emsp;&emsp;&emsp;By default, we use the first column in y, since the first column is  
&emsp;&emsp;&emsp;&emsp;a reasonable place to put an identifier variable.  

&emsp;&emsp;`unmatched`: how should keys in y that are unmatched by the keys  
&emsp;&emsp;&emsp;&emsp;in x be handled?  
&emsp;&emsp;&emsp;&emsp;One of -  
&emsp;&emsp;&emsp;&emsp;"error", the default, will error if there are any keys in y that  
&emsp;&emsp;&emsp;&emsp;are unmatched by the keys in x.  
&emsp;&emsp;&emsp;&emsp;"ignore" will ignore rows in y with keys that are unmatched  
&emsp;&emsp;&emsp;&emsp;by the keys in x.  

&emsp;&emsp;`copy`: If x and y are not from the same data source, and copy is TRUE,  
&emsp;&emsp;&emsp;&emsp;then y will be copied into the same src as x.  
&emsp;&emsp;&emsp;&emsp;This allows you to join tables across srcs, but it is a potentially  
&emsp;&emsp;&emsp;&emsp;expensive operation so you must opt into it.  

&emsp;&emsp;`in_place`: Should x be modified in place?  
&emsp;&emsp;&emsp;&emsp;This may not be supported, depending on the backend implementation.  

##### Returns:
&emsp;&emsp;A data frame with rows deleted  


In [2]:
# Demo frame reused by every example below: key column `a`, a string column `b`
# whose last value is missing, and a float column `c`.
data = tibble(
    a=seq(1, 3),
    b=c(letters[[0, 1]], NA),
    c=[0.5, 1.5, 2.5],
)
data

Unnamed: 0,a,b,c
,<int64>,<object>,<float64>
0.0,1,a,0.5
1.0,2,b,1.5
2.0,3,,2.5


In [3]:
# Key a=4 is not present in `data`, so the row is added; `c` is absent from y
# and therefore missing in the new row.
rows_insert(
    data,
    tibble(a=4, b="z"),
)

[2022-12-02 14:22:14][datar][   INFO] Matching, by='a'


Unnamed: 0,a,b,c
,<int64>,<object>,<float64>
0.0,1,a,0.5
1.0,2,b,1.5
2.0,3,,2.5
3.0,4,z,


In [4]:
# Key a=3 already exists in `data`, so the insert is rejected with a
# ValueError, which try_catch() catches and prints.
with try_catch():
    rows_insert(
        data,
        tibble(a=3, b="z"),
    )

[2022-12-02 14:22:14][datar][   INFO] Matching, by='a'


[ValueError] Attempting to insert duplicate rows.


In [5]:
# No `by` given: the first column of y (`a`) is used as the key, and `b` is
# overwritten for the matching rows a=2 and a=3.
rows_update(
    data,
    tibble(a=[2, 3], b="z"),
)

[2022-12-02 14:22:15][datar][   INFO] Matching, by='a'


Unnamed: 0,a,b,c
,<int64>,<object>,<float64>
0.0,1,a,0.5
1.0,2,z,1.5
2.0,3,z,2.5


In [6]:
# Here the key is not the first column of y, so it is named explicitly.
rows_update(
    data,
    tibble(b="z", a=[2, 3]),
    by="a",
)

Unnamed: 0,a,b,c
,<int64>,<object>,<float64>
0.0,1,a,0.5
1.0,2,z,1.5
2.0,3,z,2.5


In [7]:
# Patching only fills missing values: `b` is set for a=3 (it was NA), while
# the existing "b" at a=2 is left untouched.
rows_patch(
    data,
    tibble(a=[2, 3], b="z"),
)

[2022-12-02 14:22:17][datar][   INFO] Matching, by='a'


Unnamed: 0,a,b,c
,<int64>,<object>,<float64>
0.0,1,a,0.5
1.0,2,b,1.5
2.0,3,z,2.5


In [8]:
# Keys 2 and 3 exist in `data` and are updated; key 4 does not and is inserted.
rows_upsert(
    data,
    tibble(a=seq(2, 4), b="z"),
)

[2022-12-02 14:22:18][datar][   INFO] Matching, by='a'


Unnamed: 0,a,b,c
,<int64>,<object>,<float64>
0.0,1,a,0.5
1.0,2,z,1.5
2.0,3,z,2.5
3.0,4,z,


In [9]:
# Remove the rows of `data` whose key `a` is 2 or 3.
rows_delete(
    data,
    tibble(a=[2, 3]),
)

[2022-12-02 14:22:18][datar][   INFO] Matching, by='a'


Unnamed: 0,a,b,c
,<int64>,<object>,<float64>
0.0,1,a,0.5


In [10]:
# Matching still uses only `a`; the extra column `b` in y is ignored (see the
# log message in the output).
rows_delete(
    data,
    tibble(a=[2, 3], b="b"),
)

[2022-12-02 14:22:19][datar][   INFO] Matching, by='a'
[2022-12-02 14:22:19][datar][   INFO] Ignoring extra columns: ['b']


Unnamed: 0,a,b,c
,<int64>,<object>,<float64>
0.0,1,a,0.5


In [11]:
# Matching on both `a` and `b`: the pair (a=3, b="b") has no counterpart in
# `data`, so the delete fails with a ValueError that try_catch() prints.
with try_catch():
    rows_delete(
        data,
        tibble(a=[2, 3], b="b"),
        by=c("a", "b"),
    )

[ValueError] Attempting to delete missing rows.
