In [1]:
# https://tidyr.tidyverse.org/reference/extract.html

from datar.all import *

# Execute the helper script inside this kernel so that its names become available
# here (`nb_header` is presumably defined in nb_helpers.py — TODO confirm).
%run nb_helpers.py
# Show the reference documentation for `extract`; the rendered text is this cell's output.
nb_header(extract)

### # extract  

##### Given a regular expression with capturing groups, extract() turns each group into a new column.
If the groups don't match, or the input is NA, the output will be NA.  

See https://tidyr.tidyverse.org/reference/extract.html  

##### Args:
&emsp;&emsp;`data`: The dataframe  
&emsp;&emsp;`col`: Column name or position.  
&emsp;&emsp;`into`: Names of the new variables to create, given as a string or a list of strings.  
&emsp;&emsp;&emsp;&emsp;Use None to omit the variable in the output.  

&emsp;&emsp;`regex`: a regular expression used to extract the desired values.  
&emsp;&emsp;&emsp;&emsp;There should be one group (defined by ()) for each element of into.  

&emsp;&emsp;`remove`: If `True`, remove the input column from the output data frame.  
&emsp;&emsp;`convert`: The universal type for the extracted columns or a dict for  
&emsp;&emsp;&emsp;&emsp;individual ones  

&emsp;&emsp;`base0_`: Whether `col` is 0-based when given by index  
&emsp;&emsp;&emsp;&emsp;If not provided, will use `datar.base.get_option('index.base.0')`  

##### Returns:
&emsp;&emsp;Dataframe with extracted columns.  


In [2]:
# Example frame: a single column `x` holding one NA entry followed by four
# dash-separated letter pairs.
df = tibble(x = c(NA, "a-b", "a-d", "b-c", "d-e"))
# No regex argument is passed, so extract() relies on its own default pattern.
# In the output shown below, A holds the text before the dash and the NA row stays empty.
df >> extract(f.x, "A")

Unnamed: 0,A
0,
1,a
2,a
3,b
4,d


In [3]:
# Two capture groups, one per name in `into`: the text before the dash goes to A
# and the text after it goes to B. The NA row yields empty values in both columns.
df >> extract(f.x, c("A", "B"), r"(\w+)-(\w+)")

Unnamed: 0,A,B
0,,
1,a,b
2,a,d
3,b,c
4,d,e


In [4]:
# Both groups only accept the letters a-d, so "d-e" does not match the pattern.
# As the output shows, a non-matching row is left empty in A and B, just like the NA row.
df >> extract(f.x, c("A", "B"), r"([a-d]+)-([a-d]+)")

Unnamed: 0,A,B
0,,
1,a,b
2,a,d
3,b,c
4,,


In [5]:
# Combine multiple groups into one column: when a name is repeated in `into`,
# the groups sharing that name are joined in order. Here groups 1 and 3 form
# column `a` ("ac") and groups 2 and 4 form column `b` ("bd"), as the output shows.
# Note: this rebinds `df`, replacing the frame used by the earlier cells.
df = tibble(x='abcd')
df >> extract(f.x, ['a', 'b', 'a', 'b'], r'(.)(.)(.)(.)')

Unnamed: 0,a,b
0,ac,bd
