In [1]:
# https://tibble.tidyverse.org/reference/tibble.html
# https://tibble.tidyverse.org/reference/tribble.html

from datar import f
from datar.tibble import tibble, tibble_row, tribble, fibble
from datar.base import diag
from datar.stats import runif

%run nb_helpers.py
nb_header(tibble, tibble_row, tribble, fibble)

### # tibble  

##### Constructs a data frame

##### Args:
&emsp;&emsp;`*args`: and  
&emsp;&emsp;`**kwargs`: A set of name-value pairs.  
&emsp;&emsp;`_name_repair`: treatment of problematic column names:  
&emsp;&emsp;&emsp;&emsp;- "minimal": No name repair or checks, beyond basic existence,

&emsp;&emsp;&emsp;&emsp;- "unique": Make sure names are unique and not empty,

&emsp;&emsp;&emsp;&emsp;- "check_unique": (default value), no name repair,
&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;but check they are unique,  

&emsp;&emsp;&emsp;&emsp;- "universal": Make the names unique and syntactic

&emsp;&emsp;&emsp;&emsp;- a function: apply custom name repair

&emsp;&emsp;`_rows`: Number of rows of a 0-col dataframe when args and kwargs are  
&emsp;&emsp;&emsp;&emsp;not provided. When args or kwargs are provided, this is ignored.  

&emsp;&emsp;`_base0`: Whether the suffixes of repaired names should be 0-based.  
&emsp;&emsp;&emsp;&emsp;If not provided, will use `datar.base.get_option('index.base.0')`.  

##### Returns:
&emsp;&emsp;A constructed dataframe  


### # tibble_row  

##### Constructs a data frame that is guaranteed to occupy one row.

Scalar values will be wrapped with `[]`  

##### Args:
&emsp;&emsp;`*args`: and  
&emsp;&emsp;`**kwargs`: A set of name-value pairs.  
&emsp;&emsp;`_name_repair`: treatment of problematic column names:  
&emsp;&emsp;&emsp;&emsp;- "minimal": No name repair or checks, beyond basic existence,

&emsp;&emsp;&emsp;&emsp;- "unique": Make sure names are unique and not empty,

&emsp;&emsp;&emsp;&emsp;- "check_unique": (default value), no name repair,
&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;but check they are unique,  

&emsp;&emsp;&emsp;&emsp;- "universal": Make the names unique and syntactic

&emsp;&emsp;&emsp;&emsp;- a function: apply custom name repair

&emsp;&emsp;`_base0`: Whether the suffixes of repaired names should be 0-based.  
&emsp;&emsp;&emsp;&emsp;If not provided, will use `datar.base.get_option('index.base.0')`.  

##### Returns:
&emsp;&emsp;A constructed dataframe  


### # tribble  

##### Create dataframe using an easier to read row-by-row layout

Unlike the original R API, which uses formulas (`~col`) to indicate the column  
names, we use `f.col` to indicate them.  

##### Args:
&emsp;&emsp;`*dummies`: Arguments specifying the structure of a dataframe  
&emsp;&emsp;&emsp;&emsp;Variable names should be specified with `f.name`  

##### Examples:
&emsp;&emsp;>>> tribble(  
&emsp;&emsp;>>>     f.colA, f.colB,  
&emsp;&emsp;>>>     "a",    1,  
&emsp;&emsp;>>>     "b",    2,  
&emsp;&emsp;>>>     "c",    3,  
&emsp;&emsp;>>> )  

##### Returns:
&emsp;&emsp;A dataframe  


### # fibble  

##### A function of tibble that can be used as an argument of verbs

Since `tibble` can recycle previous items, for example:  
&emsp;&emsp;>>> df >> tibble(x=1, y=f.x+1)  
&emsp;&emsp;>>> # x y  
&emsp;&emsp;>>> # 1 2  

It gets confused when it is used as an argument of a verb, because then we can't  
tell whether `f` is a proxy for the data of the verb or for the data frame that  
`tibble` is constructing. So here is the function to be used as a verb  
argument, so that `f` refers to the data of the verb. Note that in such a case,  
the items coming in previously cannot be recycled.  

See Also:  
&emsp;&emsp;[`tibble`](datar.tibble.funcs.tibble)  


In [2]:
a = range(5)
tibble(a, f.a*2)

Unnamed: 0,a,a.1
0,0,0
1,1,2
2,2,4
3,3,6
4,4,8


In [3]:
tibble(a, b=f.a * 2, c=1)

Unnamed: 0,a,b,c
0,0,0,1
1,1,2,1
2,2,4,1
3,3,6,1
4,4,8,1


In [4]:
runif(10).shape

(10,)

In [5]:
tibble(x=runif(10), y=f.x*2)

Unnamed: 0,x,y
0,0.888366,1.776732
1,0.181909,0.363817
2,0.442927,0.885855
3,0.605795,1.21159
4,0.65745,1.3149
5,0.217014,0.434028
6,0.010977,0.021954
7,0.924317,1.848634
8,0.78754,1.575079
9,0.450512,0.901024


In [6]:
from datar.core.exceptions import NameNonUniqueError
x = 1
# Passing the same variable twice yields two columns that would both be named `x`.
# With the default `_name_repair` ("check_unique") nothing is renamed; the clash
# is reported as an error instead, which we catch here only to show its message.
try:
    tibble(x, x)
except NameNonUniqueError as err:
    print(err)

Names must be unique: x


In [7]:
tibble(x, x, _name_repair="unique")



Unnamed: 0,x__1,x__2
0,1,1


In [8]:
# 0-based suffixing for name repair
tibble(x, x, _name_repair="unique", _base0=True)



Unnamed: 0,x__0,x__1
0,1,1


In [9]:
tibble(x, x, _name_repair="minimal") # "minimal": names are neither repaired nor checked; both columns are kept in the output below

Unnamed: 0,x,x.1
0,1,1


In [10]:
a = 1
tibble(a * 1, a * 2, _name_repair="universal")



Unnamed: 0,a___1,a___2
0,1,2


In [11]:
from typing import Iterable
# use annotation to tell it's all names
# not only a single name
def make_unique(names: Iterable[str]):
    """Return `names` with duplicates disambiguated by a numeric suffix.

    The first occurrence of a name is kept unchanged; each later occurrence
    becomes ``<name>_<k>`` for the smallest ``k >= 1`` not yet used, e.g.
    ``["a", "a", "a"] -> ["a", "a_1", "a_2"]``. A generated name never
    collides with a name that already appeared earlier in the result.

    The `Iterable[str]` annotation on `names` is kept on purpose: it is how
    this function signals that it repairs all names at once rather than one
    name at a time.

    Args:
        names: The column names, in order.

    Returns:
        A list of the same length as `names` whose items are all distinct.
    """
    new_names = []
    taken = set()      # every name already emitted, for O(1) collision checks
    next_suffix = {}   # per original name: the next suffix number to try
    for name in names:
        candidate = name
        suffix = next_suffix.get(name, 1)
        # Keep bumping the suffix until the candidate is genuinely unused;
        # counting only the bare name would emit the same suffix twice.
        while candidate in taken:
            candidate = f'{name}_{suffix}'
            suffix += 1
        next_suffix[name] = suffix
        taken.add(candidate)
        new_names.append(candidate)
    return new_names

tibble(a, a, _name_repair=make_unique)

Unnamed: 0,a,a_1
0,1,1


In [12]:
# if not annotation specified
# assuming a single name
def fix_names(name):
    """Replace each run of whitespace in a single name with one underscore.

    The parameter is deliberately left without a type annotation, so the
    function is treated as a per-name repair function (see the cell comment).
    """
    import re
    whitespace_run = re.compile(r'\s+')
    return whitespace_run.sub('_', name)


tibble(a + 1, a + 2, _name_repair = fix_names)

Unnamed: 0,a_+_1,a_+_2
0,2,3


In [13]:
tibble(x, x, _name_repair=["a", "b"])

Unnamed: 0,a,b
0,1,1


In [14]:
tibble(
  tibble(
    b = [4,5,6],
    c = [7,8,9]
  ),
  a = range(3),
  d = f.b
)

Unnamed: 0,b,c,a,d
0,4,7,0,4
1,5,8,1,5
2,6,9,2,6


In [15]:
s = tibble(diag(1, 4))
t = tibble(s.iloc[:, :2], _name_repair=['x', 'y'])
tibble(
  a=range(4),
  b=s,
  c=t
)

Unnamed: 0,a,b$0,b$1,b$2,b$3,c$x$0,c$x$1
0,0,1,0,0,0,1,0
1,1,0,1,0,0,0,1
2,2,0,0,1,0,0,0
3,3,0,0,0,1,0,0


In [16]:
# Columns of different lengths (3 vs. 4) are not recycled against each other;
# the call raises ValueError, caught here only to display the message.
try:
    tibble(a=range(3), b=range(4))
except ValueError as err:
    print(err)

Length of values (4) does not match length of index (3)


In [17]:
tibble(_dotted = 3, _name_repair=lambda x: x.replace('_', '.'))

Unnamed: 0,.dotted
0,3


In [18]:
x = 3
# `f.x` refers to the column `x` being built (value 1), not the Python variable `x` (3)
tibble(x=1, y=f.x)

Unnamed: 0,x,y
0,1,1


In [19]:
# a bare `x` is the Python variable (3 at this point), so `y` gets 3 rather than the column value
tibble(x=1, y=x)

Unnamed: 0,x,y
0,1,3


In [20]:
tribble(
  f.colA, f.colB,
  "a",    1,
  "b",    2,
  "c",    3
)

Unnamed: 0,colA,colB
0,a,1
1,b,2
2,c,3


In [21]:
tribble(
  f.x,  f.y,
  "a",  [1,2,3],
  "b",  [4,5,6]
)

Unnamed: 0,x,y
0,a,"[1, 2, 3]"
1,b,"[4, 5, 6]"


In [22]:
tibble_row(a=1, b=[[2,3]])

Unnamed: 0,a,b
0,1,"[2, 3]"
