In [1]:
# https://tibble.tidyverse.org/reference/tibble.html
# https://tibble.tidyverse.org/reference/tribble.html

from datar import f
from datar.tibble import tibble, tibble_row, tribble, fibble, zibble
from datar.base import diag
from datar.stats import runif

%run nb_helpers.py
nb_header(tibble, tibble_row, tribble, fibble, zibble)

### # tibble  

##### Constructs a data frame

##### Args:
&emsp;&emsp;`*args`: and  
&emsp;&emsp;`**kwargs`: A set of name-value pairs.  
&emsp;&emsp;`_name_repair`: treatment of problematic column names:  
&emsp;&emsp;&emsp;&emsp;- "minimal": No name repair or checks, beyond basic existence,

&emsp;&emsp;&emsp;&emsp;- "unique": Make sure names are unique and not empty,

&emsp;&emsp;&emsp;&emsp;- "check_unique": (default value), no name repair,
&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;but check they are unique,  

&emsp;&emsp;&emsp;&emsp;- "universal": Make the names unique and syntactic

&emsp;&emsp;&emsp;&emsp;- a function: apply custom name repair

&emsp;&emsp;`_rows`: Number of rows of a 0-col dataframe when args and kwargs are  
&emsp;&emsp;&emsp;&emsp;not provided. When args or kwargs are provided, this is ignored.  

&emsp;&emsp;`base0_`: Whether the suffixes of repaired names should be 0-based.  
&emsp;&emsp;&emsp;&emsp;If not provided, will use `datar.base.get_option('index.base.0')`.  

##### Returns:
&emsp;&emsp;A constructed dataframe  


### # tibble_row  

##### Constructs a data frame that is guaranteed to occupy one row.

Scalar values will be wrapped with `[]`  

##### Args:
&emsp;&emsp;`*args`: and  
&emsp;&emsp;`**kwargs`: A set of name-value pairs.  
&emsp;&emsp;`_name_repair`: treatment of problematic column names:  
&emsp;&emsp;&emsp;&emsp;- "minimal": No name repair or checks, beyond basic existence,

&emsp;&emsp;&emsp;&emsp;- "unique": Make sure names are unique and not empty,

&emsp;&emsp;&emsp;&emsp;- "check_unique": (default value), no name repair,
&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;but check they are unique,  

&emsp;&emsp;&emsp;&emsp;- "universal": Make the names unique and syntactic

&emsp;&emsp;&emsp;&emsp;- a function: apply custom name repair

&emsp;&emsp;`base0_`: Whether the suffixes of repaired names should be 0-based.  
&emsp;&emsp;&emsp;&emsp;If not provided, will use `datar.base.get_option('index.base.0')`.  

##### Returns:
&emsp;&emsp;A constructed dataframe  


### # tribble  

##### Create dataframe using an easier to read row-by-row layout

Unlike the original API, which uses a formula (`~col`) to indicate the column  
names, we use `f.col` to indicate them.  

##### Args:
&emsp;&emsp;`*dummies`: Arguments specifying the structure of a dataframe  
&emsp;&emsp;&emsp;&emsp;Variable names should be specified with `f.name`  

##### Examples:
&emsp;&emsp;>>> tribble(  
&emsp;&emsp;>>>     f.colA, f.colB,  
&emsp;&emsp;>>>     "a",    1,  
&emsp;&emsp;>>>     "b",    2,  
&emsp;&emsp;>>>     "c",    3,  
&emsp;&emsp;>>> )  

##### Returns:
&emsp;&emsp;A dataframe  


### # fibble  

##### A function of tibble that can be used as an argument of verbs

Since `tibble` can recycle previous items, for example:  
&emsp;&emsp;>>> df >> tibble(x=1, y=f.x+1)  
&emsp;&emsp;>>> # x y  
&emsp;&emsp;>>> # 1 2  

It gets confused when it is used as an argument of a verb, as we can't tell  
whether `f` is a proxy for the data of the verb or the data frame that  
`tibble` is constructing. So here is the function to be used as a verb  
argument so that `f` refers to the data of the verb. Note that in such a case,  
the items coming in previously cannot be recycled.  

See Also:  
&emsp;&emsp;[`tibble`](datar.tibble.funcs.tibble)  


### # zibble  

##### Zip version of tibble, where names specify together and so do values.

So that it is easier to create a data frame with duplicated names.  

The columns created earlier can be recycled by later expressions.  
For example, `zibble(['x', 'y'], [1, f.x])`. You can also use expressions  
to specify names: `zibble([f.x, f.y], [1, f.x])`.  

Note that for `values`, do not use `c()` to quote all the values, as they  
will be flattened.  

##### Args:
&emsp;&emsp;`names`: The names of the columns  
&emsp;&emsp;&emsp;&emsp;If a name is None/NULL/NA, we should expand the corresponding value  
&emsp;&emsp;&emsp;&emsp;to get the name  

&emsp;&emsp;`values`: The values for the columns  
&emsp;&emsp;`_name_repair`: treatment of problematic column names:  
&emsp;&emsp;&emsp;&emsp;- "minimal": No name repair or checks, beyond basic existence,

&emsp;&emsp;&emsp;&emsp;- "unique": Make sure names are unique and not empty,

&emsp;&emsp;&emsp;&emsp;- "check_unique": (default value), no name repair,
&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;but check they are unique,  

&emsp;&emsp;&emsp;&emsp;- "universal": Make the names unique and syntactic

&emsp;&emsp;&emsp;&emsp;- a function: apply custom name repair

&emsp;&emsp;`base0_`: Whether the suffixes of repaired names should be 0-based.  
&emsp;&emsp;&emsp;&emsp;If not provided, will use `datar.base.get_option('index.base.0')`.  

##### Returns:
&emsp;&emsp;A data frame  


In [2]:
# Positional arguments are named after their source text: the output columns
# are `a` and `f.a*2`. `f.a` refers to column `a` of the frame being built.
a = range(5)
tibble(a, f.a*2)

Unnamed: 0,a,f.a*2
,<int64>,<int64>
0.0,0,0
1.0,1,2
2.0,2,4
3.0,3,6
4.0,4,8


In [3]:
# Keyword arguments name their columns explicitly; the scalar `c=1` is
# recycled to the length of the other columns (see the output below).
tibble(a, b=f.a * 2, c=1)

Unnamed: 0,a,b,c
,<int64>,<int64>,<int64>
0.0,0,0,1
1.0,1,2,1
2.0,2,4,1
3.0,3,6,1
4.0,4,8,1


In [4]:
# `y` is computed from the freshly created column `x`.
# NOTE(review): no seed is set in this cell, so the random values shown will
# presumably differ on every run — confirm whether a seed is set elsewhere.
tibble(x=runif(10), y=f.x*2)

Unnamed: 0,x,y
,<float64>,<float64>
0.0,0.385218,0.770436
1.0,0.809467,1.618933
2.0,0.057528,0.115056
3.0,0.635053,1.270106
4.0,0.572756,1.145511
5.0,0.415312,0.830625
6.0,0.230254,0.460508
7.0,0.541868,1.083736
8.0,0.572932,1.145863


In [5]:
# With the default name repair ("check_unique"), duplicated column names are
# rejected. The error is caught and printed by `try_catch`, which is not
# defined in this notebook — presumably provided by nb_helpers.py.
x = 1
with try_catch():
    tibble(x, x)

[NameNonUniqueError] Names must be unique: x


In [6]:
# "unique" repair de-duplicates by suffixing: the output columns are
# `x__1` and `x__2`.
tibble(x, x, _name_repair="unique")



Unnamed: 0,x__1,x__2
,<int64>,<int64>
0.0,1,1


In [7]:
# 0-based suffixing for name repair
tibble(x, x, _name_repair="unique", base0_=True)



Unnamed: 0,x__0,x__1
,<int64>,<int64>
0.0,1,1


In [8]:
tibble(x, x, _name_repair="minimal") # duplicated columns allowed

Unnamed: 0,x,x.1
,<int64>,<int64>
0.0,1,1


In [9]:
# Rebind `a` to a scalar (it was `range(5)` in an earlier cell).
# "universal" repair makes names unique and syntactic: per the output below,
# `a * 1` and `a * 2` become `a___1` and `a___2`.
a = 1
tibble(a * 1, a * 2, _name_repair="universal")



Unnamed: 0,a___1,a___2
,<int64>,<int64>
0.0,1,2


In [10]:
from typing import Iterable
# use annotation to tell it's all names
# not only a single name
def make_unique(names: Iterable[str]):
    """Return a copy of ``names`` in which every entry is distinct.

    The first occurrence of a name is kept unchanged. Each later occurrence
    gets a ``_<n>`` suffix (``a``, ``a_1``, ``a_2``, ...). The suffix keeps
    increasing until the candidate clashes with nothing already emitted, so
    inputs such as ``['a', 'a', 'a']`` or ``['a', 'a_1', 'a']`` cannot produce
    duplicates.

    The ``Iterable[str]`` annotation on ``names`` is kept on purpose: per the
    comment preceding this function, it signals that the function receives
    all names at once rather than a single name.

    Args:
        names: All column names, in their original order.

    Returns:
        A list of the same length as ``names`` with unique entries.
    """
    new_names = []
    taken = set()       # names already emitted, for O(1) collision checks
    last_suffix = {}    # highest suffix tried so far for each base name
    for name in names:
        candidate = name
        suffix = last_suffix.get(name, 0)
        # Bump the suffix until the candidate is free; this also skips over
        # names that literally appear in the input (e.g. an existing 'a_1').
        while candidate in taken:
            suffix += 1
            candidate = f'{name}_{suffix}'
        last_suffix[name] = suffix
        taken.add(candidate)
        new_names.append(candidate)
    return new_names

# A callable is accepted as the repair strategy; here the duplicated `a`
# becomes `a` and `a_1` (see the output below).
tibble(a, a, _name_repair=make_unique)

Unnamed: 0,a,a_1
,<int64>,<int64>
0.0,1,1


In [11]:
# if not annotation specified
# assuming a single name
def fix_names(name):
    """Collapse each run of whitespace in ``name`` into one underscore.

    Args:
        name: A single column name (a string).

    Returns:
        ``name`` with every maximal whitespace run replaced by ``'_'``.
    """
    import re
    whitespace_run = re.compile(r'\s+')
    return whitespace_run.sub('_', name)


# The repair function is applied to each name separately: the output columns
# are `a_+_1` and `a_+_2`.
tibble(a + 1, a + 2, _name_repair = fix_names)

Unnamed: 0,a_+_1,a_+_2
,<int64>,<int64>
0.0,2,3


In [12]:
# A list given as `_name_repair` supplies the final column names directly
# (the output columns are `a` and `b`).
tibble(x, x, _name_repair=["a", "b"])

Unnamed: 0,a,b
,<int64>,<int64>
0.0,1,1


In [13]:
# An unnamed data frame argument is spliced in: its columns `b` and `c` become
# top-level columns, so the later `d = f.b` can refer to `b`.
tibble(
  tibble(
    b = [4,5,6],
    c = [7,8,9]
  ),
  a = range(3),
  d = f.b
)

Unnamed: 0,b,c,a,d
,<int64>,<int64>,<int64>,<int64>
0.0,4,7,0,4
1.0,5,8,1,5
2.0,6,9,2,6


In [14]:
# Data frames passed by keyword are nested under that keyword: the output
# shows the inner columns as `b$0`..`b$3` and `c$x`, `c$y`.
s = tibble(diag(1, 4))
# Keep the first two columns of `s` and rename them to `x` and `y`.
t = tibble(s.iloc[:, :2], _name_repair=['x', 'y'])
tibble(
  a=range(4),
  b=s,
  c=t
)

Unnamed: 0,a,b$0,b$1,b$2,b$3,c$x,c$y
,<int64>,<int64>,<int64>,<int64>,<int64>,<int64>,<int64>
0.0,0,1,0,0,0,1,0
1.0,1,0,1,0,0,0,1
2.0,2,0,0,1,0,0,0
3.0,3,0,0,0,1,0,0


In [15]:
# Columns of length 3 and 4 cannot be recycled to a common size, so this
# raises DataUnrecyclable, which `try_catch` catches and prints.
with try_catch():
    tibble(a=range(3), b=range(4))

[DataUnrecyclable] Cannot recycle value to size 3, expect (1, 3), got 4.


In [16]:
# A keyword argument cannot start with a dot, so pass `_dotted` and let the
# repair function turn `_` into `.` (the output column is `.dotted`).
tibble(_dotted = 3, _name_repair=lambda x: x.replace('_', '.'))

Unnamed: 0,.dotted
,<int64>
0.0,3


In [17]:
# `f.x` refers to the column `x` being built (value 1), not to the Python
# variable `x` (value 3): the output has y == 1.
x = 3
tibble(x=1, y=f.x)

Unnamed: 0,x,y
,<int64>,<int64>
0.0,1,1


In [18]:
# Plain `x` is the Python variable (3, assigned in the previous cell), not the
# column `x`: the output has y == 3.
tibble(x=1, y=x)

Unnamed: 0,x,y
,<int64>,<int64>
0.0,1,3


In [19]:
# The leading `f.<name>` arguments declare the columns; the remaining values
# fill the rows left to right, one row per line here.
tribble(
  f.colA, f.colB,
  "a",    1,
  "b",    2,
  "c",    3
)

Unnamed: 0,colA,colB
,<object>,<int64>
0.0,a,1
1.0,b,2
2.0,c,3


In [20]:
# List values are kept as single cells (the `y` column has dtype object in
# the output), not expanded into multiple rows.
tribble(
  f.x,  f.y,
  "a",  [1,2,3],
  "b",  [4,5,6]
)

Unnamed: 0,x,y
,<object>,<object>
0.0,a,"[1, 2, 3]"
1.0,b,"[4, 5, 6]"


In [21]:
# The nested list `[[2,3]]` yields one cell holding `[2, 3]`, so the frame
# stays at a single row (see the output below).
tibble_row(a=1, b=[[2,3]])

Unnamed: 0,a,b
,<int64>,<object>
0.0,1,"[2, 3]"


In [22]:
# zibble takes the names and the values as two parallel lists; the duplicated
# name `a` is accepted here without an error.
zibble(['a', 'a'], [1,2])

Unnamed: 0,a,a.1
,<int64>,<int64>
0.0,1,2


In [23]:
# `f.a` refers to the column `a` created earlier in the same call (value 1),
# so the second column evaluates to 2.
zibble(['a', 'a'], [1,f.a*2])

Unnamed: 0,a,a.1
,<int64>,<int64>
0.0,1,2
