In [3]:
# https://tibble.tidyverse.org/reference/tibble.html
# https://tibble.tidyverse.org/reference/tribble.html

from datar import f
from datar.tibble import tibble, tibble_row, tribble
from datar.base import diag, runif

%run nb_helpers.py
nb_header(tibble, tibble_row, tribble)

### # tibble  

##### Constructs a data frame

##### Args:
&emsp;&emsp;`*args`: and  
&emsp;&emsp;`**kwargs`: A set of name-value pairs.  
&emsp;&emsp;`_name_repair`: treatment of problematic column names:  
&emsp;&emsp;&emsp;&emsp;- "minimal": No name repair or checks, beyond basic existence,

&emsp;&emsp;&emsp;&emsp;- "unique": Make sure names are unique and not empty,

&emsp;&emsp;&emsp;&emsp;- "check_unique": (default value), no name repair,
&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;but check they are unique,  

&emsp;&emsp;&emsp;&emsp;- "universal": Make the names unique and syntactic

&emsp;&emsp;&emsp;&emsp;- a function: apply custom name repair

&emsp;&emsp;`_rows`: Number of rows of a 0-col dataframe when args and kwargs are  
&emsp;&emsp;&emsp;&emsp;not provided. When args or kwargs are provided, this is ignored.  

&emsp;&emsp;`_dtypes`: The dtypes for each columns to convert to.  
&emsp;&emsp;`_drop_index`: Whether drop the index for the final data frame  
&emsp;&emsp;`_index`: The new index of the output frame  

##### Returns:
&emsp;&emsp;A constructed tibble  


### # tibble_row  

##### Constructs a data frame that is guaranteed to occupy one row.
Scalar values will be wrapped with `[]`  

##### Args:
&emsp;&emsp;`*args`: and  
&emsp;&emsp;`**kwargs`: A set of name-value pairs.  
&emsp;&emsp;`_name_repair`: treatment of problematic column names:  
&emsp;&emsp;&emsp;&emsp;- "minimal": No name repair or checks, beyond basic existence,

&emsp;&emsp;&emsp;&emsp;- "unique": Make sure names are unique and not empty,

&emsp;&emsp;&emsp;&emsp;- "check_unique": (default value), no name repair,
&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;but check they are unique,  

&emsp;&emsp;&emsp;&emsp;- "universal": Make the names unique and syntactic

&emsp;&emsp;&emsp;&emsp;- a function: apply custom name repair

##### Returns:
&emsp;&emsp;A constructed dataframe  


### # tribble  

##### Create dataframe using an easier to read row-by-row layout
Unlike original API that uses formula (`f.col`) to indicate the column  
names, we use `f.col` to indicate them.  

##### Args:
&emsp;&emsp;`*dummies`: Arguments specifying the structure of a dataframe  
&emsp;&emsp;&emsp;&emsp;Variable names should be specified with `f.name`  

&emsp;&emsp;`_dtypes`: The dtypes for each columns to convert to.  

##### Examples:
&emsp;&emsp;>>> tribble(  
&emsp;&emsp;>>>     f.colA, f.colB,  
&emsp;&emsp;>>>     "a",    1,  
&emsp;&emsp;>>>     "b",    2,  
&emsp;&emsp;>>>     "c",    3,  
&emsp;&emsp;>>> )  

##### Returns:
&emsp;&emsp;A dataframe  


In [5]:
a = range(5)
tibble(a=a, b=f.a*2)

Unnamed: 0,a,b
,<int64>,<int64>
0.0,0,0
1.0,1,2
2.0,2,4
3.0,3,6
4.0,4,8


In [6]:
tibble(a=a, b=f.a * 2, c=1)

Unnamed: 0,a,b,c
,<int64>,<int64>,<int64>
0.0,0,0,1
1.0,1,2,1
2.0,2,4,1
3.0,3,6,1
4.0,4,8,1


In [7]:
tibble(x=runif(10), y=f.x*2)

Unnamed: 0,x,y
,<float64>,<float64>
0.0,0.829673,1.659347
1.0,0.682616,1.365232
2.0,0.594559,1.189117
3.0,0.245247,0.490494
4.0,0.393295,0.786590
5.0,0.736311,1.472622
6.0,0.159279,0.318559
7.0,0.459043,0.918085
8.0,0.237187,0.474374


In [8]:
x = 1
with try_catch():
    tibble(x, x)

[NameNonUniqueError] Names must be unique: 1


In [9]:
tibble(x, x, _name_repair="unique")



Unnamed: 0,1__0,1__1
,<int64>,<int64>
0.0,1,1


In [10]:
# 0-based suffixing for name repair
tibble(x, x, _name_repair="unique", base0_=True)



Unnamed: 0,1__0,1__1,base0_
,<int64>,<int64>,<bool>
0.0,1,1,True


In [11]:
tibble(x, x, _name_repair="minimal") # duplicated columns allowed

Unnamed: 0,1,1.1
,<int64>,<int64>
0.0,1,1


In [12]:
a = 1
tibble(a * 1, a * 2, _name_repair="universal")



Unnamed: 0,_1,_2
,<int64>,<int64>
0.0,1,2


In [13]:
from typing import Iterable
# use annotation to tell it's all names
# not only a single name
def make_unique(names: Iterable[str]): 
    new_names = []
    for name in names:
        name_count = new_names.count(name)
        if name_count == 0:
            new_names.append(name)
        else:
            new_names.append(f'{name}_{name_count}')
    return new_names

tibble(a, a, _name_repair=make_unique)

Unnamed: 0,1,1_1
,<int64>,<int64>
0.0,1,1


In [14]:
# if not annotation specified
# assuming a single name
def fix_names(name):
    import re
    return re.sub(r'\s+', '_', name)


tibble(a + 1, a + 2, _name_repair = fix_names)

Unnamed: 0,2,3
,<int64>,<int64>
0.0,2,3


In [15]:
tibble(x, x, _name_repair=["a", "b"])

Unnamed: 0,a,b
,<int64>,<int64>
0.0,1,1


In [16]:
tibble(
  tibble(
    b = [4,5,6],
    c = [7,8,9]
  ),
  a = range(3),
  d = f.b
)

Unnamed: 0,b,c,a,d
,<int64>,<int64>,<int64>,<int64>
0.0,4,7,0,4
1.0,5,8,1,5
2.0,6,9,2,6


In [17]:
s = tibble(diag(1, 4))
t = tibble(s.iloc[:, :2], _name_repair=['x', 'y'])
tibble(
  a=range(4),
  b=s,
  c=t
)

Unnamed: 0,a,b$0,b$1,b$2,b$3,c$x$0,c$x$1
,<int64>,<int64>,<int64>,<int64>,<int64>,<int64>,<int64>
0.0,0,1,0,0,0,1,0
1.0,1,0,1,0,0,0,1
2.0,2,0,0,1,0,0,0
3.0,3,0,0,0,1,0,0


In [18]:
with try_catch():
    tibble(a=range(3), b=range(4))

[ValueError] `b` must be size [1 3], not 4.


In [19]:
tibble(_dotted = 3, _name_repair=lambda x: x.replace('_', '.'))

Unnamed: 0,.dotted
,<int64>
0.0,3


In [20]:
x = 3
tibble(x=1, y=f.x)

Unnamed: 0,x,y
,<int64>,<int64>
0.0,1,1


In [21]:
tibble(x=1, y=x)

Unnamed: 0,x,y
,<int64>,<int64>
0.0,1,3


In [22]:
tribble(
  f.colA, f.colB,
  "a",    1,
  "b",    2,
  "c",    3
)

Unnamed: 0,colA,colB
,<object>,<int64>
0.0,a,1
1.0,b,2
2.0,c,3


In [23]:
tribble(
  f.x,  f.y,
  "a",  [1,2,3],
  "b",  [4,5,6]
)

Unnamed: 0,x,y
,<object>,<object>
0.0,a,"[1, 2, 3]"
1.0,b,"[4, 5, 6]"


In [24]:
tibble_row(a=1, b=[[2,3]])

Unnamed: 0,a,b
,<int64>,<object>
0.0,1,"[2, 3]"
