In [1]:
from datar.datasets import iris, fish_encounters, mtcars
from datar.all import *

# Execute the helper script in this kernel; it is expected to provide
# `nb_header` -- TODO confirm against nb_helpers.py.
%run nb_helpers.py
# Show the API documentation of the two verbs demonstrated in this notebook.
nb_header(nest, unnest)



### # nest  

##### Nesting creates a list-column of data frames

##### Args:
&emsp;&emsp;`_data`: A data frame  
&emsp;&emsp;`**cols`: Columns to nest  
&emsp;&emsp;`_names_sep`: If `None`, the default, the names will be left as is.  
&emsp;&emsp;&emsp;&emsp;Inner names will come from the former outer names.  
&emsp;&emsp;&emsp;&emsp;If a string, the inner and outer names will be used together:  
&emsp;&emsp;&emsp;&emsp;the new inner names will have the outer name plus `_names_sep`  
&emsp;&emsp;&emsp;&emsp;automatically stripped.  

##### Returns:
&emsp;&emsp;Nested data frame.  


### # unnest  

##### Flattens list-column of data frames back out into regular columns.

##### Args:
&emsp;&emsp;`data`: A data frame to flatten.  
&emsp;&emsp;`*cols`: Columns to unnest.  
&emsp;&emsp;`keep_empty`: By default, you get one row of output for each element  
&emsp;&emsp;&emsp;&emsp;of the list you're unchopping/unnesting.  
&emsp;&emsp;&emsp;&emsp;This means that if there's a size-0 element  
&emsp;&emsp;&emsp;&emsp;(like NULL or an empty data frame), that entire row will be  
&emsp;&emsp;&emsp;&emsp;dropped from the output.  
&emsp;&emsp;&emsp;&emsp;If you want to preserve all rows, use `keep_empty` = `True` to  
&emsp;&emsp;&emsp;&emsp;replace size-0 elements with a single row of missing values.  

&emsp;&emsp;`dtypes`: Providing the dtypes for the output columns.  
&emsp;&emsp;&emsp;&emsp;Could be a single dtype, which will be applied to all columns, or  
&emsp;&emsp;&emsp;&emsp;a dictionary of dtypes with keys for the columns and values the  
&emsp;&emsp;&emsp;&emsp;dtypes.  

&emsp;&emsp;`names_sep`: If `None`, the default, the names will be left as is.  
&emsp;&emsp;&emsp;&emsp;The new outer names will come from the inner names.  
&emsp;&emsp;&emsp;&emsp;If a string, the inner and outer names will be used together.  
&emsp;&emsp;&emsp;&emsp;The names of the new outer columns will be formed by pasting  
&emsp;&emsp;&emsp;&emsp;together the outer and the inner column names, separated by  
&emsp;&emsp;&emsp;&emsp;`names_sep`.  

&emsp;&emsp;`names_repair`: treatment of problematic column names:  
&emsp;&emsp;&emsp;&emsp;- "minimal": No name repair or checks, beyond basic existence,

&emsp;&emsp;&emsp;&emsp;- "unique": Make sure names are unique and not empty,

&emsp;&emsp;&emsp;&emsp;- "check_unique": (default value), no name repair,
&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;but check they are unique,  

&emsp;&emsp;&emsp;&emsp;- "universal": Make the names unique and syntactic

&emsp;&emsp;&emsp;&emsp;- a function: apply custom name repair

##### Returns:
&emsp;&emsp;Data frame with selected columns unnested.  


In [2]:
# Small example frame: x is the key, y counts up (1..6) and z counts down (7..2),
# as the chop() output further below shows.
df = tibble(x = c(1, 1, 1, 2, 2, 3), y = f[1:7], z = f[7:1])
# Pack y and z into one list-column `data`, leaving one row per distinct x.
df >> nest(data=c(f.y, f.z))

Unnamed: 0,x,data
,<int64>,<object>
0.0,1,<DF 3x2>
1.0,2,<DF 2x2>
2.0,3,<DF 1x2>


In [3]:
# chop() keeps y and z as separate columns, collapsing each into a list per x value.
df >> chop(c(f.y, f.z))

Unnamed: 0,x,y,z
,<int64>,<object>,<object>
0.0,1,"[1, 2, 3]","[7, 6, 5]"
1.0,2,"[4, 5]","[4, 3]"
2.0,3,[6],[2]


In [4]:
# Same nesting as the first example, with the columns chosen through any_of();
# the result matches that earlier output.
df >> nest(data=any_of(c(f.y, f.z)))

Unnamed: 0,x,data
,<int64>,<object>
0.0,1,<DF 3x2>
1.0,2,<DF 2x2>
2.0,3,<DF 1x2>


In [5]:
# Nest every column except Species (`~` inverts the selection): one row per species.
# The result is bound to a name instead of being read back through IPython's `_`
# (last-output) variable, whose value depends on what was executed before.
iris_nested = iris >> nest(data=~f.Species)
iris_nested
# Look at the nested frame stored in the first row (setosa).
iris_nested.data[0]

Unnamed: 0,Species,data
,<object>,<object>
0.0,setosa,<DF 50x4>
1.0,versicolor,<DF 50x4>
2.0,virginica,<DF 50x4>


Unnamed: 0,Sepal_Length,Sepal_Width,Petal_Length,Petal_Width
,<float64>,<float64>,<float64>,<float64>
0.0,5.1,3.5,1.4,0.2
1.0,4.9,3.0,1.4,0.2
2.0,4.7,3.2,1.3,0.2
3.0,4.6,3.1,1.5,0.2
4.0,5.0,3.6,1.4,0.2
5.0,5.4,3.9,1.7,0.4
6.0,4.6,3.4,1.4,0.3
7.0,5.0,3.4,1.5,0.2
8.0,4.4,2.9,1.4,0.2


In [6]:
# First four column names of iris, i.e. every column except Species per the output below.
nest_vars = names(iris)[:4]
# any_of() takes the pre-computed list of column names to nest.
iris >> nest(data = any_of(nest_vars))

Unnamed: 0,Species,data
,<object>,<object>
0.0,setosa,<DF 50x4>
1.0,versicolor,<DF 50x4>
2.0,virginica,<DF 50x4>


In [7]:
# Each keyword creates its own list-column; here the columns are split by name prefix.
iris >> nest(petal = starts_with("Petal"), sepal = starts_with("Sepal"))

Unnamed: 0,Species,petal,sepal
,<object>,<object>,<object>
0.0,setosa,<DF 50x2>,<DF 50x2>
1.0,versicolor,<DF 50x2>,<DF 50x2>
2.0,virginica,<DF 50x2>,<DF 50x2>


In [8]:
# Same idea, grouping the measurement columns by the substring they contain.
iris >> nest(width = contains("Width"), length = contains("Length"))

Unnamed: 0,Species,width,length
,<object>,<object>,<object>
0.0,setosa,<DF 50x2>,<DF 50x2>
1.0,versicolor,<DF 50x2>,<DF 50x2>
2.0,virginica,<DF 50x2>,<DF 50x2>


In [9]:
# On a grouped frame nest() needs no column spec: the output keeps the grouping
# column `fish` and packs the remaining columns into `data`.
fish_encounters >> group_by(f.fish) >> nest()

Unnamed: 0,fish,data
,<int64>,<object>
0.0,4842,<DF 11x2>
1.0,4843,<DF 11x2>
2.0,4844,<DF 11x2>
3.0,4845,<DF 5x2>
4.0,4847,<DF 3x2>
5.0,4848,<DF 4x2>
6.0,4849,<DF 2x2>
7.0,4850,<DF 6x2>
8.0,4851,<DF 2x2>


In [15]:
from pipda import register_func
@register_func(None)
def get_models(dfs):
    """Label each nested frame in ``dfs`` with a "<df ROWSxCOLS>" string.

    Stand-in for real per-group work (do whatever with the dfs); this demo
    only reports the shape of the nested frame.

    Args:
        dfs: Object exposing ``.transform``; presumably the nested ``data``
            column as passed in by ``mutate`` on the grouped frame --
            TODO confirm the exact type.

    Returns:
        The result of ``dfs.transform`` applied with the labelling callback.
    """
    def _shape_label(chunk):
        # The first value of the chunk handed over by transform is the frame to describe.
        shape = chunk.values[0].shape
        return f"<df {shape[0]}x{shape[1]}>"

    return dfs.transform(_shape_label)

# Nest mtcars per cyl group, then derive a `models` column from each nested frame.
mtcars >> group_by(f.cyl) >> nest() >> mutate(
    models=get_models(f.data)
)

Unnamed: 0,cyl,data,models
,<int64>,<object>,<object>
0.0,6,<DF 7x10>,<df 7x10>
1.0,4,<DF 11x10>,<df 11x10>
2.0,8,<DF 14x10>,<df 14x10>


In [19]:
# y is a list-column mixing an empty entry (NULL), a one-row frame and a three-row frame.
df = tibble(
  x = f[1:4],
  y = [
    NULL,
    tibble(a = 1, b = 2),
    tibble(a = f[1:4], b = f[4:1])
  ]
)
# By default the row whose y is empty (x == 1) is dropped from the result;
# dtypes=int requests int for the unnested columns (int64 in the output below).
df >> unnest(f.y, dtypes=int)

Unnamed: 0,x,a,b
,<int64>,<int64>,<int64>
0.0,2,1,2
1.0,3,1,4
2.0,3,2,3
3.0,3,3,2


In [20]:
# keep_empty=True keeps the x == 1 row, with a and b shown as missing values;
# both columns come back as float64 in the output below.
df >> unnest(f.y, keep_empty=True)

Unnamed: 0,x,a,b
,<int64>,<float64>,<float64>
0.0,1,,
1.0,2,1.0,2.0
2.0,3,1.0,4.0
3.0,3,2.0,3.0
4.0,3,3.0,2.0


In [21]:
# a and b are list-columns whose elements have matching lengths row by row.
df = tibble(
 a = [c("a", "b"), "c"],
 b = [[1,2], 3],
 c = c(11, 22)
)
# Unnesting both columns in a single call expands them side by side (3 rows).
df >> unnest(c(f.a, f.b))

Unnamed: 0,a,b,c
,<object>,<int64>,<int64>
0.0,a,1,11
1.0,b,2,11
2.0,c,3,22


In [22]:
# Unnesting one column at a time instead pairs every a with every b inside each
# original row (5 rows), unlike the single-call form above.
df >> unnest(f.a) >> unnest(f.b)

Unnamed: 0,a,b,c
,<object>,<int64>,<int64>
0.0,a,1,11
1.0,a,2,11
2.0,b,1,11
3.0,b,2,11
4.0,c,3,22
