In [1]:
# https://dplyr.tidyverse.org/reference/reframe.html
%run nb_helpers.py

from datar.data import starwars
from datar.all import *

# Emit the notebook header for `reframe`; `nb_header` is presumably defined by
# the `%run nb_helpers.py` line above (not visible here) — TODO confirm.
nb_header(reframe)

### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ reframe</div>

##### Reframe a data frame.

See original API  
https://dplyr.tidyverse.org/reference/reframe.html  

##### Args:
&emsp;&emsp;`_data`: A data frame  
&emsp;&emsp;`*args`: and  
&emsp;&emsp;`**kwargs`: Name-value pairs, where value is the reframed  
&emsp;&emsp;&emsp;&emsp;data for each group  

##### Returns:
&emsp;&emsp;A data frame with the reframed columns  


In [2]:
# Reference values that column `x` is intersected with below.
table = c("a", "b", "d", "f")

# Small example frame: a grouping key `g` and a string column `x`.
df = tibble(g=c(1, 1, 1, 2, 2, 2, 2), x=c("e", "a", "b", "c", "f", "d", "a"))

# With `reframe()`, the applied function may return any number of rows.
df >> reframe(x=intersect(f.x, table))

Unnamed: 0,x
,<object>
0.0,a
1.0,b
2.0,f
3.0,d


In [3]:
# Group `df` by column `g`; the cell displays the resulting frame.
df >> group_by(f.g)

Unnamed: 0,g,x
,<int64>,<object>
0.0,1,e
1.0,1,a
2.0,1,b
3.0,2,c
4.0,2,f
5.0,2,d
6.0,2,a


In [4]:
# Even when the input comes from `group_by()`, `reframe()` returns an
# ungrouped result.
df >> group_by(f.g) >> reframe(x=intersect(f.x, table))

Unnamed: 0,g,x
,<Int64>,<string>
0.0,1,a
1.0,1,b
2.0,2,f
3.0,2,d
4.0,2,a


In [12]:
from pandas.core.groupby import SeriesGroupBy
from pipda import register_func

@register_func
def quantile_df(x, probs=None):
    """Summarise `x` as a tibble of quantiles, one row per probability.

    Args:
        x: The values to take quantiles of. The notebook calls this with a
            plain list and with column references such as `f.height`; for
            grouped data the function checks for a `SeriesGroupBy`.
        probs: Probabilities at which to evaluate the quantiles. When
            omitted (or `None`), the quartiles `[0.25, 0.5, 0.75]` are used.

    Returns:
        A tibble with two columns: `val`, the result of
        `quantile(x, probs, na_rm=True)`, and `quant`, the probabilities.
    """
    if probs is None:
        # Build the default on every call instead of in the signature, so no
        # mutable list object is shared between invocations.
        probs = [0.25, 0.5, 0.75]

    return tibble(
        val=quantile(x, probs, na_rm=True),
        # Grouped input gets the probabilities wrapped in an outer list;
        # presumably this makes them one per-group value that expands next to
        # the per-group quantiles — TODO confirm against datar's `tibble`.
        quant=[probs] if isinstance(x, SeriesGroupBy) else probs,
    )


# Try `quantile_df` on a plain Python list, using its default probabilities.
x = [10, 15, 18, 12]
quantile_df(x)

Unnamed: 0,val,quant
,<float64>,<float64>
0.0,11.50,0.25
1.0,13.50,0.50
2.0,15.75,0.75


In [13]:
# Quantiles of `height` over the whole, ungrouped `starwars` data.
starwars >> reframe(quantile_df(f.height))

Unnamed: 0,val,quant
,<float64>,<float64>
0.0,167.0,0.25
1.0,180.0,0.50
2.0,191.0,0.75


In [14]:
# Same quantiles of `height`, now computed separately for each homeworld.
starwars >> group_by(f.homeworld) >> reframe(quantile_df(f.height))

Unnamed: 0,homeworld,val,quant
,<string>,<Float64>,<Float64>
0,Tatooine,165.5,0.25
1,Tatooine,175.0,0.5
2,Tatooine,183.0,0.75
3,Naboo,165.0,0.25
...,...,...,...
4,Naboo,183.0,0.5
142,Umbara,178.0,0.5
143,Umbara,178.0,0.75
144,Utapau,206.0,0.25


In [None]:
# Apply `quantile_df` to both `height` and `mass` within each homeworld.
(
    starwars
    >> group_by(f.homeworld)
    >> reframe(across(c(f.height, f.mass), quantile_df))
)

  return _nanquantile_unchecked(


Unnamed: 0,homeworld,height$val,height$quant,mass$val,mass$quant
,<string>,<Float64>,<Float64>,<Float64>,<Float64>
0,Tatooine,165.5,0.25,75.0,0.25
1,Tatooine,175.0,0.5,80.5,0.5
2,Tatooine,183.0,0.75,93.0,0.75
3,Naboo,165.0,0.25,50.25,0.25
...,...,...,...,...,...
4,Naboo,183.0,0.5,70.5,0.5
142,Umbara,178.0,0.5,48.0,0.5
143,Umbara,178.0,0.75,48.0,0.75
144,Utapau,206.0,0.25,80.0,0.25
