In [1]:
# https://tidyr.tidyverse.org/reference/pivot_wider.html

from datar.datasets import fish_encounters, us_rent_income, warpbreaks 
from datar.all import *

# Execute the helper script inside this notebook's namespace; it presumably
# defines `nb_header` (the script is not shown here -- TODO confirm).
%run nb_helpers.py
# Appears to render the documentation of the verb demonstrated in this
# notebook (the argument reference shown in the output below).
nb_header(pivot_wider)

### # pivot_wider  

##### "Widens" data, increasing the number of columns and decreasing the number of rows.  

##### Args:
&emsp;&emsp;`_data`: A data frame to pivot.  
&emsp;&emsp;`id_cols`: A set of columns that uniquely identifies each observation.  
&emsp;&emsp;&emsp;&emsp;Defaults to all columns in data except for the columns specified  
&emsp;&emsp;&emsp;&emsp;in names_from and values_from.  

&emsp;&emsp;`names_from`, `values_from`: A pair of arguments describing which column  
&emsp;&emsp;&emsp;&emsp;(or columns) to get the name of the output column (names_from),  
&emsp;&emsp;&emsp;&emsp;and which column (or columns) to get the cell values from  
&emsp;&emsp;&emsp;&emsp;(values_from).  

&emsp;&emsp;`names_prefix`: String added to the start of every variable name.  
&emsp;&emsp;`names_sep`: If names_from or values_from contains multiple variables,  
&emsp;&emsp;&emsp;&emsp;this will be used to join their values together into a single  
&emsp;&emsp;&emsp;&emsp;string to use as a column name.  

&emsp;&emsp;`names_glue`: Instead of names_sep and names_prefix, you can supply  
&emsp;&emsp;&emsp;&emsp;a glue specification that uses the names_from columns  
&emsp;&emsp;&emsp;&emsp;(and special _value) to create custom column names.  

&emsp;&emsp;`names_sort`: Should the column names be sorted? If `False`, the default,  
&emsp;&emsp;&emsp;&emsp;column names are ordered by first appearance.  

&emsp;&emsp;`names_repair`: (TODO: not yet documented.)  
&emsp;&emsp;`values_fill`: Optionally, a (scalar) value that specifies what  
&emsp;&emsp;&emsp;&emsp;each value should be filled in with when missing.  

&emsp;&emsp;`values_fn`: Optionally, a function applied to the value in each cell  
&emsp;&emsp;&emsp;&emsp;in the output. You will typically use this when the combination  
&emsp;&emsp;&emsp;&emsp;of `id_cols` and value column does not uniquely identify  
&emsp;&emsp;&emsp;&emsp;an observation.  
&emsp;&emsp;&emsp;&emsp;This can also be a dict if you want to apply different aggregations  
&emsp;&emsp;&emsp;&emsp;to different value columns.  
&emsp;&emsp;&emsp;&emsp;If not specified, it defaults to `numpy.mean`.  

&emsp;&emsp;`base0_`: Whether `id_cols`, `names_from` and `values_from`  
&emsp;&emsp;&emsp;&emsp;are 0-based if given by indexes.  
&emsp;&emsp;&emsp;&emsp;If not provided, will use `datar.base.get_option('index.base.0')`  

##### Returns:
&emsp;&emsp;The pivoted dataframe.  


In [2]:
# Input for the first examples: the long-format `fish_encounters` data with
# columns `fish`, `station` and `seen` (see the output below).
fish_encounters

Unnamed: 0,fish,station,seen
0,4842,Release,1
1,4842,I80_1,1
2,4842,Lisbon,1
3,4842,Rstr,1
4,4842,Base_TD,1
...,...,...,...
109,4864,Release,1
110,4864,I80_1,1
111,4865,Release,1
112,4865,I80_1,1


In [3]:
# Spread `station` into one column per station, taking the cell values from
# `seen`; fish/station pairs absent from the input show up as missing values.
(
    fish_encounters
    >> pivot_wider(names_from=f.station, values_from=f.seen)
)

Unnamed: 0,fish,BCE,BCE2,BCW,BCW2,Base_TD,I80_1,Lisbon,MAE,MAW,Release,Rstr
0,4842,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,4843,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,4844,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,4845,,,,,1.0,1.0,1.0,,,1.0,1.0
4,4847,,,,,,1.0,1.0,,,1.0,
5,4848,,,,,,1.0,1.0,,,1.0,1.0
6,4849,,,,,,1.0,,,,1.0,
7,4850,1.0,,1.0,,1.0,1.0,,,,1.0,1.0
8,4851,,,,,,1.0,,,,1.0,
9,4854,,,,,,1.0,,,,1.0,


In [4]:
# Same pivot as before, but fill the missing fish/station combinations with 0
# instead of leaving them empty.
(
    fish_encounters
    >> pivot_wider(
        names_from=f.station,
        values_from=f.seen,
        values_fill=0,
    )
)

Unnamed: 0,fish,BCE,BCE2,BCW,BCW2,Base_TD,I80_1,Lisbon,MAE,MAW,Release,Rstr
0,4842,1,1,1,1,1,1,1,1,1,1,1
1,4843,1,1,1,1,1,1,1,1,1,1,1
2,4844,1,1,1,1,1,1,1,1,1,1,1
3,4845,0,0,0,0,1,1,1,0,0,1,1
4,4847,0,0,0,0,0,1,1,0,0,1,0
5,4848,0,0,0,0,0,1,1,0,0,1,1
6,4849,0,0,0,0,0,1,0,0,0,1,0
7,4850,1,0,1,0,1,1,0,0,0,1,1
8,4851,0,0,0,0,0,1,0,0,0,1,0
9,4854,0,0,0,0,0,1,0,0,0,1,0


In [5]:
# Input for the next examples: `us_rent_income`, with one row per
# (GEOID, NAME, variable) and two value columns, `estimate` and `moe`.
us_rent_income

Unnamed: 0,GEOID,NAME,variable,estimate,moe
0,1,Alabama,income,24476.0,136.0
1,1,Alabama,rent,747.0,3.0
2,2,Alaska,income,32940.0,508.0
3,2,Alaska,rent,1200.0,13.0
4,4,Arizona,income,27517.0,148.0
...,...,...,...,...,...
99,55,Wisconsin,rent,813.0,3.0
100,56,Wyoming,income,30854.0,342.0
101,56,Wyoming,rent,828.0,11.0
102,72,Puerto Rico,income,,


In [6]:
# Pivot two value columns at once: output names are built as
# "<value column>_<variable>" (e.g. `estimate_income`, `moe_rent`).
(
    us_rent_income
    >> pivot_wider(
        names_from=f.variable,
        values_from=c(f.estimate, f.moe),
    )
)

Unnamed: 0,GEOID,NAME,estimate_income,estimate_rent,moe_income,moe_rent
0,1,Alabama,24476.0,747.0,136.0,3.0
1,2,Alaska,32940.0,1200.0,508.0,13.0
2,4,Arizona,27517.0,972.0,148.0,4.0
3,5,Arkansas,23789.0,709.0,165.0,5.0
4,6,California,29454.0,1358.0,109.0,3.0
5,8,Colorado,32401.0,1125.0,109.0,5.0
6,9,Connecticut,35326.0,1123.0,195.0,5.0
7,10,Delaware,31560.0,1076.0,247.0,10.0
8,11,District of Columbia,43198.0,1424.0,681.0,17.0
9,12,Florida,25952.0,1077.0,70.0,3.0


In [7]:
# Use "." rather than the default separator when joining the value-column
# name and the `variable` value (e.g. `estimate.income`).
(
    us_rent_income
    >> pivot_wider(
        names_from=f.variable,
        values_from=c(f.estimate, f.moe),
        names_sep=".",
    )
)

Unnamed: 0,GEOID,NAME,estimate.income,estimate.rent,moe.income,moe.rent
0,1,Alabama,24476.0,747.0,136.0,3.0
1,2,Alaska,32940.0,1200.0,508.0,13.0
2,4,Arizona,27517.0,972.0,148.0,4.0
3,5,Arkansas,23789.0,709.0,165.0,5.0
4,6,California,29454.0,1358.0,109.0,3.0
5,8,Colorado,32401.0,1125.0,109.0,5.0
6,9,Connecticut,35326.0,1123.0,195.0,5.0
7,10,Delaware,31560.0,1076.0,247.0,10.0
8,11,District of Columbia,43198.0,1424.0,681.0,17.0
9,12,Florida,25952.0,1077.0,70.0,3.0


In [8]:
# Build the output names from a glue template instead of `names_sep`:
# `{variable}` is the `names_from` value and `{_value}` the value-column name
# (e.g. `income_estimate`).
(
    us_rent_income
    >> pivot_wider(
        names_from=f.variable,
        values_from=c(f.estimate, f.moe),
        names_glue="{variable}_{_value}",
    )
)

Unnamed: 0,GEOID,NAME,income_estimate,rent_estimate,income_moe,rent_moe
0,1,Alabama,24476.0,747.0,136.0,3.0
1,2,Alaska,32940.0,1200.0,508.0,13.0
2,4,Arizona,27517.0,972.0,148.0,4.0
3,5,Arkansas,23789.0,709.0,165.0,5.0
4,6,California,29454.0,1358.0,109.0,3.0
5,8,Colorado,32401.0,1125.0,109.0,5.0
6,9,Connecticut,35326.0,1123.0,195.0,5.0
7,10,Delaware,31560.0,1076.0,247.0,10.0
8,11,District of Columbia,43198.0,1424.0,681.0,17.0
9,12,Florida,25952.0,1077.0,70.0,3.0


In [9]:
# Input for the aggregation example: `warpbreaks`, with columns `breaks`,
# `wool` and `tension`; several rows share the same wool/tension pair.
warpbreaks

Unnamed: 0,breaks,wool,tension
0,26,A,L
1,30,A,L
2,54,A,L
3,25,A,L
4,70,A,L
5,52,A,L
6,51,A,L
7,26,A,L
8,67,A,L
9,18,A,M


In [10]:
# Each (tension, wool) pair occurs on several rows, so collapse the `breaks`
# values of every output cell with `mean`.
(
    warpbreaks
    >> pivot_wider(
        names_from=f.wool,
        values_from=f.breaks,
        values_fn=mean,
    )
)

Unnamed: 0,tension,A,B
0,H,24.555556,18.777778
1,L,44.555556,28.222222
2,M,24.0,28.777778
