In [1]:
# https://tidyr.tidyverse.org/reference/pivot_wider.html

from datar.datasets import fish_encounters, us_rent_income, warpbreaks 
from datar.all import *

# Execute the local helper script in this kernel; `nb_header` is presumably
# defined there (TODO confirm against nb_helpers.py).
%run nb_helpers.py
# Show the documentation of `pivot_wider` (rendered in this cell's output).
nb_header(pivot_wider)



### pivot_wider

 `pivot_wider` "widens" data, increasing the number of columns and decreasing
    the number of rows.

    Args:
        _data: A data frame to pivot.
        id_cols: A set of columns that uniquely identifies each observation.
            Defaults to all columns in data except for the columns specified
            in names_from and values_from.
        names_from, values_from: A pair of arguments describing which column
            (or columns) to get the name of the output column from
            (names_from), and which column (or columns) to get the cell
            values from (values_from).
        names_prefix: String added to the start of every variable name.
        names_sep: If names_from or values_from contains multiple variables,
            this will be used to join their values together into a single
            string to use as a column name.
        names_glue: Instead of names_sep and names_prefix, you can supply
            a glue specification that uses the names_from columns
            (and special _value) to create custom column names.
        names_sort: Should the column names be sorted? If False, the default,
            column names are ordered by first appearance.
        names_repair: todo
        values_fill: Optionally, a (scalar) value that specifies what
            each value should be filled in with when missing.
        values_fn: Optionally, a function applied to the value in each cell
            in the output. You will typically use this when the combination
            of `id_cols` and value column does not uniquely identify
            an observation.
            This can be a dict if you want to apply different aggregations
            to different value columns.
            If not specified, `numpy.mean` is used.

    Returns:
        The pivoted dataframe.
    

In [2]:
# Long-form input for the first examples: the preview below has the columns
# `fish`, `station` and `seen`, one row per fish/station pair.
fish_encounters

Unnamed: 0,fish,station,seen
,<int64>,<object>,<int64>
0,4842,Release,1
1,4842,I80_1,1
2,4842,Lisbon,1
3,4842,Rstr,1
...,...,...,...
4,4842,Base_TD,1
109,4864,Release,1
110,4864,I80_1,1
111,4865,Release,1


In [3]:
# Give every station its own column, with the cells taken from `seen`.
# Fish/station pairs that never occur in the input are missing in the result.
# NOTE(review): the output columns below look alphabetically sorted, while the
# `names_sort` docstring says first-appearance order is the default — confirm.
(
    fish_encounters
    >> pivot_wider(
        names_from=f.station,
        values_from=f.seen,
    )
)

Unnamed: 0,fish,BCE,BCE2,BCW,BCW2,Base_TD,I80_1,Lisbon,MAE,MAW,Release,Rstr
,<int64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>
0.0,4842,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1.0,4843,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2.0,4844,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3.0,4845,,,,,1.0,1.0,1.0,,,1.0,1.0
4.0,4847,,,,,,1.0,1.0,,,1.0,
5.0,4848,,,,,,1.0,1.0,,,1.0,1.0
6.0,4849,,,,,,1.0,,,,1.0,
7.0,4850,1.0,,1.0,,1.0,1.0,,,,1.0,1.0
8.0,4851,,,,,,1.0,,,,1.0,


In [4]:
# Same pivot as before, but `values_fill=0` puts 0 in the cells of fish/station
# pairs that are absent from the input instead of leaving them missing.
(
    fish_encounters
    >> pivot_wider(
        names_from=f.station,
        values_from=f.seen,
        values_fill=0,
    )
)

Unnamed: 0,fish,BCE,BCE2,BCW,BCW2,Base_TD,I80_1,Lisbon,MAE,MAW,Release,Rstr
,<int64>,<int64>,<int64>,<int64>,<int64>,<int64>,<int64>,<int64>,<int64>,<int64>,<int64>,<int64>
0.0,4842,1,1,1,1,1,1,1,1,1,1,1
1.0,4843,1,1,1,1,1,1,1,1,1,1,1
2.0,4844,1,1,1,1,1,1,1,1,1,1,1
3.0,4845,0,0,0,0,1,1,1,0,0,1,1
4.0,4847,0,0,0,0,0,1,1,0,0,1,0
5.0,4848,0,0,0,0,0,1,1,0,0,1,1
6.0,4849,0,0,0,0,0,1,0,0,0,1,0
7.0,4850,1,0,1,0,1,1,0,0,0,1,1
8.0,4851,0,0,0,0,0,1,0,0,0,1,0


In [5]:
# Input with two value columns (`estimate`, `moe`); in the preview below each
# NAME has one `income` row and one `rent` row in `variable`.
us_rent_income

Unnamed: 0,GEOID,NAME,variable,estimate,moe
,<int64>,<object>,<object>,<float64>,<float64>
0,1,Alabama,income,24476.0,136.0
1,1,Alabama,rent,747.0,3.0
2,2,Alaska,income,32940.0,508.0
3,2,Alaska,rent,1200.0,13.0
...,...,...,...,...,...
4,4,Arizona,income,27517.0,148.0
99,55,Wisconsin,rent,813.0,3.0
100,56,Wyoming,income,30854.0,342.0
101,56,Wyoming,rent,828.0,11.0


In [6]:
# Pivot two value columns at once: each output column is named after the value
# column plus the `variable` level (e.g. `estimate_income` in the output below).
(
    us_rent_income
    >> pivot_wider(
        names_from=f.variable,
        values_from=c(f.estimate, f.moe),
    )
)

Unnamed: 0,GEOID,NAME,estimate_income,estimate_rent,moe_income,moe_rent
,<int64>,<object>,<float64>,<float64>,<float64>,<float64>
0.0,1,Alabama,24476.0,747.0,136.0,3.0
1.0,2,Alaska,32940.0,1200.0,508.0,13.0
2.0,4,Arizona,27517.0,972.0,148.0,4.0
3.0,5,Arkansas,23789.0,709.0,165.0,5.0
4.0,6,California,29454.0,1358.0,109.0,3.0
5.0,8,Colorado,32401.0,1125.0,109.0,5.0
6.0,9,Connecticut,35326.0,1123.0,195.0,5.0
7.0,10,Delaware,31560.0,1076.0,247.0,10.0
8.0,11,District of Columbia,43198.0,1424.0,681.0,17.0


In [7]:
# `names_sep` sets the string that joins the value column name and the
# `variable` level, giving e.g. `estimate.income` in the output below.
(
    us_rent_income
    >> pivot_wider(
        names_from=f.variable,
        names_sep=".",
        values_from=c(f.estimate, f.moe),
    )
)

Unnamed: 0,GEOID,NAME,estimate.income,estimate.rent,moe.income,moe.rent
,<int64>,<object>,<float64>,<float64>,<float64>,<float64>
0.0,1,Alabama,24476.0,747.0,136.0,3.0
1.0,2,Alaska,32940.0,1200.0,508.0,13.0
2.0,4,Arizona,27517.0,972.0,148.0,4.0
3.0,5,Arkansas,23789.0,709.0,165.0,5.0
4.0,6,California,29454.0,1358.0,109.0,3.0
5.0,8,Colorado,32401.0,1125.0,109.0,5.0
6.0,9,Connecticut,35326.0,1123.0,195.0,5.0
7.0,10,Delaware,31560.0,1076.0,247.0,10.0
8.0,11,District of Columbia,43198.0,1424.0,681.0,17.0


In [8]:
# `names_glue` builds the output names from a template: `{variable}` is the
# names_from level and `{_value}` the value column, so the order is flipped
# (e.g. `income_estimate` in the output below).
(
    us_rent_income
    >> pivot_wider(
        names_from=f.variable,
        names_glue="{variable}_{_value}",
        values_from=c(f.estimate, f.moe),
    )
)

Unnamed: 0,GEOID,NAME,income_estimate,rent_estimate,income_moe,rent_moe
,<int64>,<object>,<float64>,<float64>,<float64>,<float64>
0.0,1,Alabama,24476.0,747.0,136.0,3.0
1.0,2,Alaska,32940.0,1200.0,508.0,13.0
2.0,4,Arizona,27517.0,972.0,148.0,4.0
3.0,5,Arkansas,23789.0,709.0,165.0,5.0
4.0,6,California,29454.0,1358.0,109.0,3.0
5.0,8,Colorado,32401.0,1125.0,109.0,5.0
6.0,9,Connecticut,35326.0,1123.0,195.0,5.0
7.0,10,Delaware,31560.0,1076.0,247.0,10.0
8.0,11,District of Columbia,43198.0,1424.0,681.0,17.0


In [9]:
# Input with columns `breaks`, `wool` and `tension`; the previewed rows share
# the same wool/tension pair (A, L), so a pivot has to aggregate `breaks`.
warpbreaks

Unnamed: 0,breaks,wool,tension
,<int64>,<object>,<object>
0.0,26,A,L
1.0,30,A,L
2.0,54,A,L
3.0,25,A,L
4.0,70,A,L
5.0,52,A,L
6.0,51,A,L
7.0,26,A,L
8.0,67,A,L


In [10]:
# Several rows share each tension/wool pair, so `values_fn=mean` collapses
# their `breaks` values into one averaged cell per pair.
(
    warpbreaks
    >> pivot_wider(
        names_from=f.wool,
        values_from=f.breaks,
        values_fn=mean,
    )
)

Unnamed: 0,tension,A,B
,<object>,<float64>,<float64>
0.0,H,24.555556,18.777778
1.0,L,44.555556,28.222222
2.0,M,24.000000,28.777778
