In [1]:
# https://dplyr.tidyverse.org/reference/mutate.html
from datar.datasets import starwars
from datar.all import *

%run nb_helpers.py
nb_header(mutate, transmute)



### # mutate  

##### Adds new variables and preserves existing ones

The original API:  
https://dplyr.tidyverse.org/reference/mutate.html  

##### Args:
&emsp;&emsp;`_data`: A data frame  
&emsp;&emsp;`_keep`: allows you to control which columns from _data are retained  
&emsp;&emsp;&emsp;&emsp;in the output:  

&emsp;&emsp;&emsp;&emsp;- "all", the default, retains all variables.

&emsp;&emsp;&emsp;&emsp;- "used" keeps any variables used to make new variables;
&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;it's useful for checking your work as it displays inputs and  
&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;outputs side-by-side.  

&emsp;&emsp;&emsp;&emsp;- "unused" keeps only existing variables not used to make new
&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;variables.  

&emsp;&emsp;&emsp;&emsp;- "none", only keeps grouping keys (like transmute()).

&emsp;&emsp;`_before`: and  
&emsp;&emsp;`_after`: Optionally, control where new columns should appear  
&emsp;&emsp;&emsp;&emsp;(the default is to add to the right hand side).  
&emsp;&emsp;&emsp;&emsp;See relocate() for more details.  

&emsp;&emsp;`*args`: and  
&emsp;&emsp;`**kwargs`: Name-value pairs. The name gives the name of the column  
&emsp;&emsp;&emsp;&emsp;in the output. The value can be:  

&emsp;&emsp;&emsp;&emsp;- A vector of length 1, which will be recycled to the correct
&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;length.  

&emsp;&emsp;&emsp;&emsp;- A vector the same length as the current group (or the whole
&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;data frame if ungrouped).  

&emsp;&emsp;&emsp;&emsp;- None to remove the column

##### Returns:
&emsp;&emsp;An object of the same type as _data. The output has the following  
&emsp;&emsp;properties:  
&emsp;&emsp;- Rows are not affected.
&emsp;&emsp;- Existing columns will be preserved according to the _keep
&emsp;&emsp;&emsp;&emsp;argument. New columns will be placed according to the  
&emsp;&emsp;&emsp;&emsp;_before and _after arguments. If _keep = "none"  
&emsp;&emsp;&emsp;&emsp;(as in transmute()), the output order is determined only  
&emsp;&emsp;&emsp;&emsp;by ..., not the order of existing columns.  

&emsp;&emsp;- Columns given value None will be removed
&emsp;&emsp;- Groups will be recomputed if a grouping variable is mutated.
&emsp;&emsp;- Data frame attributes are preserved.


### # transmute  

##### Mutate with _keep='none'

See Also:  
&emsp;&emsp;[`mutate()`](datar.dplyr.mutate.mutate).  


In [2]:
starwars >> \
 select(f.name, f.mass) >> \
 mutate(
  mass2 = f.mass * 2,
  mass2_squared = f.mass2 * f.mass2
)

Unnamed: 0,name,mass,mass2,mass2_squared
,<object>,<float64>,<float64>,<float64>
0,Luke Skywalker,77.0,154.0,23716.0
1,C-3PO,75.0,150.0,22500.0
2,R2-D2,32.0,64.0,4096.0
3,Darth Vader,136.0,272.0,73984.0
...,...,...,...,...
4,Leia Organa,49.0,98.0,9604.0
82,Rey,,,
83,Poe Dameron,,,
84,BB8,,,


In [3]:
starwars >> \
 select(f.name, f.height, f.mass, f.homeworld) >> \
 mutate(
  mass = NULL,
  height = f.height * 0.0328084 # convert to feet
)

Unnamed: 0,name,height,homeworld
,<object>,<float64>,<object>
0,Luke Skywalker,5.643045,Tatooine
1,C-3PO,5.479003,Tatooine
2,R2-D2,3.149606,Naboo
3,Darth Vader,6.627297,Tatooine
...,...,...,...
4,Leia Organa,4.921260,Alderaan
82,Rey,,
83,Poe Dameron,,
84,BB8,,


In [4]:
x = starwars >> \
 select(f.name, f.homeworld, f.species) >> \
 mutate(across(~f.name, as_categorical))

x.dtypes
x

name           object
homeworld    category
species      category
dtype: object

Unnamed: 0,name,homeworld,species
,<object>,<category>,<category>
0,Luke Skywalker,Tatooine,Human
1,C-3PO,Tatooine,Droid
2,R2-D2,Naboo,Droid
3,Darth Vader,Tatooine,Human
...,...,...,...
4,Leia Organa,Alderaan,Human
82,Rey,,Human
83,Poe Dameron,,Human
84,BB8,,Droid


In [5]:
starwars >> \
 select(f.name, f.mass, f.homeworld) >> \
 group_by(f.homeworld) >> \
 mutate(rank=min_rank(desc(f.mass))) 

Unnamed: 0,name,mass,homeworld,rank
,<object>,<float64>,<object>,<float64>
0,Luke Skywalker,77.0,Tatooine,5.0
1,C-3PO,75.0,Tatooine,6.0
2,R2-D2,32.0,Naboo,6.0
3,Darth Vader,136.0,Tatooine,1.0
...,...,...,...,...
4,Leia Organa,49.0,Alderaan,2.0
82,Rey,,,
83,Poe Dameron,,,
84,BB8,,,


In [6]:
df = tibble(x=1, y=2)

df >> mutate(z=f.x+f.y)

Unnamed: 0,x,y,z
,<int64>,<int64>,<int64>
0.0,1,2,3


In [7]:
df >> mutate(z=f.x+f.y, _before=1) 

Unnamed: 0,x,z,y
,<int64>,<int64>,<int64>
0.0,1,3,2


In [8]:
# use 0-based index
df >> mutate(z=f.x+f.y, _before=0, base0_=True)

Unnamed: 0,z,base0_,x,y
,<int64>,<bool>,<int64>,<int64>
0.0,3,True,1,2


In [9]:
df >> mutate(z=f.x+f.y, _after=f.x)

Unnamed: 0,x,z,y
,<int64>,<int64>,<int64>
0.0,1,3,2


In [11]:
# use a temporary column
df >> mutate(_z=f.x+f.y, z=f._z*2, _after=f.x)

Unnamed: 0,x,z,y
,<int64>,<int64>,<int64>
0.0,1,6,2


In [12]:
df = tibble(x=1, y=2, a="a", b="b")
df >> mutate(z=f.x+f.y, _keep='all')

Unnamed: 0,x,y,a,b,z
,<int64>,<int64>,<object>,<object>,<int64>
0.0,1,2,a,b,3


In [13]:
df >> mutate(z=f.x+f.y, _keep='used')

Unnamed: 0,x,y,z
,<int64>,<int64>,<int64>
0.0,1,2,3


In [14]:
df >> mutate(z=f.x+f.y, _keep='unused')

Unnamed: 0,a,b,z
,<object>,<object>,<int64>
0.0,a,b,3


In [15]:
df >> mutate(z=f.x+f.y, _keep='none')

Unnamed: 0,z
,<int64>
0.0,3


In [16]:
starwars >> \
  select(f.name, f.mass, f.species) >> \
  mutate(mass_norm=f.mass/mean(f.mass, na_rm=True)) 

Unnamed: 0,name,mass,species,mass_norm
,<object>,<float64>,<object>,<float64>
0,Luke Skywalker,77.0,Human,0.791270
1,C-3PO,75.0,Droid,0.770718
2,R2-D2,32.0,Droid,0.328840
3,Darth Vader,136.0,Human,1.397569
...,...,...,...,...
4,Leia Organa,49.0,Human,0.503536
82,Rey,,Human,
83,Poe Dameron,,Human,
84,BB8,,Droid,


In [17]:
starwars >> \
  select(f.name, f.mass, f.species) >> \
  group_by(f.species) >> \
  mutate(mass_norm=f.mass / mean(f.mass, na_rm=True)) >> \
  ungroup() 

Unnamed: 0,name,mass,species,mass_norm
,<object>,<float64>,<object>,<float64>
0,Luke Skywalker,77.0,Human,0.930156
1,C-3PO,75.0,Droid,1.075269
2,R2-D2,32.0,Droid,0.458781
3,Darth Vader,136.0,Human,1.642873
...,...,...,...,...
4,Leia Organa,49.0,Human,0.591917
82,Rey,,Human,
83,Poe Dameron,,Human,
84,BB8,,Droid,


In [18]:
vars = ["mass", "height"]
starwars >> mutate(starwars, prod=f[vars[0]] * f[vars[1]])

Unnamed: 0,name,height,mass,hair_color,skin_color,eye_color,birth_year,sex,gender,homeworld,species,prod
,<object>,<float64>,<float64>,<object>,<object>,<object>,<float64>,<object>,<object>,<object>,<object>,<float64>
0,Luke Skywalker,172.0,77.0,blond,fair,blue,19.0,male,masculine,Tatooine,Human,13244.0
1,C-3PO,167.0,75.0,,gold,yellow,112.0,none,masculine,Tatooine,Droid,12525.0
2,R2-D2,96.0,32.0,,"white, blue",red,33.0,none,masculine,Naboo,Droid,3072.0
3,Darth Vader,202.0,136.0,none,white,yellow,41.9,male,masculine,Tatooine,Human,27472.0
...,...,...,...,...,...,...,...,...,...,...,...,...
4,Leia Organa,150.0,49.0,brown,light,brown,19.0,female,feminine,Alderaan,Human,7350.0
82,Rey,,,brown,light,hazel,,female,feminine,,Human,
83,Poe Dameron,,,brown,light,brown,,male,masculine,,Human,
84,BB8,,,none,none,black,,none,masculine,,Droid,
