In [1]:
# https://dplyr.tidyverse.org/reference/case_when.html
from datar.datasets import starwars 
from datar.all import *

%run nb_helpers.py
nb_header(case_when)



### # case_when  

##### Vectorise multiple `if_else()` statements.

##### Args:
&emsp;&emsp;`*when_cases`: A even-size sequence, with 2n-th element values to match,  
&emsp;&emsp;&emsp;&emsp;and 2(n+1)-th element the values to replace.  
&emsp;&emsp;&emsp;&emsp;When matching value is True, then next value will be default to  
&emsp;&emsp;&emsp;&emsp;replace  

##### Returns:
&emsp;&emsp;A series with values replaced  


In [2]:
df = tibble(x=range(1,51))
df >> mutate(y=case_when(
    f.x % 35 == 0, "fizz buzz",
    f.x % 5 == 0,  "fizz",
    f.x % 7 == 0,  "buzz",
    True,          as_character(f.x)
)) >> pull(f.y)

0             1
1             2
2             3
3             4
4          fizz
5             6
6          buzz
7             8
8             9
9          fizz
10           11
11           12
12           13
13         buzz
14         fizz
15           16
16           17
17           18
18           19
19         fizz
20         buzz
21           22
22           23
23           24
24         fizz
25           26
26           27
27         buzz
28           29
29         fizz
30           31
31           32
32           33
33           34
34    fizz buzz
35           36
36           37
37           38
38           39
39         fizz
40           41
41         buzz
42           43
43           44
44         fizz
45           46
46           47
47           48
48         buzz
49         fizz
Name: y, dtype: object

In [3]:
df >> mutate(y=case_when(
    True,          as_character(f.x),
    f.x % 5 == 0,  "fizz",
    f.x % 7 == 0,  "buzz",
    f.x % 35 == 0, "fizz buzz"
)) >> pull(f.y)

0      1
1      2
2      3
3      4
4      5
5      6
6      7
7      8
8      9
9     10
10    11
11    12
12    13
13    14
14    15
15    16
16    17
17    18
18    19
19    20
20    21
21    22
22    23
23    24
24    25
25    26
26    27
27    28
28    29
29    30
30    31
31    32
32    33
33    34
34    35
35    36
36    37
37    38
38    39
39    40
40    41
41    42
42    43
43    44
44    45
45    46
46    47
47    48
48    49
49    50
Name: y, dtype: object

In [4]:
df >> mutate(y=case_when(
    f.x % 5 == 0,  "fizz",
    f.x % 7 == 0,  "buzz",
    f.x % 35 == 0, "fizz buzz"
)) >> pull(f.y)

0      NaN
1      NaN
2      NaN
3      NaN
4     fizz
5      NaN
6     buzz
7      NaN
8      NaN
9     fizz
10     NaN
11     NaN
12     NaN
13    buzz
14    fizz
15     NaN
16     NaN
17     NaN
18     NaN
19    fizz
20    buzz
21     NaN
22     NaN
23     NaN
24    fizz
25     NaN
26     NaN
27    buzz
28     NaN
29    fizz
30     NaN
31     NaN
32     NaN
33     NaN
34    fizz
35     NaN
36     NaN
37     NaN
38     NaN
39    fizz
40     NaN
41    buzz
42     NaN
43     NaN
44    fizz
45     NaN
46     NaN
47     NaN
48    buzz
49    fizz
Name: y, dtype: object

In [5]:
df.iloc[[1,2,3], 0] = NA

df >> mutate(y=case_when(
    f.x % 35 == 0, "fizz buzz",
    f.x % 5 == 0,  "fizz",
    f.x % 7 == 0,  "buzz",
    is_na(f.x),    "nope",
    True,          as_character(f.x)
)) >> pull(f.y)

0           1.0
1          nope
2          nope
3          nope
4          fizz
5           6.0
6          buzz
7           8.0
8           9.0
9          fizz
10         11.0
11         12.0
12         13.0
13         buzz
14         fizz
15         16.0
16         17.0
17         18.0
18         19.0
19         fizz
20         buzz
21         22.0
22         23.0
23         24.0
24         fizz
25         26.0
26         27.0
27         buzz
28         29.0
29         fizz
30         31.0
31         32.0
32         33.0
33         34.0
34    fizz buzz
35         36.0
36         37.0
37         38.0
38         39.0
39         fizz
40         41.0
41         buzz
42         43.0
43         44.0
44         fizz
45         46.0
46         47.0
47         48.0
48         buzz
49         fizz
Name: y, dtype: object

In [6]:
df >> mutate(y=case_when(
    f.x % 35 == 0, NA,
    f.x % 5 == 0,  "fizz",
    f.x % 7 == 0,  "buzz",
    True,          as_character(f.x)
)) >> pull(f.y)

0      1.0
1      NaN
2      NaN
3      NaN
4     fizz
5      6.0
6     buzz
7      8.0
8      9.0
9     fizz
10    11.0
11    12.0
12    13.0
13    buzz
14    fizz
15    16.0
16    17.0
17    18.0
18    19.0
19    fizz
20    buzz
21    22.0
22    23.0
23    24.0
24    fizz
25    26.0
26    27.0
27    buzz
28    29.0
29    fizz
30    31.0
31    32.0
32    33.0
33    34.0
34     NaN
35    36.0
36    37.0
37    38.0
38    39.0
39    fizz
40    41.0
41    buzz
42    43.0
43    44.0
44    fizz
45    46.0
46    47.0
47    48.0
48    buzz
49    fizz
Name: y, dtype: object

In [7]:
df >> mutate(y=case_when(
    f.x % 35 == 0, 35,
    f.x % 5 == 0,  5,
    f.x % 7 == 0,  7,
    True,          NA)
) >> pull(f.y)

0      NaN
1      NaN
2      NaN
3      NaN
4      5.0
5      NaN
6      7.0
7      NaN
8      NaN
9      5.0
10     NaN
11     NaN
12     NaN
13     7.0
14     5.0
15     NaN
16     NaN
17     NaN
18     NaN
19     5.0
20     7.0
21     NaN
22     NaN
23     NaN
24     5.0
25     NaN
26     NaN
27     7.0
28     NaN
29     5.0
30     NaN
31     NaN
32     NaN
33     NaN
34    35.0
35     NaN
36     NaN
37     NaN
38     NaN
39     5.0
40     NaN
41     7.0
42     NaN
43     NaN
44     5.0
45     NaN
46     NaN
47     NaN
48     7.0
49     5.0
Name: y, dtype: float64

In [8]:
df = tibble(x=seq(-2, 2.1, by=.5))
df >> mutate(y=case_when(
    f.x >= 0, sqrt(f.x),
    True,     f.x
)) >> pull(f.y)

  result = getattr(ufunc, method)(*inputs, **kwargs)


0   -2.000000
1   -1.500000
2   -1.000000
3   -0.500000
4    0.000000
5    0.707107
6    1.000000
7    1.224745
8    1.414214
Name: y, dtype: float64

In [9]:
starwars >> \
  select(f[f.name:f.mass], f.gender, f.species) >> \
  mutate(
    type = case_when(
      f.height > 200 or f.mass > 200, "large",
      f.species == "Droid"          , "robot",
      True                          , "other"
    )
  )

Unnamed: 0,name,height,gender,species,type
,<object>,<float64>,<object>,<object>,<object>
0,Luke Skywalker,172.0,masculine,Human,other
1,C-3PO,167.0,masculine,Droid,robot
2,R2-D2,96.0,masculine,Droid,robot
3,Darth Vader,202.0,masculine,Human,large
...,...,...,...,...,...
4,Leia Organa,150.0,feminine,Human,other
82,Rey,,feminine,Human,other
83,Poe Dameron,,masculine,Human,other
84,BB8,,masculine,Droid,robot


In [10]:
starwars >> \
  mutate(type=case_when(
    f.height > 200 or f.mass > 200, "large",
    f.species == "Droid",           "robot",
    True,                           "other"
  )) >> \
  pull(f.type)

0     other
1     robot
2     robot
3     large
4     other
      ...  
82    other
83    other
84    robot
85    other
86    other
Name: type, Length: 87, dtype: object