In [1]:
%run nb_helpers.py
from datar.all import *

# Emit the documentation header for the four functions demonstrated below.
# NOTE(review): nb_header is not defined in this notebook; presumably it comes
# from nb_helpers.py via the %run above, and `book` is presumably a key for the
# matching reference section — confirm against the helper.
nb_header(
    fct_expand,
    fct_explicit_na,
    fct_drop,
    fct_unify,
    book="forcat_lvl_addrm",
)


### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ fct_expand</div>

##### Add additional levels to a factor

##### Args:
&emsp;&emsp;`_f`: A factor  
&emsp;&emsp;`*additional_levels`: Additional levels to add to the factor.  
&emsp;&emsp;&emsp;&emsp;Levels that already exist will be silently ignored.  

##### Returns:
&emsp;&emsp;The factor with levels expanded  


### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ fct_explicit_na</div>

##### Make missing values explicit

This gives missing values an explicit factor level, ensuring that they  
appear in summaries and on plots.  

##### Args:
&emsp;&emsp;`_f`: A factor  
&emsp;&emsp;`na_level`: Level to use for missing values.  
&emsp;&emsp;&emsp;&emsp;This is what NAs will be changed to.  

##### Returns:
&emsp;&emsp;The factor with missing values replaced by the explicit `na_level`  


### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ fct_drop</div>

##### Drop unused levels

##### Args:
&emsp;&emsp;`_f`: A factor  
&emsp;&emsp;`only`: A character vector restricting the set of levels to be dropped.  
&emsp;&emsp;&emsp;&emsp;If supplied, only levels that have no entries and appear in  
&emsp;&emsp;&emsp;&emsp;this vector will be removed.  

##### Returns:
&emsp;&emsp;The factor with unused levels dropped  


### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ fct_unify</div>

##### Unify the levels in a list of factors

##### Args:
&emsp;&emsp;`fs`: A list of factors  
&emsp;&emsp;`levels`: Set of levels to apply to every factor. Defaults to the  
&emsp;&emsp;&emsp;&emsp;union of all factor levels.  

##### Returns:
&emsp;&emsp;A list of factors with the levels expanded  


## fct_expand

In [2]:
# Sample 20 values, with replacement, from the first three letters and wrap
# them in a factor. No seed is set in this cell, so the values shown in the
# output are expected to differ between runs.
fct = factor(
    sample(letters[:3], 20, replace=TRUE),
)
fct

['c', 'a', 'a', 'c', 'b', ..., 'c', 'a', 'c', 'a', 'b']
Length: 20
Categories (3, object): ['a', 'b', 'c']

In [3]:
# Add three levels that do not occur in the data; the values themselves are
# unchanged, only the category list grows from 3 to 6.
fct_expand(fct, "d", "e", "f")

['c', 'a', 'a', 'c', 'b', ..., 'c', 'a', 'c', 'a', 'b']
Length: 20
Categories (6, object): ['a', 'b', 'c', 'd', 'e', 'f']

In [4]:
# The additional levels may also be passed as a single sequence. "a".."c"
# already exist, so the result has the same six categories as the call above.
fct_expand(fct, letters[:6])

['c', 'a', 'a', 'c', 'b', ..., 'c', 'a', 'c', 'a', 'b']
Length: 20
Categories (6, object): ['a', 'b', 'c', 'd', 'e', 'f']

## fct_explicit_na

In [5]:
# A factor containing three missing values; in the count below they appear in
# a row whose level is blank.
f1 = factor(
    c("a", "a", NA, NA, "a", "b", NA, "c", "a", "c", "b"),
)
fct_count(f1)

Unnamed: 0,f,n
,<category>,<float64>
0.0,a,4.0
1.0,b,2.0
2.0,c,2.0
3.0,,3.0


In [6]:
# Turn the missing values into a real level. `na_level` is not passed, so the
# default label is used (shown as "(Missing)" in the count below).
f2 = fct_explicit_na(f1)
fct_count(f2)

Unnamed: 0,f,n
,<category>,<int64>
0.0,a,4
1.0,b,2
2.0,c,2
3.0,(Missing),3


## fct_drop

In [7]:
# Declare level "c" without any matching value so the factor carries one
# unused level. This rebinds `fct` from the fct_expand section.
fct = factor(c("a", "b"), levels=c("a", "b", "c"))
fct

['a', 'b']
Categories (3, object): ['a', 'b', 'c']

In [8]:
# Without `only`, every unused level is removed — here the unused "c".
fct_drop(fct)

['a', 'b']
Categories (2, object): ['a', 'b']

In [9]:
# Restrict dropping to "a": it is in use, so nothing is removed and the
# unused "c" is kept.
fct_drop(fct, only="a")

['a', 'b']
Categories (3, object): ['a', 'b', 'c']

In [10]:
# Restrict dropping to "c": it has no entries, so it is removed.
fct_drop(fct, only="c")

['a', 'b']
Categories (2, object): ['a', 'b']

## fct_unify

In [11]:
# Three factors whose level sets differ: {a}, {b} and {a, b}. `levels` is not
# passed, so every factor in the result ends up with the levels ['a', 'b'].
fs = [
    factor("a"),
    factor("b"),
    factor(c("a", "b")),
]
fct_unify(fs)

[['a']
 Categories (2, object): ['a', 'b'],
 ['b']
 Categories (2, object): ['a', 'b'],
 ['a', 'b']
 Categories (2, object): ['a', 'b']]