In [1]:
%run nb_helpers.py

import numpy
from datar.all import *
from datar.data import gss_cat


# Emit the notebook header: the documentation block rendered below lists the
# functions passed here, in this order (nb_header is not defined in this
# notebook — presumably it comes from nb_helpers.py).
# NOTE(review): the `book` id reads like the "add/remove levels" notebook,
# while the functions listed are the misc and lvls_* helpers — confirm it is
# the intended value.
nb_header(
    # general factor helpers
    as_factor, fct_count, fct_match, fct_unique,
    # low-level helpers that manipulate levels directly
    lvls_reorder, lvls_revalue, lvls_expand, lvls_union,
    book="forcat_lvl_addrm",
)


### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ as_factor</div>

##### Convert a vector to a factor vector

##### Args:
&emsp;&emsp;`x`: A vector to convert, e.g. of strings or numbers  

##### Returns:
&emsp;&emsp;The factor vector  


### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ fct_count</div>

##### Count entries in a factor

##### Args:
&emsp;&emsp;`_f`: A factor  
&emsp;&emsp;`sort`: If True, sort the result so that the most common values float to  
&emsp;&emsp;&emsp;&emsp;the top  

&emsp;&emsp;`prop`: If True, also compute each level's proportion of the total count (column `p`).  

##### Returns:
&emsp;&emsp;A data frame with columns `f` and `n`, plus `p` if `prop` is True  


### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ fct_match</div>

##### Test for presence of levels in a factor

Do any of `lvls` occur in `_f`?  

##### Args:
&emsp;&emsp;`_f`: A factor  
&emsp;&emsp;`lvls`: A vector specifying levels to look for.  

##### Returns:
&emsp;&emsp;A logical (boolean) vector with one entry per element of `_f`  


### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ fct_unique</div>

##### Unique values of a factor

##### Args:
&emsp;&emsp;`_f`: A factor  

##### Returns:
&emsp;&emsp;The factor with the unique values in `_f`  


### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ lvls_reorder</div>

##### Leave the values of a factor as they are, but change the order of its levels by the given indices

##### Args:
&emsp;&emsp;`f`: A factor (or character vector).  
&emsp;&emsp;`idx`: An integer index (0-based), with one integer for each existing level.  
&emsp;&emsp;`new_levels`: A character vector of new levels.  
&emsp;&emsp;`ordered`: A logical which determines the "ordered" status of the  
&emsp;&emsp;&emsp;&emsp;output factor. `None` preserves the existing status of the factor.  

##### Returns:
&emsp;&emsp;The factor with levels reordered  


### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ lvls_revalue</div>

##### Change the values of existing levels; there must be one new level for each old level

##### Args:
&emsp;&emsp;`_f`: A factor  
&emsp;&emsp;`new_levels`: A character vector of new levels.  

##### Returns:
&emsp;&emsp;The factor with the new levels  


### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ lvls_expand</div>

##### Expand the set of levels; the new levels must include the old levels

##### Args:
&emsp;&emsp;`_f`: A factor  
&emsp;&emsp;`new_levels`: The new levels. Must include the old ones  

##### Returns:
&emsp;&emsp;The factor with the new levels  


### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ lvls_union</div>

##### Find all levels in a list of factors

##### Args:
&emsp;&emsp;`fs`: A list of factors  

##### Returns:
&emsp;&emsp;A list of all levels  


## as_factor

In [2]:
# Convert a string vector to a factor. The values keep their input order,
# while the levels come out sorted ('a', 'g', 'z') rather than in order of
# appearance.
x = c("a", "z", "g")
as_factor(x)

['a', 'z', 'g']
Categories (3, object): ['a', 'g', 'z']

In [3]:
# Number-like strings are still strings: the levels sort lexicographically,
# so '11' lands before '2.2'.
y = c("1.1", "11", "2.2", "22")
as_factor(y)

['1.1', '11', '2.2', '22']
Categories (4, object): ['1.1', '11', '2.2', '22']

In [4]:
# After converting the strings to numbers, the levels sort numerically
# (1.1, 2.2, 11.0, 22.0).
z = as_numeric(y)
as_factor(z)



[1.1, 11.0, 2.2, 22.0]
Categories (4, float64): [1.1, 2.2, 11.0, 22.0]

## fct_count

In [5]:
# Build a factor of 1000 letters by indexing `sample(letters)` with the values
# from `rpois(1000, 10)` (presumably a shuffled alphabet indexed by Poisson
# draws with mean 10, as in R), so a handful of letters dominate.
# NOTE(review): no seed is set in this notebook, so the counts shown below
# change on every run (unless nb_helpers.py seeds the RNG — confirm).
fct = factor(sample(letters)[rpois(1000, 10)])
# Tabulate the factor: one column per level, holding its count.
table(fct)

Unnamed: 0,b,c,d,e,i,k,l,m,n,o,...,q,r,s,t,u,v,w,x,y,z
,<int64>,<int64>,<int64>,<int64>,<int64>,<int64>,<int64>,<int64>,<int64>,<int64>,...,<int64>,<int64>,<int64>,<int64>,<int64>,<int64>,<int64>,<int64>,<int64>,<int64>
count,8,88,37,1,45,67,2,14,105,4,...,5,22,1,87,49,17,134,128,72,112


In [6]:
# Same counts as a data frame: one row per level (`f`) with its count (`n`),
# listed in level order.
fct_count(fct)

Unnamed: 0,f,n
,<category>,<int64>
0.0,b,8
1.0,c,88
2.0,d,37
3.0,e,1
4.0,i,45
5.0,k,67
6.0,l,2
7.0,m,14
8.0,n,105


In [7]:
# sort=True puts the most common levels at the top (rows ordered by
# descending `n`).
fct_count(fct, sort=True)

Unnamed: 0,f,n
,<category>,<int64>
17.0,w,134
18.0,x,128
20.0,z,112
8.0,n,105
1.0,c,88
14.0,t,87
19.0,y,72
5.0,k,67
15.0,u,49


In [8]:
# prop=True adds column `p`: each level's count divided by the total number
# of entries.
fct_count(fct, sort=True, prop=True)

Unnamed: 0,f,n,p
,<category>,<int64>,<float64>
17.0,w,134,0.134
18.0,x,128,0.128
20.0,z,112,0.112
8.0,n,105,0.105
1.0,c,88,0.088
14.0,t,87,0.087
19.0,y,72,0.072
5.0,k,67,0.067
15.0,u,49,0.049


## fct_match

In [9]:
# Flag every entry of gss_cat.marital that is one of the given levels, then
# tabulate the resulting True/False flags.
table(fct_match(gss_cat.marital, c("Married", "Divorced")))

Unnamed: 0,False,True
,<int64>,<int64>
count,7983,13500


In [10]:
# The level names are misspelled on purpose: numpy.isin does not complain, it
# just reports False for every entry.
table(numpy.isin(gss_cat.marital, c("Maried", "Davorced")))

Unnamed: 0,False
,<int64>
count,21483


In [11]:
# fct_match, unlike numpy.isin, rejects levels that are not present in the
# factor by raising ValueError. try_catch() is not defined in this notebook
# (presumably it comes from nb_helpers.py); it reports the error instead of
# aborting the run.
with try_catch():
    table(fct_match(gss_cat.marital, c("Maried", "Davorced")))

[ValueError] Levels not present in factor: ['Maried' 'Davorced'].


## fct_unique

In [12]:
# A fresh random factor of 100 letters. The `- 1` shifts the drawn values,
# presumably to turn 1-based positions into 0-based indices.
# NOTE(review): a draw of 0 would become index -1 — confirm that is acceptable.
fct = factor(letters[rpois(100, 10) - 1])

# Plain `unique` returns the distinct values as an object array, not sorted
# by level and no longer a factor.
unique(fct)

array(['p', 'k', 'i', 'j', 'e', 'r', 'm', 'g', 'n', 'f', 'o', 'h', 'l',
       'd', 'c'], dtype=object)

In [13]:
# fct_unique keeps the result a factor; as the output shows, the unique values
# are listed in the same order as the levels.
fct_unique(fct)

['c', 'd', 'e', 'f', 'g', ..., 'm', 'n', 'o', 'p', 'r']
Length: 15
Categories (15, object): ['c', 'd', 'e', 'f', ..., 'n', 'o', 'p', 'r']

## lvls_reorder, lvls_revalue and lvls_expand

In [14]:
# Reverse the order of the levels using 0-based indices; the values
# themselves are left untouched.
fct = factor(c("a", "b", "c"))
lvls_reorder(fct, [2, 1, 0])

['a', 'b', 'c']
Categories (3, object): ['c', 'b', 'a']

In [15]:
# Rename the levels positionally (a -> apple, b -> banana, c -> carrot); the
# values follow their renamed levels.
lvls_revalue(fct, c("apple", "banana", "carrot"))

['apple', 'banana', 'carrot']
Categories (3, object): ['apple', 'banana', 'carrot']

In [16]:
# Add the extra level "d"; the new level list must still contain the old
# levels, and the values are unchanged.
lvls_expand(fct, c("a", "b", "c", "d"))

['a', 'b', 'c']
Categories (4, object): ['a', 'b', 'c', 'd']

## lvls_union

In [17]:
# Three factors with overlapping levels; lvls_union collects every level that
# occurs in any of them, without duplicates.
fs = [
    factor("a"),
    factor("b"),
    factor(c("a", "b")),
]
lvls_union(fs)

array(['a', 'b'], dtype=object)