In [1]:
import numpy
from datar.all import *
from datar.datasets import gss_cat

# Execute the shared helper script so the names it defines are available in
# this notebook's namespace.
%run nb_helpers.py

# Show the API reference for the functions demonstrated in this notebook.
# NOTE(review): `nb_header` is presumably defined in nb_helpers.py, and `book`
# presumably identifies the matching upstream reference page -- confirm.
nb_header(
    as_factor,
    fct_count,
    fct_match,
    fct_unique,
    lvls_reorder,
    lvls_revalue,
    lvls_expand,
    lvls_union,
    book="forcat_lvl_addrm",
)




### # fct_inorder  

##### Reorder factor levels by first appearance

##### Args:
&emsp;&emsp;`_f`: A factor  
&emsp;&emsp;`ordered`: A logical which determines the "ordered" status of the  
&emsp;&emsp;&emsp;&emsp;output factor.  

##### Returns:
&emsp;&emsp;The factor with levels reordered  


### # fct_count  

##### Count entries in a factor

##### Args:
&emsp;&emsp;`_f`: A factor  
&emsp;&emsp;`sort`: If True, sort the result so that the most common values float to  
&emsp;&emsp;&emsp;&emsp;the top  

&emsp;&emsp;`prop`: If True, compute the fraction of the marginal table.  

##### Returns:
&emsp;&emsp;A data frame with columns `f` and `n`, plus `p` if `prop` is True  


### # fct_match  

##### Test for presence of levels in a factor

Do any of `lvls` occur in `_f`?  

##### Args:
&emsp;&emsp;`_f`: A factor  
&emsp;&emsp;`lvls`: A vector specifying levels to look for.  

##### Returns:
&emsp;&emsp;A logical vector  


### # fct_unique  

##### Unique values of a factor

##### Args:
&emsp;&emsp;`_f`: A factor  

##### Returns:
&emsp;&emsp;The factor with the unique values in `_f`  


### # lvls_reorder  

##### Leaves values of a factor as they are, but changes the order by given indices  

##### Args:
&emsp;&emsp;`f`: A factor (or character vector).  
&emsp;&emsp;`idx`: An integer index, with one integer for each existing level.  
&emsp;&emsp;`new_levels`: A character vector of new levels.  
&emsp;&emsp;`ordered`: A logical which determines the "ordered" status of the  
&emsp;&emsp;&emsp;&emsp;output factor. `None` preserves the existing status of the factor.  

##### Returns:
&emsp;&emsp;The factor with levels reordered  


### # lvls_revalue  

##### Changes the values of existing levels; there must be one new level for each old level  

##### Args:
&emsp;&emsp;`_f`: A factor  
&emsp;&emsp;`new_levels`: A character vector of new levels.  

##### Returns:
&emsp;&emsp;The factor with the new levels  


### # lvls_expand  

##### Expands the set of levels; the new levels must include the old levels.  

##### Args:
&emsp;&emsp;`_f`: A factor  
&emsp;&emsp;`new_levels`: The new levels. Must include the old ones  

##### Returns:
&emsp;&emsp;The factor with the new levels  


### # lvls_union  

##### Find all levels in a list of factors

##### Args:
&emsp;&emsp;`fs`: A list of factors  

##### Returns:
&emsp;&emsp;A list of all levels  


## as_factor

In [2]:
# Character input: the levels keep the order of first appearance
# ('a', 'z', 'g') instead of being sorted alphabetically.
x = c("a", "z", "g")
as_factor(x)

['a', 'z', 'g']
Categories (3, object): ['a', 'z', 'g']

In [3]:
# Numeric-looking strings are still treated as strings: the resulting
# categories have object dtype.
y = c("1.1", "11", "2.2", "22")
as_factor(y)

['1.1', '11', '2.2', '22']
Categories (4, object): ['1.1', '11', '2.2', '22']

In [4]:
# Convert the strings to numbers first; the factor then has float64 categories.
z = as_numeric(y)
as_factor(z)

  output = repr(obj)


[1.1, 11.0, 2.2, 22.0]
Categories (4, float64): [1.1, 11.0, 2.2, 22.0]

## fct_count

In [5]:
# Build a random factor of letters; the fct_count() cells below reuse `fct`.
# NOTE(review): no random seed is set, so the counts change on every run --
# consider seeding the generator before this cell.
# NOTE(review): the indices come straight from rpois(); presumably a draw past
# the end of `letters` would raise an IndexError -- confirm and guard if needed.
fct = factor(sample(letters)[rpois(1000, 10)])
table(fct)

Unnamed: 0,a,c,d,e,f,g,h,i,j,k,...,p,q,r,s,t,u,v,w,y,z
,<int64>,<int64>,<int64>,<int64>,<int64>,<int64>,<int64>,<int64>,<int64>,<int64>,...,<int64>,<int64>,<int64>,<int64>,<int64>,<int64>,<int64>,<int64>,<int64>,<int64>
count,22,26,4,5,1,71,41,91,12,78,...,1,2,119,90,8,1,3,25,125,99


In [6]:
# One row per level with its count `n`, listed in level order.
fct_count(fct)

Unnamed: 0,f,n
,<category>,<int64>
0.0,a,22
1.0,c,26
2.0,d,4
3.0,e,5
4.0,f,1
5.0,g,71
6.0,h,41
7.0,i,91
8.0,j,12


In [7]:
# Same counts, with the most common levels listed first.
fct_count(fct, sort=TRUE)

Unnamed: 0,f,n
,<category>,<int64>
10.0,m,125
20.0,y,125
14.0,r,119
21.0,z,99
7.0,i,91
15.0,s,90
9.0,k,78
5.0,g,71
11.0,o,51


In [8]:
# Sorted counts plus column `p`, each level's fraction of all entries.
fct_count(fct, sort=TRUE, prop=TRUE)

Unnamed: 0,f,n,p
,<category>,<int64>,<float64>
10.0,m,125,0.125
20.0,y,125,0.125
14.0,r,119,0.119
21.0,z,99,0.099
7.0,i,91,0.091
15.0,s,90,0.090
9.0,k,78,0.078
5.0,g,71,0.071
11.0,o,51,0.051


## fct_match

In [9]:
# Tabulate how many entries of `marital` match either of the two given levels.
table(fct_match(gss_cat.marital, c("Married", "Divorced")))

Unnamed: 0,False,True
,<int64>,<int64>
count,7983,13500


In [10]:
# With misspelled level names, a plain membership test reports zero matches
# and raises no error.
table(numpy.isin(gss_cat.marital, c("Maried", "Davorced")))

Unnamed: 0,False
,<int64>
count,21483


In [11]:
# fct_match() rejects the same misspelled names with a ValueError, since they
# are not levels of the factor. try_catch() appears to print the error instead
# of letting it stop the notebook.
with try_catch():
    table(fct_match(gss_cat.marital, c("Maried", "Davorced")))

[ValueError] Levels not present in factor: ['Maried' 'Davorced'].


## fct_unique

In [12]:
# Random factor used to compare unique() with fct_unique().
# NOTE(review): no random seed is set, so the values differ between runs.
# NOTE(review): the "- 1" presumably shifts the draws to 0-based positions; a
# draw of 0 would then become index -1 -- confirm that is acceptable.
fct = factor(letters[rpois(100, 10) - 1])

# unique() does not list the values in level order.
unique(fct)

['g', 'i', 'f', 'k', 'd', ..., 'o', 'j', 'e', 'm', 'n']
Length: 13
Categories (13, object): ['d', 'e', 'f', 'g', ..., 'm', 'n', 'o', 'p']

In [13]:
# fct_unique() returns the unique values arranged in level order.
fct_unique(fct)

['d', 'e', 'f', 'g', 'h', ..., 'l', 'm', 'n', 'o', 'p']
Length: 13
Categories (13, object): ['d', 'e', 'f', 'g', ..., 'm', 'n', 'o', 'p']

## lvls_reorder, lvls_revalue and lvls_expand

In [14]:
# Reverse the level order by index; the values themselves stay as they are.
fct = factor(c("a", "b", "c"))
lvls_reorder(fct, [2, 1, 0])

['a', 'b', 'c']
Categories (3, object): ['c', 'b', 'a']

In [15]:
# Replace the levels with new labels, one new label for each existing level.
lvls_revalue(fct, c("apple", "banana", "carrot"))

['apple', 'banana', 'carrot']
Categories (3, object): ['apple', 'banana', 'carrot']

In [16]:
# Add the extra level "d"; the new level set must include all existing levels.
lvls_expand(fct, c("a", "b", "c", "d"))

['a', 'b', 'c']
Categories (4, object): ['a', 'b', 'c', 'd']

## lvls_union

In [17]:
# Collect every level that occurs across a list of factors.
fs = [factor("a"), factor("b"), factor(c("a", "b"))]
lvls_union(fs)

array(['a', 'b'], dtype=object)