## Working with Booleans

This notebook demonstrates some simple OpenDP data-processing primitives that produce or consume booleans.

In [17]:
from opendp.transformations import *
from opendp.measurements import *
from opendp.combinators import *
from opendp.typing import *
from opendp.mod import enable_features

enable_features("contrib")

# Flag each record by whether it equals "T", then tally the True and False
# flags (no extra bucket for uncategorized values).
is_t = make_is_equal("T")
tally_flags = make_count_by_categories([True, False], null_category=False)
trans = is_t >> tally_flags

data = ["T", "F", "NA", ""]
trans(data)

[1, 3]

In [25]:
# Relabeling: look up each record's position in `categories`, send records
# that are not found to the out-of-range position len(categories), and then
# translate positions into labels ("Unknown" for out-of-range positions).
data = ["A", "A", "B", "E"]
categories = ["A", "B", "C", "D"]

locate = make_find(categories)
fill_missing = make_impute_constant(
    len(categories), "OptionNullDomain<AllDomain<usize>>"
)
to_label = make_index(["Good", "Good", "Bad", "Bad"], "Unknown")

trans = locate >> fill_missing >> to_label
trans(data)


['Good', 'Good', 'Good', 'Unknown']

In [7]:
# When the data contains values outside the listed categories, they are
# tallied in an extra trailing count (the null category).
# Define the input here so the cell gives the same result when the notebook
# is run top to bottom, instead of depending on whichever cell last set `data`.
data = ["T", "F", "NA", ""]
make_count_by_categories(["T", "F"])(data)

[1, 1, 2]

In [11]:
# Null-check example: cast each string to int, flag the records whose cast
# result is null, and tally the True/False flags.
data = ["A", "B", "C", "D", "1", "2", "1.", "a1"]

parse_int = make_cast(str, int)
flag_null = make_is_null(OptionNullDomain[AllDomain[int]])
tally_flags = make_count_by_categories([True, False], null_category=False)

trans = parse_int >> flag_null >> tally_flags
trans(data)

[6, 2]

In [28]:
# Check whether each record is absent from a set of categories: a record that
# make_find cannot locate comes back null, which make_is_null turns into True.
data = ["A", "A", "B", "E"]
categories = ["A", "B", "C", "D"]

locate = make_find(categories)
flag_missing = make_is_null(OptionNullDomain[AllDomain[usize]])

trans = locate >> flag_missing
trans(data)

[False, False, False, True]

In [20]:
# Release two noisy counts of the married rows in the PUMS sample under basic
# composition, then report the privacy loss of the combined release.
path = "../data/PUMS_california_demographics_1000/data.csv"

columns = ["age", "sex", "educ", "race", "income", "married"]

# Parse the CSV text into a dataframe, replace the "married" column with a
# boolean (value == "1"), and subset the rows by that boolean column.
trans_married = (
    make_split_dataframe(",", columns)
    >> make_df_is_equal("married", "1")
    >> make_subset_by("married", columns)
)

# Noisy row count: select a column, cast it to int (with a default for
# values that fail to parse), count the records, and add discrete Laplace
# noise with scale 1.0.
counter = (
    make_select_column("age", str)
    >> make_cast_default(str, int)
    >> make_count(int)
    >> make_base_discrete_laplace(1.0)
)

meas = trans_married >> make_basic_composition([counter, counter])

# Read the file inside a context manager so the handle is closed promptly
# rather than being left for the garbage collector.
with open(path) as f:
    meas(f.read())

# Privacy loss of the composed measurement for an input distance of 1.
meas.map(1)


2.0