In [20]:
# This is a less verbose notebook intended to be used for technical demos.

# SETUP
# The `make_*` constructors used in the cells below are not imported by name
# anywhere in this notebook; they are expected to come from these star imports.
from opendp.trans import *
from opendp.meas import *
from opendp.comb import *

from opendp.mod import enable_features

# Opt in to the "floating-point" and "contrib" features before any pipeline is
# built (presumably required by the float-valued constructors used later --
# confirm against the OpenDP documentation).
enable_features("floating-point", "contrib")

In [21]:
# PARSING DATA
# Six newline-terminated records; the last one ("x") is not a number.
# The [1:] slice drops the newline that follows the opening quotes.
data = """
1
2
3
4
5
x
"""[1:]

# Parse lines as integers: build each stage separately, then chain them.
split_lines = make_split_lines()                 # one string per line
cast_to_int = make_cast(TIA=str, TOA=int)        # str -> int
impute_zero = make_impute_constant(constant=0)   # replace failed casts with 0
preprocessor = split_lines >> cast_to_int >> impute_zero

# The recorded output shows the unparseable "x" coming back as 0.
res = preprocessor(data)
print(res)

[1, 2, 3, 4, 5, 0]


In [22]:
# STABILITY RELATION
# Ask the integer preprocessor whether its relation holds for d_in=2, d_out=2
# (presumably: inputs at most 2 apart map to outputs at most 2 apart --
# see the OpenDP docs for the exact metric), and fail the cell if it does not.
relation_holds = preprocessor.check(d_in=2, d_out=2)
assert relation_holds

In [23]:
# Parse lines as floats
# Same three stages as the integer parser, but casting to float and imputing
# with a float constant. Note that this rebinds `preprocessor`.
split_lines = make_split_lines()
cast_to_float = make_cast(TIA=str, TOA=float)
impute_zero_float = make_impute_constant(constant=0.)
preprocessor = split_lines >> cast_to_float >> impute_zero_float

res = preprocessor(data)
print(res)


[1.0, 2.0, 3.0, 4.0, 5.0, 0.0]


In [24]:
# LOADING DATA
# Five newline-terminated string records; [1:] strips the leading newline.
data = """
a
b
c
d
e
"""[1:]

# Split file by line, then count the resulting records.
split_lines = make_split_lines()
count_records = make_count(TIA=str)
count = split_lines >> count_records

res = count(data)
print(res)


5


In [25]:
# DATAFRAMES
# Comma-separated records with three fields each.
data = """
ant, 1, 10.0
bat, 2, 20.0
cat, 3, 30.0
foo, 4, 40.0
bar, 5, 50.0
"""[1:]

# Split the text into a dataframe whose columns are named A, B and C.
column_names = ["A", "B", "C"]
split_dataframe = make_split_dataframe(separator=",", col_names=column_names)

# Chain splitting and column selection; the selected column is kept as strings.
select_b = make_select_column(key="B", TOA=str)
split_and_select = split_dataframe >> select_b

# Look at column "B", i.e. the second field (index 1) of every record.
res = split_and_select(data)
print(res)

['1', '2', '3', '4', '5']


In [26]:
# SIMPLE OPERATIONS
# The input is already a list of floats, so no splitting or parsing is needed.
data = [1., 2., 3., 4., 5.]

# Clamp every value into the bounds, then chain into a bounded-sum query.
# Both stages take the same bounds, so they are defined once.
sum_bounds = (0., 10.)
clamp_values = make_clamp(bounds=sum_bounds)
sum_values = make_bounded_sum(bounds=sum_bounds)
query = clamp_values >> sum_values

In [28]:
# CHECK PRIVACY
# Attach Laplace noise to the existing sum query.
laplace_noise = make_base_laplace(scale=1.)
query_laplace = query >> laplace_noise

# Check the relation for a user contributing at most one record.
# The 1e-6 fudge factor on the budget accounts for float rounding error.
max_user_contribution = 1
budget = 10. + 1e-6
within_budget = query_laplace.check(d_in=max_user_contribution, d_out=budget)
assert within_budget

# Release a noisy sum (the value differs on every run).
print(query_laplace(data))

15.232002250002067


In [None]:
# TRANSFORMATION REUSE
# `query` is reused unchanged; only the noise stage differs from the Laplace
# measurement built earlier.
gaussian_noise = make_base_gaussian(scale=1.)
query_gaussian = query >> gaussian_noise
print(query_gaussian(data))


14.401209981614546


In [29]:
# PUTTING IT ALL TOGETHER
# Comma-separated records with three fields each; [1:] strips the leading
# newline that follows the opening quotes.
data = """
ant, 1, 10.0
bat, 2, 20.0
cat, 3, 30.0
foo, 4, 40.0
bar, 5, 50.0
"""[1:]

# Shared front end: text -> dataframe with columns A, B, C.
parse_dataframe = make_split_dataframe(separator=",", col_names=["A", "B", "C"])

# Noisy integer sum of column "B" (the second field).
# `make_cast_default` is used here, so no separate imputation stage is chained.
noisy_sum_col_1 = (
    make_select_column(key="B", TOA=str) >>
    make_cast_default(TIA=str, TOA=int) >>
    # bounds passed by keyword, consistent with every other clamp/sum call
    make_clamp(bounds=(0, 10)) >>
    make_bounded_sum(bounds=(0, 10)) >>
    make_base_geometric(scale=1.0)
)

# Noisy float sum of column "C" (the third field).
# NOTE: every value in column C (10.0 to 50.0) is at or above the upper clamp
# bound of 10., so each of the five rows contributes 10. to the sum.
noisy_sum_col_2 = (
    make_select_column(key="C", TOA=str) >>
    make_cast_default(TIA=str, TOA=float) >>
    make_clamp(bounds=(0., 10.)) >>
    make_bounded_sum(bounds=(0., 10.)) >>
    make_base_laplace(scale=1.0)
)

# Noisy count of records, taken over column "A".
noisy_count = (
    make_select_column(key="A", TOA=str) >>
    make_count(TIA=str) >>
    make_base_geometric(scale=1.0)
)

# Compose & chain. The composition is nested pairwise, which is why the
# result prints as ((sum_col_1, sum_col_2), count).
composition = make_basic_composition(
    make_basic_composition(noisy_sum_col_1, noisy_sum_col_2),
    noisy_count,
)

# Parse once, then feed the dataframe to all three measurements.
everything = parse_dataframe >> composition

print(everything(data))

MASKED ERROR: 'AnyObject'
using string fallback
((14, 48.489422335990554), 5)
