In [1]:
# This is a less verbose notebook intended to be used for technical demos.

# SETUP
from opendp.transformations import *
from opendp.measurements import *
from opendp.combinators import *

from opendp.mod import enable_features

# Opt in to these OpenDP feature flags before any constructor is built.
# NOTE(review): presumably needed by the float-valued / contrib constructors
# used in later cells -- confirm against the OpenDP docs.
OPENDP_FEATURES = ("floating-point", "contrib")
enable_features(*OPENDP_FEATURES)

In [2]:
# PARSING DATA
# Five numeric lines followed by one line ("x") that is not a number.
data = """
1
2
3
4
5
x
"""[1:]  # the slice drops the newline right after the opening quotes

# Integer pipeline: split the text into lines, cast each line to int,
# then impute the constant 0 (the recorded output shows "x" coming back as 0).
lines_to_ints = make_split_lines() >> make_cast(TIA=str, TOA=int)
preprocessor = lines_to_ints >> make_impute_constant(constant=0)

res = preprocessor(data)
print(res)

[1, 2, 3, 4, 5, 0]


In [3]:
# STABILITY RELATION
# Verify the preprocessor's relation at d_in=2, d_out=2.
relation_holds = preprocessor.check(d_in=2, d_out=2)
assert relation_holds

In [4]:
# Parse lines as floats
# Same shape as the integer pipeline; this cell reuses `data` from that example.
lines_to_floats = make_split_lines() >> make_cast(TIA=str, TOA=float)
preprocessor = lines_to_floats >> make_impute_constant(constant=0.)

res = preprocessor(data)
print(res)


[1.0, 2.0, 3.0, 4.0, 5.0, 0.0]


In [5]:
# LOADING DATA
# Five single-letter records, one per line.
data = """
a
b
c
d
e
"""[1:]

# Split the text into lines, then count the resulting records
count = (
    make_split_lines() >>
    make_count(TIA=str)
)
res = count(data)
print(res)


5


In [6]:
# DATAFRAMES
data = """
ant, 1, 10.0
bat, 2, 20.0
cat, 3, 30.0
foo, 4, 40.0
bar, 5, 50.0
"""[1:]

# Splitter: comma-separated text -> dataframe with columns A, B, C
split_dataframe = make_split_dataframe(
    separator=",",
    col_names=["A", "B", "C"],
)

# Chain the splitter into a selector that reads column "B" as strings
select_column_b = make_select_column(key="B", TOA=str)
split_and_select = split_dataframe >> select_column_b

# Show column "B" (the second column of each row)
res = split_and_select(data)
print(res)

['1', '2', '3', '4', '5']


In [7]:
# SIMPLE OPERATIONS
data = [1., 2., 3., 4., 5.]

# Clamp each value into the bounds, then chain into a bounded sum query.
# The clamp and the sum are given the same bounds tuple.
value_bounds = (0., 10.)
query = (
    make_clamp(bounds=value_bounds) >>
    make_bounded_sum(bounds=value_bounds)
)

In [8]:
# CHECK PRIVACY
max_user_contribution = 1
# 10 plus a 1e-6 fudge factor that accounts for float rounding error
budget = 10. + 1e-6

# Chain Laplace noise onto the sum query and confirm the relation holds
# for one contribution within the budget.
query_laplace = query >> make_base_laplace(scale=1.)
assert query_laplace.check(d_in=max_user_contribution, d_out=budget)

laplace_release = query_laplace(data)
print(laplace_release)

14.645264042392858


In [9]:
# TRANSFORMATION REUSE
# The existing `query` transformation is chained into a second measurement.
gaussian_noise = make_base_gaussian(scale=1.)
query_gaussian = query >> gaussian_noise

gaussian_release = query_gaussian(data)
print(gaussian_release)


15.241476772598702


In [10]:
# PUTTING IT ALL TOGETHER
data = """
ant, 1, 10.0
bat, 2, 20.0
cat, 3, 30.0
foo, 4, 40.0
bar, 5, 50.0
"""[1:]

# Parse the comma-separated text into a dataframe with columns A, B, C.
parse_dataframe = make_split_dataframe(separator=",", col_names=["A", "B", "C"])

# Integer sum over column B: cast to int, clamp, sum, add discrete Laplace noise.
# `bounds=` is passed by keyword, matching every other clamp/sum call here.
noisy_sum_col_1 = (
    make_select_column(key="B", TOA=str) >>
    make_cast_default(TIA=str, TOA=int) >>
    make_clamp(bounds=(0, 10)) >>
    make_bounded_sum(bounds=(0, 10)) >>
    make_base_discrete_laplace(scale=1.0)
)

# Float sum over column C: cast to float, clamp, sum, add Laplace noise.
# NOTE(review): column C holds 10.0-50.0, so every value is at or above the
# upper bound 10. and clamps to it -- confirm these bounds are intended.
noisy_sum_col_2 = (
    make_select_column(key="C", TOA=str) >>
    make_cast_default(TIA=str, TOA=float) >>
    make_clamp(bounds=(0., 10.)) >>
    make_bounded_sum(bounds=(0., 10.)) >>
    make_base_laplace(scale=1.0)
)

# Row count taken over column A, with discrete Laplace noise.
noisy_count = (
    make_select_column(key="A", TOA=str) >>
    make_count(TIA=str) >>
    make_base_discrete_laplace(scale=1.0)
)

# Compose & chain
composition = make_basic_composition([
    noisy_sum_col_1, noisy_sum_col_2, noisy_count
])

everything = parse_dataframe >> composition

# Prints one release per composed measurement (see the recorded output).
print(everything(data))

[17, 49.8146323772866, 4]
