In [8]:
# SETUP
%load_ext autoreload
%autoreload 2
from opendp.v1.trans import *
from opendp.v1.meas import *
from opendp.v1.core import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [9]:
# PARSING DATA
# Newline-terminated sample input; the last line ("x") is not a number.
data = (
    "1\n"
    "2\n"
    "3\n"
    "4\n"
    "5\n"
    "x\n"
)

# Integer pipeline: split the text into lines, cast str -> int, then impute
# the constant 0 (the non-numeric "x" line comes out as 0).
preprocessor = (
    make_split_lines()
    >> make_cast(TI=str, TO=int)
    >> make_impute_constant(constant=0)
)
res = preprocessor(data)
print(res)

[1, 2, 3, 4, 5, 0]


In [10]:
# Float pipeline: same shape as the integer pipeline, but casting str -> float
# and imputing 0.0. Reuses the `data` string defined in the previous cell.
preprocessor = (
    make_split_lines()
    >> make_cast(TI=str, TO=float)
    >> make_impute_constant(constant=0.0)
)
res = preprocessor(data)
print(res)


[1.0, 2.0, 3.0, 4.0, 5.0, 0.0]


In [11]:
# LOADING DATA
# Five newline-terminated string records.
data = (
    "a\n"
    "b\n"
    "c\n"
    "d\n"
    "e\n"
)

# Split the text into lines, then count the resulting records.
count = (
    make_split_lines()
    >> make_count(TIA=str)
)
res = count(data)
print(res)


5


In [12]:
# DATAFRAMES
# Comma-separated sample table: a string column, an integer column and a
# float column, one record per line.
data = (
    "ant, 1, 10.0\n"
    "bat, 2, 20.0\n"
    "cat, 3, 30.0\n"
    "foo, 4, 40.0\n"
    "bar, 5, 50.0\n"
)

# Step 1: split the raw text into a dataframe whose columns are keyed 0, 1, 2.
split_dataframe = make_split_dataframe(separator=",", col_names=[0, 1, 2])

# Step 2: parse column 1 as int and column 2 as float.
parse = (
    make_parse_column(key=1, T=int)
    >> make_parse_column(key=2, T=float)
)

# Steps 1 and 2 chained into a single transformation.
split_and_parse = split_dataframe >> parse

# Inspect the parsed integer column (key 1).
select_column_1 = split_and_parse >> make_select_column(key=1, T=int)
res = select_column_1(data)
print(res)

[1, 2, 3, 4, 5]


In [13]:
# SIMPLE OPERATIONS
# Five numeric records, one per line.
data = (
    "1\n"
    "2\n"
    "3\n"
    "4\n"
    "5\n"
)

# Shared sum query: split into lines, cast to float, clamp each value to
# [0, 10], then take the bounded sum over the same bounds.
query = (
    make_split_lines()
    >> make_cast_default(TI=str, TO=float)
    >> make_clamp(lower=0.0, upper=10.0)
    >> make_bounded_sum(lower=0.0, upper=10.0)
)

# Release the sum through the Laplace measurement (scale 1).
query_laplace = query >> make_base_laplace(scale=1.0)
print(query_laplace(data))

# Release the same sum through the Gaussian measurement (scale 1).
query_gaussian = query >> make_base_gaussian(scale=1.0)
print(query_gaussian(data))


15.75857743319612
15.707649272078864


In [14]:
# PUTTING IT ALL TOGETHER
# Comma-separated sample table: a string column, an integer column and a
# float column, one record per line.
data = (
    "ant, 1, 10.0\n"
    "bat, 2, 20.0\n"
    "cat, 3, 30.0\n"
    "foo, 4, 40.0\n"
    "bar, 5, 50.0\n"
)

# Shared front end: split the text into a dataframe keyed 0, 1, 2, then parse
# column 1 as int and column 2 as float.
parse_dataframe = (
    make_split_dataframe(separator=",", col_names=[0, 1, 2])
    >> make_parse_column(key=1, T=int)
    >> make_parse_column(key=2, T=float)
)

# Noisy sum of the integer column: clamp to [0, 10], bounded sum, then the
# geometric measurement.
noisy_sum_col_1 = (
    make_select_column(key=1, T=int)
    >> make_clamp(lower=0, upper=10)
    >> make_bounded_sum(lower=0, upper=10)
    >> make_base_geometric(scale=1.0)
)

# Noisy sum of the float column: clamp to [0, 10], bounded sum, then the
# Laplace measurement.
# NOTE(review): the sample values in column 2 are 10.0-50.0, so with these
# bounds every entry clamps to 10.0 -- confirm the upper bound is intended.
noisy_sum_col_2 = (
    make_select_column(key=2, T=float)
    >> make_clamp(lower=0.0, upper=10.0)
    >> make_bounded_sum(lower=0.0, upper=10.0)
    >> make_base_laplace(scale=1.0)
)

# Noisy record count, taken over the string column.
noisy_count = (
    make_select_column(key=0, T=str)
    >> make_count(TIA=str)
    >> make_base_geometric(scale=1.0)
)

# Compose & chain: pair the two sums first, then pair that with the count,
# so the release is nested as ((sum_col_1, sum_col_2), count).
noisy_sums = make_basic_composition(noisy_sum_col_1, noisy_sum_col_2)
composition = make_basic_composition(noisy_sums, noisy_count)

# Full pipeline: parse once, then run the composed measurements.
everything = parse_dataframe >> composition

print(everything(data))

using string fallback
((15, 51.79854396238542), 5)
