In [None]:
# SETUP
%load_ext autoreload
%autoreload 2
import opendp

lib_path = "../rust/target/debug/libopendp_ffi.dylib"
odp = opendp.OpenDP(lib_path)

In [None]:
# HELLO, WORLD!

# Wrap the input text in a data object the library can consume
data = odp.data.from_string(b"hello, world!")

# Build an identity transformation, tagged with its concrete type (String -> String)
identity = odp.trans.make_identity(b"<String>")

# Run the transformation on the data, then render the result as text
res = odp.core.transformation_invoke(identity, data)
print(odp.to_str(res))

In [None]:
# LOADING DATA

# Transformation that splits its input into lines
split_lines = odp.trans.make_split_lines()

# Five one-character lines; the trailing backslash keeps the literal from
# starting with an empty line.
file = """\
a
b
c
d
e
""".encode("utf-8")
data = odp.data.from_string(file)

# Apply the splitter to the loaded text and show what came back
res = odp.core.transformation_invoke(split_lines, data)
print(odp.to_str(res))

In [None]:
# PARSING DATA
file = """\
1
2
3
4
5
""".encode("utf-8")
data = odp.data.from_string(file)

# Parse the same lines twice: first as integers, then as floats.
# A fresh split/parse pair is built for each element type.
for element_type in (b"<i32>", b"<f64>"):
    split_lines = odp.trans.make_split_lines()
    # make_parse_series: element type tag, then whether to impute
    parse_series = odp.trans.make_parse_series(element_type, True)
    # The chain lists the parser before the splitter; presumably the last
    # argument is applied to the data first -- TODO confirm against the API.
    split_and_parse = odp.core.make_chain_tt(parse_series, split_lines)
    res = odp.core.transformation_invoke(split_and_parse, data)
    print(odp.to_str(res))

In [None]:
# DATAFRAMES
file = """\
ant, 1, 10.0
bat, 2, 20.0
cat, 3, 30.0
foo, 4, 40.0
bar, 5, 50.0
""".encode("utf-8")
data = odp.data.from_string(file)

# Splitter arguments: separator, then number of columns
split_dataframe = odp.trans.make_split_dataframe(b",", 3)

# Column parser arguments, as passed here: element type tag, column key,
# and whether to impute
parse_col_1 = odp.trans.make_parse_column(b"<i32>", b"1", True)
parse_col_2 = odp.trans.make_parse_column(b"<f64>", b"2", True)

# Combine the splitter and both column parsers into one transformation
split_and_parse = odp.make_chain_tt_multi(
    parse_col_2,
    parse_col_1,
    split_dataframe,
)

# Run it on the loaded text and print the parsed result
res = odp.core.transformation_invoke(split_and_parse, data)
print(odp.to_str(res))

In [None]:
# SIMPLE OPERATIONS
file = """\
1
2
3
4
5
""".encode("utf-8")
data = odp.data.from_string(file)

# Transformation stages: split into lines, parse as f64, clamp, bounded sum.
# Primitive bounds are passed as pointers via odp.f64_p.
split_lines = odp.trans.make_split_lines()
parse_series = odp.trans.make_parse_series(b"<f64>", True)
clamp = odp.trans.make_clamp(
    b"<f64>",
    odp.f64_p(0.0),
    odp.f64_p(10.0),
)
bounded_sum = odp.trans.make_bounded_sum_l1(
    b"<f64>",
    odp.f64_p(0.0),
    odp.f64_p(10.0),
)
parse_and_sum = odp.make_chain_tt_multi(
    bounded_sum,
    clamp,
    parse_series,
    split_lines,
)

# Measurement stage: Laplace mechanism chained onto the summing transformation.
# NOTE(review): the 1.0 argument is presumably the noise scale -- confirm.
base_laplace = odp.meas.make_base_laplace(b"<f64>", 1.0)
noisy_sum = odp.core.make_chain_mt(base_laplace, parse_and_sum)

# Release the noisy sum and print it
res = odp.core.measurement_invoke(noisy_sum, data)
print(odp.to_str(res))

In [None]:
# PUTTING IT ALL TOGETHER
file = """\
ant, 1, 10.0
bat, 2, 20.0
cat, 3, 30.0
foo, 4, 40.0
bar, 5, 50.0
""".encode("utf-8")
data = odp.data.from_string(file)

# Stage 1 -- dataframe parsing: split into 3 columns, parse columns "1" and "2" as f64
split_dataframe = odp.trans.make_split_dataframe(b",", 3)
parse_column_1 = odp.trans.make_parse_column(b"<f64>", b"1", True)
parse_column_2 = odp.trans.make_parse_column(b"<f64>", b"2", True)
parse_dataframe = odp.make_chain_tt_multi(
    parse_column_2,
    parse_column_1,
    split_dataframe,
)

# Stage 2a -- noisy sum over column "1": select, clamp, bounded sum, then Laplace
select_col_1 = odp.trans.make_select_column(b"<f64>", b"1")
clamp_col_1 = odp.trans.make_clamp(b"<f64>", odp.f64_p(0), odp.f64_p(10))
bounded_sum_col_1 = odp.trans.make_bounded_sum_l1(b"<f64>", odp.f64_p(0), odp.f64_p(10))
base_laplace_col_1 = odp.meas.make_base_laplace(b"<f64>", 1.0)
sum_chain_col_1 = odp.make_chain_tt_multi(bounded_sum_col_1, clamp_col_1, select_col_1)
noisy_sum_col_1 = odp.core.make_chain_mt(base_laplace_col_1, sum_chain_col_1)

# Stage 2b -- noisy sum over column "2", built the same way
select_col_2 = odp.trans.make_select_column(b"<f64>", b"2")
clamp_col_2 = odp.trans.make_clamp(b"<f64>", odp.f64_p(0.0), odp.f64_p(10.0))
bounded_sum_col_2 = odp.trans.make_bounded_sum_l1(b"<f64>", odp.f64_p(0.0), odp.f64_p(10.0))
base_laplace_col_2 = odp.meas.make_base_laplace(b"<f64>", 1.0)
sum_chain_col_2 = odp.make_chain_tt_multi(bounded_sum_col_2, clamp_col_2, select_col_2)
noisy_sum_col_2 = odp.core.make_chain_mt(base_laplace_col_2, sum_chain_col_2)

# Stage 2c -- noisy count: both clamp bounds are 1, so the bounded sum over
# column "1" is intended to act as a row count. The *_col_1 names are rebound
# here; the column-1 sum measurement above has already been built.
select_col_1 = odp.trans.make_select_column(b"<f64>", b"1")
clamp_col_1 = odp.trans.make_clamp(b"<f64>", odp.f64_p(1), odp.f64_p(1))
bounded_sum_col_1 = odp.trans.make_bounded_sum_l1(b"<f64>", odp.f64_p(1), odp.f64_p(1))
base_laplace_col_1 = odp.meas.make_base_laplace(b"<f64>", 1.0)
count_chain = odp.make_chain_tt_multi(bounded_sum_col_1, clamp_col_1, select_col_1)
noisy_count = odp.core.make_chain_mt(base_laplace_col_1, count_chain)

# Stage 3 -- compose the three measurements pairwise, then feed them the parsed dataframe
both_sums = odp.core.make_composition(noisy_sum_col_1, noisy_sum_col_2)
composition = odp.core.make_composition(both_sums, noisy_count)
everything = odp.core.make_chain_mt(composition, parse_dataframe)

# Stage 4 -- run the whole pipeline on the raw text and print the release
res = odp.core.measurement_invoke(everything, data)
print(odp.to_str(res))

## Observations
* On the Python side, there's a bit of ceremony and repetition
  - Tagging of concrete types
  - Pointers for primitive values
* On the Rust side, there's some boilerplate
  - Marshalling in and out of FFI
  - Dealing with type-erased values
* Overall, seems like a reasonable compromise