In [92]:
# `os` is needed for the path construction below; import it explicitly so this
# cell does not depend on the name already being present in the kernel namespace.
import os

from opendp.trans import *
from opendp.meas import *
from opendp.core import *

# Opt in to the "floating-point" feature flag before building any chains.
# NOTE(review): presumably required by the float-valued constructors used in
# later cells -- confirm against the OpenDP version in use.
enable_features("floating-point")

# Location of the CSV file, relative to the notebook's working directory.
data_path = os.path.join('.', 'data', 'PUMS_california_demographics_1000', 'data.csv')
# Column names handed to make_split_dataframe when the CSV text is parsed.
var_names = ["age", "sex", "educ", "race", "income", "married"]

# Read the whole file into a single string; later cells pass this raw text
# straight into the transformation chain.
with open(data_path) as input_data:
    data = input_data.read()

In [93]:
def check_scale(scale, preprocessor, dataset_distance, epsilon):
    """
    Test whether a candidate Laplace scale passes the chain's `check`.

    The preprocessor is chained (`>>`) with `make_base_laplace(scale)` and the
    resulting object's `check(dataset_distance, epsilon)` is returned as-is.

    :param scale: Candidate scale passed to make_base_laplace
    :param preprocessor: Chain that the Laplace step is appended to
    :param dataset_distance: First argument forwarded to `check`
    :param epsilon: Second argument forwarded to `check`
    :return: Whatever `check` returns (expected to be True/False)
    """
    noise_step = make_base_laplace(scale)
    noisy_chain = preprocessor >> noise_step
    return noisy_chain.check(dataset_distance, epsilon)


In [94]:
def make_dp_mean(col_names, index, lower, upper, n, epsilon,
                 dataset=None, scale_bounds=(0., 10.)):
    """
    Draft of a function to be used on the backend for DPCreator.

    Builds a chain that splits CSV text into a dataframe, selects one column,
    casts it to float, imputes, clamps and resizes it, and takes the bounded
    mean. A Laplace scale is then found with `binary_search` over
    `scale_bounds`, the Laplace step is appended, and the chain is invoked on
    the dataset.

    :param col_names: Column names passed to make_split_dataframe
    :param index: Key of the column to select data from
    :param lower: Lower bound for clamp
    :param upper: Upper bound for clamp
    :param n: Estimated number of values in data
    :param epsilon: Privacy budget
    :param dataset: CSV text to compute the mean on. When omitted, the
        notebook-level ``data`` variable is used (the original behaviour)
    :param scale_bounds: (low, high) interval handed to binary_search when
        looking for the Laplace scale. The default matches the previously
        hard-coded interval; widen it if the search cannot find a scale
    :return: The value produced by invoking the noised chain on the dataset
    """
    if dataset is None:
        # Backward-compatible fallback to the notebook global.
        dataset = data

    mean_chain = (
        # Split the CSV text into a dataframe keyed by col_names
        make_split_dataframe(separator=",", col_names=col_names) >>
        # Select one column of the dataframe, as strings
        make_select_column(key=index, T=str) >>
        # Cast the column from str to float
        make_cast(TI=str, TO=float) >>
        # Impute with the constant 0.
        make_impute_constant(0.) >>
        # Clamp values to [lower, upper]
        make_clamp(lower, upper) >>
        # Resize to n records, filling with 0.
        # NOTE(review): the fill constant is hard-coded; presumably it has to
        # lie within [lower, upper] -- confirm for bounds that exclude 0.
        make_resize_bounded(0., n, lower, upper) >>
        make_bounded_mean(lower, upper, n=n, T=float)
    )

    # Search the interval for a scale at which the chain passes `check` for a
    # dataset distance of 1 and the requested epsilon.
    scale = binary_search(
        lambda s: check_scale(s, mean_chain, 1, epsilon),
        tuple(scale_bounds),
    )

    dp_mean_chain = mean_chain >> make_base_laplace(scale)
    return dp_mean_chain(dataset)



In [95]:
# Parameters for this release.
column = "age"
epsilon = 1.

# DP mean of the chosen column, clamped to [0, 200] and sized to 1000 records.
res = make_dp_mean(
    col_names=var_names,
    index=column,
    lower=0.,
    upper=200.,
    n=1000,
    epsilon=epsilon,
)

print(f"Epsilon: {epsilon}\nColumn: {column}\nDP Mean: {res}")


Epsilon: 1.0
Column: age
DP Mean: 42.72032988502685
