In [14]:
import pandas as pd
import tensorflow as tf
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import tensorflow_privacy

# Train a small dense classifier on the Adult census data with a
# differentially-private SGD optimizer and report held-out accuracy.

# Seed every RNG this cell touches so Restart & Run All gives comparable runs.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Load dataset
# The file has no header row, so column names are supplied explicitly. The
# regex separator swallows the blanks that surround each comma in the raw file.
url = "~/Downloads/adult.csv"
column_names = ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation',
                'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income']
data = pd.read_csv(url, names=column_names, sep=r'\s*,\s*', engine='python')

# Preprocess data
X = data.drop('income', axis=1)
# Binary target: 1 for '>50K', 0 for anything else. A trailing '.' is dropped
# first so labels written as '>50K.' are not silently mapped to class 0.
income_labels = (data['income'].astype(str).str.rstrip('.') == '>50K').astype(int)
# One-hot encode to shape (n_samples, 2) to match the 2-unit softmax output.
y = np.eye(2)[income_labels.to_numpy()]

# Split data
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SEED)

# Define preprocessor
numeric_features = ['age', 'fnlwgt', 'education-num',
                    'capital-gain', 'capital-loss', 'hours-per-week']
categorical_features = ['workclass', 'education', 'marital-status',
                        'occupation', 'relationship', 'race', 'sex', 'native-country']

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        # handle_unknown='ignore': a category seen only in the test split is
        # encoded as all zeros instead of raising during transform().
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)],
    # Always return a dense ndarray, so no density-dependent .toarray() call
    # is needed before handing the features to Keras.
    sparse_threshold=0.0)

pipeline = Pipeline([
    ('preprocessor', preprocessor)])

# Fit on the training split only; the test split reuses the fitted statistics.
X_train = pipeline.fit_transform(X_train)
X_test = pipeline.transform(X_test)

# Define model
model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu',
                          kernel_initializer='glorot_normal', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(32, kernel_initializer='glorot_normal', activation='relu'),
    tf.keras.layers.Dense(2, activation='softmax')
])

# NOTE(review): num_microbatches=1 treats each batch as a single microbatch,
# so clipping acts on the batch-mean gradient rather than on per-example
# gradients - confirm this matches the intended privacy accounting.
optimizer = tensorflow_privacy.VectorizedDPKerasSGDOptimizer(
    l2_norm_clip=1,
    noise_multiplier=2.0,
    num_microbatches=1,
    learning_rate=0.15)

# The DP optimizer consumes a vector of per-example losses, hence
# reduction=NONE. The last layer already applies softmax, so the loss must be
# given probabilities (from_logits=False), not logits.
loss_fn = tf.keras.losses.CategoricalCrossentropy(
    from_logits=False, reduction=tf.losses.Reduction.NONE)

model.compile(optimizer=optimizer, loss=loss_fn,
              metrics=['accuracy'])

# Train model
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

# Evaluate model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Accuracy: 81.15%


In [21]:
def e_th(D_size=6000, b=250, epochs=24, C=1, sigma=0.73, delta=1e-8):
    """Back-of-the-envelope privacy-loss estimate for a DP-SGD run.

    The per-step epsilon is taken as ``q * C / sigma`` with sampling ratio
    ``q = b / D_size``, and the per-step values are combined over all steps
    with ``sqrt(2 * T * ln(1 / delta)) * eps + T * eps**2``.

    NOTE(review): this is a heuristic. The ``T * eps**2`` term is the
    small-epsilon approximation of the ``T * eps * (exp(eps) - 1)`` term of
    the advanced composition theorem, and the per-step epsilon is not derived
    from a calibrated Gaussian-mechanism bound. Use a proper privacy
    accountant for reported guarantees.

    Args:
        D_size: Number of training examples (> 0).
        b: Batch size (0 < b <= D_size).
        epochs: Number of passes over the data (>= 0).
        C: Gradient clipping norm, used as the sensitivity (>= 0).
        sigma: Noise multiplier (> 0).
        delta: Target delta of the (epsilon, delta) guarantee (0 < delta < 1).

    Returns:
        Tuple ``(epsilon_per_step, epsilon_total)`` of floats.

    Raises:
        ValueError: If any argument is outside the range listed above.
    """
    import math

    # Reject inputs that would otherwise surface as ZeroDivisionError or
    # "math domain error", or that produce a meaningless sampling ratio.
    if D_size <= 0:
        raise ValueError("D_size must be positive")
    if b <= 0 or b > D_size:
        raise ValueError("b must satisfy 0 < b <= D_size")
    if epochs < 0:
        raise ValueError("epochs must be non-negative")
    if C < 0:
        raise ValueError("C must be non-negative")
    if sigma <= 0:
        raise ValueError("sigma must be positive")
    if not 0 < delta < 1:
        raise ValueError("delta must lie strictly between 0 and 1")

    # Compute q, the sampling ratio
    q = b / D_size

    # Compute Delta f (sensitivity of the function): the clipping norm
    Delta_f = C

    # Compute epsilon for each step
    epsilon_per_step = q * Delta_f / sigma

    # Compute the number of steps T across all epochs (kept as a float, so a
    # partial final batch contributes a fractional step)
    T = (D_size / b) * epochs

    # Compose the per-step epsilons over T steps
    epsilon_total = (math.sqrt(2 * T * math.log(1 / delta)) * epsilon_per_step
                     + T * epsilon_per_step ** 2)

    return epsilon_per_step, epsilon_total


e_th()


(0.05707762557077625, 8.449847843680425)