In [1]:
!pip install -q tensorflow==2.15.0 tensorflow-privacy==0.9.0

In [8]:
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow.keras import layers, Sequential
import tensorflow_privacy as tfp
from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy_lib import compute_dp_sgd_privacy

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [12]:
train_path = '/content/drive/My Drive/Colab Datasets/v2_train.csv'
test_path = '/content/drive/My Drive/Colab Datasets/v2_test.csv'

# Hyper-params
EPOCHS            = 30
BATCH_SIZE        = 32
NUM_MICROBATCHES  = 32
LEARNING_RATE     = 0.15
L2_NORM_CLIP      = 1.0
NOISE_MULTIPLIER  = 1.1

train_df = pd.read_csv(train_path)
test_df = pd.read_csv(test_path)

FEATURES = ['age', 'height', 'weight']
TARGET   = 'total_lift'

X_train = train_df[FEATURES].values.astype(np.float32)
y_train = train_df[TARGET ].values.astype(np.float32)

X_test  = test_df [FEATURES].values.astype(np.float32)
y_test  = test_df [TARGET ].values.astype(np.float32)

num_train_samples = X_train.shape[0]
remainder = num_train_samples % BATCH_SIZE
if remainder != 0:
    X_train = X_train[:-remainder]
    y_train = y_train[:-remainder]


scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test  = scaler.transform(X_test)

print(f"Train: {X_train.shape},  Test: {X_test.shape}")

Train: (24000, 3),  Test: (6003, 3)


In [9]:
model = Sequential([
    layers.Input(shape=(X_train.shape[1],)),
    layers.Dense(64, activation='relu'),
    layers.Dense(1)
])

In [10]:
assert BATCH_SIZE % NUM_MICROBATCHES == 0, "batch_size % num_microbatches must be 0"

# Differential-privacy SGD optimizer
DPKerasSGDOptimizer = tfp.privacy.optimizers.dp_optimizer_keras.DPKerasSGDOptimizer
optimizer = DPKerasSGDOptimizer(
    l2_norm_clip=L2_NORM_CLIP,
    noise_multiplier=NOISE_MULTIPLIER,
    num_microbatches=NUM_MICROBATCHES,
    learning_rate=LEARNING_RATE
)

loss_fn = tf.keras.losses.MeanSquaredError(
    reduction=tf.losses.Reduction.NONE
)

model.compile(optimizer=optimizer, loss=loss_fn, metrics=['mae'])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 64)                256       
                                                                 
 dense_1 (Dense)             (None, 1)                 65        
                                                                 
Total params: 321 (1.25 KB)
Trainable params: 321 (1.25 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [13]:
history = model.fit(
    X_train, y_train,
    epochs=EPOCHS,
    validation_split=0.1,
    batch_size=BATCH_SIZE,
    verbose=2
)

Epoch 1/30
675/675 - 3s - loss: 44108.0625 - mae: 162.9318 - val_loss: 44089.3750 - val_mae: 164.1151 - 3s/epoch - 4ms/step
Epoch 2/30
675/675 - 4s - loss: 44923.4648 - mae: 162.4322 - val_loss: 45118.0781 - val_mae: 164.6207 - 4s/epoch - 6ms/step
Epoch 3/30
675/675 - 3s - loss: 45431.5391 - mae: 162.5769 - val_loss: 45089.1367 - val_mae: 164.3876 - 3s/epoch - 4ms/step
Epoch 4/30
675/675 - 3s - loss: 45238.3555 - mae: 162.3970 - val_loss: 45410.4648 - val_mae: 164.8640 - 3s/epoch - 4ms/step
Epoch 5/30
675/675 - 3s - loss: 45262.0547 - mae: 162.4331 - val_loss: 45755.0586 - val_mae: 164.9971 - 3s/epoch - 4ms/step
Epoch 6/30
675/675 - 3s - loss: 45449.3594 - mae: 162.4882 - val_loss: 44994.3281 - val_mae: 164.0223 - 3s/epoch - 5ms/step
Epoch 7/30
675/675 - 4s - loss: 45198.7227 - mae: 162.1665 - val_loss: 45308.4062 - val_mae: 164.3114 - 4s/epoch - 6ms/step
Epoch 8/30
675/675 - 3s - loss: 45266.2852 - mae: 162.1854 - val_loss: 45828.4805 - val_mae: 164.6889 - 3s/epoch - 4ms/step
Epoch 9/

In [16]:
epsilon, _ = compute_dp_sgd_privacy(
    n=len(X_train),
    batch_size=BATCH_SIZE,
    noise_multiplier=NOISE_MULTIPLIER,
    epochs=EPOCHS,
    delta=1e-5
)

print(f"DP guarantee: ε = {epsilon:.2f}  (δ = 1e-5)")



DP guarantee: ε = 0.97  (δ = 1e-5)


In [15]:
# Predictions
y_pred = model.predict(X_test, batch_size=BATCH_SIZE).flatten()

# MSE & R²
mse = np.mean((y_pred - y_test) ** 2)
r2  = 1.0 - np.sum((y_test - y_pred) ** 2) / np.sum((y_test - y_test.mean()) ** 2)

print(f"MSE  : {mse:.3f}")
print(f"R²   : {r2:.3f}")

MSE  : 40577.715
R²   : 0.478
