# MODEL INVERSION ATTACK ON TABULAR DATA

**Dataset** : **ADULT**

**Target Model** : Predicts 'income' using the features 'work', 'education', 'marital', 'occupation', 'sex', 'capitalgain', 'capitalloss', and 'hoursperweek'.

**Attack Model** : Attempts to reconstruct the sensitive attribute 'race'.

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_absolute_error, classification_report
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import set_random_seed

# Data load and Preprocessing for the Target Model

In [2]:
file_path = 'Adult_35222.csv'
dt_adult = pd.read_csv(file_path)

# Binary target: 1 for '>50K', 0 for '<=50K'. Labels are whitespace-stripped
# first, and any value that still fails to map raises immediately; otherwise
# Series.map would silently produce NaN labels and training would yield a NaN loss.
income_labels = dt_adult['income'].str.strip()
dt_adult['income_binary'] = income_labels.map({'<=50K': 0, '>50K': 1})
unmapped_labels = income_labels[dt_adult['income_binary'].isna()].unique()
if len(unmapped_labels) > 0:
    raise ValueError(f"Unexpected values in 'income' column: {unmapped_labels.tolist()}")

# Features visible to the target model; 'race' is deliberately excluded and
# kept aside as the attribute the attack tries to reconstruct.
X = dt_adult[['work', 'education', 'marital', 'occupation', 'sex', 'capitalgain', 'capitalloss', 'hoursperweek']]
sensitive_feature = dt_adult['race']
y = dt_adult['income_binary']

cat_features = ['work', 'education', 'marital', 'occupation', 'sex']
num_features = ['capitalgain', 'capitalloss', 'hoursperweek']

# Standardise numeric columns and one-hot encode categorical columns.
# NOTE(review): the transformer is fitted on all rows, i.e. before the
# train/test split performed in the next cell, so scaling statistics and the
# category vocabulary also reflect test rows.
preprocessor = ColumnTransformer([
    ('num', StandardScaler(), num_features),
    ('cat', OneHotEncoder(), cat_features)
])

X_preprocessed = preprocessor.fit_transform(X)

# One-hot encode 'race' into a dense array; this is the attack model's target.
sensitive_encoder = OneHotEncoder()
sensitive_feature_encoded = sensitive_encoder.fit_transform(sensitive_feature.values.reshape(-1, 1)).toarray()


In [3]:
# 80/20 hold-out split. Features, income labels and the one-hot 'race' matrix
# go through a single call so the three stay row-aligned.
(
    X_train, X_test,
    y_train, y_test,
    sensitive_train, sensitive_test,
) = train_test_split(
    X_preprocessed,
    y,
    sensitive_feature_encoded,
    test_size=0.2,
    random_state=42,
)

**Neural Network**

A neural network consists of layers of interconnected nodes, loosely inspired by the structure of the human brain, that learn patterns from data.

Optimizer used: Adam (Adaptive Moment Estimation)

In [4]:
# Seed the Python, NumPy and TensorFlow random number generators so that
# weight initialisation and the other random operations in training are
# repeatable across Restart & Run All. (Bit-exact results on GPU may
# additionally require enabling op determinism.)
set_random_seed(42)

input_dim = X_train.shape[1]

# Binary income classifier: three ReLU hidden layers (128 -> 64 -> 32) with
# dropout after the first, ending in a single sigmoid unit.
target_model = models.Sequential([
    layers.Input(shape=(input_dim,)),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(64, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

target_model.compile(optimizer=Adam(learning_rate=0.001),
                   loss='binary_crossentropy',
                   metrics=['accuracy'])

target_model.fit(X_train, y_train, epochs=20, batch_size=32, verbose=1)

# Report loss and accuracy on the held-out split.
test_loss, test_accuracy = target_model.evaluate(X_test, y_test, verbose=1)
print(f"Target Model - Test Loss: {test_loss:.2f}, Test Accuracy: {test_accuracy:.2f}")

Epoch 1/20
[1m881/881[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.8040 - loss: 0.3973
Epoch 2/20
[1m881/881[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8436 - loss: 0.3398
Epoch 3/20
[1m881/881[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.8415 - loss: 0.3378
Epoch 4/20
[1m881/881[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8456 - loss: 0.3339
Epoch 5/20
[1m881/881[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.8486 - loss: 0.3314
Epoch 6/20
[1m881/881[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.8475 - loss: 0.3286
Epoch 7/20
[1m881/881[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.8486 - loss: 0.3292
Epoch 8/20
[1m881/881[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8487 - loss: 0.3295
Epoch 9/20
[1m881/881[0m [32m━━━━━━━━

# Attack

In [6]:
def attack_with_neural_net(target_model, X, sensitive_true, sensitive_dim, epochs=20, batch_size=32, learning_rate=0.001):
    """Train an inversion network that maps target-model outputs to the sensitive attribute.

    The target model is queried on ``X``; its predictions are the only input
    the inversion network sees. The network is fitted against
    ``sensitive_true`` and then applied to those same predictions, so the
    returned reconstruction is for the rows it was trained on.

    Args:
        target_model: Fitted model exposing ``predict(X)``.
        X: Feature matrix passed to ``target_model.predict``.
        sensitive_true: One-hot encoded sensitive attribute, one row per row
            of ``X``, with ``sensitive_dim`` columns.
        sensitive_dim: Number of classes of the sensitive attribute (width of
            the softmax output layer).
        epochs: Training epochs for the inversion network.
        batch_size: Mini-batch size for the inversion network.
        learning_rate: Adam learning rate for the inversion network.

    Returns:
        Array of per-class probabilities produced by the inversion network,
        one row per row of ``X`` and ``sensitive_dim`` columns.
    """
    model_outputs = np.asarray(target_model.predict(X))
    # Normalise a flat prediction vector to a single-column matrix.
    if model_outputs.ndim == 1:
        model_outputs = model_outputs.reshape(-1, 1)

    # Size the attack input from what the target model actually emits rather
    # than assuming a single output unit; for a one-unit sigmoid model this
    # is 1, matching the previous fixed value.
    output_dim = model_outputs.shape[1]

    inversion_model = models.Sequential([
        layers.Input(shape=(output_dim,)),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.2),
        layers.Dense(128, activation='relu'),
        layers.Dense(64, activation='relu'),
        layers.Dense(32, activation='relu'),
        layers.Dense(sensitive_dim, activation='softmax')
    ])

    inversion_model.compile(optimizer=Adam(learning_rate=learning_rate),
                            loss='categorical_crossentropy',
                            metrics=['accuracy'])

    inversion_model.fit(model_outputs, sensitive_true, epochs=epochs, batch_size=batch_size, verbose=1)

    reconstructed_sensitive = inversion_model.predict(model_outputs)
    return reconstructed_sensitive

# Run the inversion attack against the target model on the held-out split.
attack_hparams = {'epochs': 20, 'batch_size': 32, 'learning_rate': 0.001}
reconstructed_sens_attr = attack_with_neural_net(
    target_model,
    X_test,
    sensitive_test,
    sensitive_dim=sensitive_test.shape[1],
    **attack_hparams,
)

# Mean absolute difference between the one-hot ground truth and the
# reconstructed class probabilities.
mae = mean_absolute_error(sensitive_test, reconstructed_sens_attr)
print(f"Mean Absolute Error (MAE) for Sensitive Feature Reconstruction: {mae:.4f}")



[1m221/221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Epoch 1/20
[1m221/221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8439 - loss: 0.8331
Epoch 2/20
[1m221/221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8540 - loss: 0.5451
Epoch 3/20
[1m221/221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8611 - loss: 0.5307
Epoch 4/20
[1m221/221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8600 - loss: 0.5264
Epoch 5/20
[1m221/221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8537 - loss: 0.5356
Epoch 6/20
[1m221/221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8524 - loss: 0.5428
Epoch 7/20
[1m221/221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8594 - loss: 0.5193
Epoch 8/20
[1m221/221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Test