In [1]:

# Block 1: Import Libraries

import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score


# Block 2: Load Data and Create Target/Shadow Splits

# Tunable split settings, named once instead of being repeated at every call.
SEED = 42               # random_state shared by every split below
HOLDOUT_FRACTION = 0.5  # share of the rows placed in the second half of each split


def split_in_half(features, labels):
    """Split (features, labels) into two stratified parts.

    Thin wrapper around ``train_test_split`` that applies the shared
    ``HOLDOUT_FRACTION`` and ``SEED`` and stratifies on ``labels``.

    Returns the usual 4-item result:
    (features_a, features_b, labels_a, labels_b).
    """
    return train_test_split(
        features,
        labels,
        test_size=HOLDOUT_FRACTION,
        random_state=SEED,
        stratify=labels,
    )


iris = load_iris()
X, y = iris.data, iris.target

# First split: Target vs Shadow (attacker uses shadow)
X_target, X_shadow, y_target, y_shadow = split_in_half(X, y)

# Target model: Train/Test
X_target_train, X_target_test, y_target_train, y_target_test = split_in_half(
    X_target, y_target
)

# Shadow model: Train/Test
X_shadow_train, X_shadow_test, y_shadow_train, y_shadow_test = split_in_half(
    X_shadow, y_shadow
)


# Block 3: Train Target Model (Victim)

# `fit` returns the estimator itself, so building and training chain into one expression.
target_model = LogisticRegression(max_iter=1000).fit(X_target_train, y_target_train)

# Block 4: Train Shadow Model (Attacker uses this model to learn)

# Same estimator settings as the target model, fitted on the shadow training split instead.
shadow_model = LogisticRegression(max_iter=1000).fit(X_shadow_train, y_shadow_train)


# Block 5: Prepare Attack Model Training Data Using Shadow Model


def membership_records(model, samples, is_member):
    """Return a model's confidence vectors for `samples` plus membership labels.

    Parameters
    ----------
    model : fitted classifier exposing ``predict_proba``
    samples : feature rows to score with ``model``
    is_member : bool
        True when `samples` were part of the model's training data (label 1.0),
        False otherwise (label 0.0).

    Returns
    -------
    (confidences, labels)
        ``confidences`` is the ``predict_proba`` output for `samples`;
        ``labels`` is a float array with one entry per row of ``confidences``,
        filled with 1.0 or 0.0.
    """
    confidences = model.predict_proba(samples)
    labels = np.full(len(confidences), 1.0 if is_member else 0.0)
    return confidences, labels


# Shadow model predictions: Members (label 1)
shadow_train_conf, shadow_train_labels = membership_records(
    shadow_model, X_shadow_train, is_member=True
)

# Shadow model predictions: Non-members (label 0)
shadow_test_conf, shadow_test_labels = membership_records(
    shadow_model, X_shadow_test, is_member=False
)

# Combine to form attack training set (members first, then non-members)
X_attack_train = np.vstack([shadow_train_conf, shadow_test_conf])
y_attack_train = np.concatenate([shadow_train_labels, shadow_test_labels])

# Block 6: Train Attack Model

# The attack model is a binary classifier: confidence vector -> member / non-member.
attack_model = LogisticRegression()
attack_model.fit(X_attack_train, y_attack_train)

# Block 7: Run the Membership Attack on Target Model

# Target model confidence on its TRAIN data (members)
target_train_conf, target_train_labels = membership_records(
    target_model, X_target_train, is_member=True
)

# Target model confidence on its TEST data (non-members)
target_test_conf, target_test_labels = membership_records(
    target_model, X_target_test, is_member=False
)

# Create final attack test set, stacked in the same order as the attack training set
X_attack_test = np.vstack([target_train_conf, target_test_conf])
y_attack_test_actual = np.concatenate([target_train_labels, target_test_labels])

# Attack model prediction: hard labels for accuracy, P(member) scores for AUC
y_pred = attack_model.predict(X_attack_test)
y_pred_proba = attack_model.predict_proba(X_attack_test)[:, 1]

# Attack evaluation
# Members and non-members both come from a 50/50 split of the target data, so the
# two classes are close to balanced; accuracy or AUC near 0.5 therefore means the
# attack is doing little better than guessing.
accuracy = accuracy_score(y_attack_test_actual, y_pred)
auc = roc_auc_score(y_attack_test_actual, y_pred_proba)

print(f"Attack Accuracy: {accuracy:.2f}")
print(f"Attack AUC: {auc:.4f}")

print("\nClassification Report:\n")
print(classification_report(y_attack_test_actual, y_pred,
                            target_names=["Non-Member (0)", "Member (1)"]))

Attack Accuracy: 0.53
Attack AUC: 0.5587

Classification Report:

                precision    recall  f1-score   support

Non-Member (0)       0.56      0.39      0.46        38
    Member (1)       0.52      0.68      0.59        37

      accuracy                           0.53        75
     macro avg       0.54      0.54      0.52        75
  weighted avg       0.54      0.53      0.52        75

