In [422]:
import os

import numpy as np

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Activation, Dropout, BatchNormalization
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam

from collections import Counter

from sklearn.model_selection import StratifiedKFold, StratifiedShuffleSplit
from sklearn.metrics import accuracy_score

In [423]:
optimizer = "Adam"
loss_function = "categorical_crossentropy"
epochs = 60
batch_size = 128
learning_rate = .01
img_width = 32
img_height = 32
n = 5
num_class = 10
input_shape = (img_width, img_height, 3)

In [424]:
#load CIFAR-10 DATA
def load_data():
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    
    #normalize pixel values to 0-1 to simplify training
    x_train = x_train.astype("float32") / 255.0
    x_test = x_test.astype("float32") / 255.0
    
    #flatten and encode labels
    y_train = y_train.flatten()
    y_test =  y_test.flatten()
    
    return x_train, y_train, x_test, y_test

In [425]:
x_train, y_train, x_test, y_test = load_data()

In [426]:
#AlexNet Model setup - includes matching parameters with MIAShield
def create_model(input_shape, num_class):
    model = Sequential()

    #Layer 1 - Conv2D with MaxPooling
    model.add(Conv2D(48, (3, 3), strides = (2, 2), activation = "relu", padding = "same", input_shape = input_shape))
    model.add(MaxPooling2D(pool_size = (2, 2), strides = (2, 2)))
    model.add(BatchNormalization())

    #Layer 2 - Conv2D with MaxPooling 
    model.add(Conv2D(96, (3, 3), activation = "relu", padding = "same"))
    model.add(MaxPooling2D(pool_size = (3, 3), strides = (2, 2)))
    model.add(BatchNormalization())

    #Layer 3 - Conv2D
    model.add(Conv2D(192, (3, 3), activation = "relu", padding = "same"))

    #Layer 4 - Conv2D
    model.add(Conv2D(192, (3, 3), activation = "relu", padding = "same"))

    #Layer 5 - Conv2D with MaxPooling
    model.add(Conv2D(256, (3, 3), activation = "relu", padding = "same"))
    model.add(MaxPooling2D(pool_size = (3, 3), strides = (2, 2)))
    model.add(BatchNormalization())

    #flatten
    model.add(Flatten())

    #Fully Connected layer 1
    model.add(Dense(512, activation = "relu"))
    model.add(Dropout(.50))
            
    #Fully Connected layer 2
    model.add(Dense(256, activation = "relu"))
    model.add(Dropout(.50))

    #Output layer
    model.add(Dense(num_class, activation = "softmax"))

    return model


In [427]:
#base model uses 5 disjoint datasets for training
X = x_train
Y = y_train
idx = np.arange(len(X))

skf = StratifiedKFold(n_splits = n, shuffle = True, random_state = 42)
skf.get_n_splits(X, Y)
    
for i, (_, test_index) in enumerate(skf.split(X,Y)):
    Xi = X[test_index]
    Yi = Y[test_index]
    Y_onehot = to_categorical(Yi, num_classes = num_class)
    index_split = idx[test_index]
    
    globals()[f'x_train_split{i}'] = Xi
    globals()[f'y_train_split{i}'] = Yi
    globals()[f'y_onehot{i}'] = Y_onehot
    globals()[f'index_split{i}'] = index_split

In [428]:
#recombine indices from across all splits
all_indices = np.concatenate([globals()[f'index_split{i}'] for i in range(n)])
print(f"Total number of indices: {len(all_indices)}")

# check for duplicate values to ensure disjointedness
has_duplicates = len(all_indices) != len(np.unique(all_indices))
print("Duplicate Indices Present:", has_duplicates)

# check to ensure full dataset is represented
covers_all = len(all_indices) == len(X)
print("Coverage of Dataset Achieved:", covers_all)

Total number of indices: 50000
Duplicate Indices Present: False
Coverage of Dataset Achieved: True


In [429]:
#verify that label distribution is consistent across all data partitions
for i in range(n):
    labels = globals()[f'y_train_split{i}']
    counts = Counter(labels)
    print(f"Fold {i} label distribution:", dict(counts))

Fold 0 label distribution: {7: 1000, 9: 1000, 4: 1000, 6: 1000, 5: 1000, 0: 1000, 3: 1000, 2: 1000, 1: 1000, 8: 1000}
Fold 1 label distribution: {6: 1000, 9: 1000, 4: 1000, 8: 1000, 7: 1000, 1: 1000, 5: 1000, 3: 1000, 0: 1000, 2: 1000}
Fold 2 label distribution: {9: 1000, 1: 1000, 3: 1000, 6: 1000, 4: 1000, 7: 1000, 2: 1000, 0: 1000, 8: 1000, 5: 1000}
Fold 3 label distribution: {1: 1000, 2: 1000, 3: 1000, 4: 1000, 0: 1000, 9: 1000, 5: 1000, 8: 1000, 6: 1000, 7: 1000}
Fold 4 label distribution: {7: 1000, 2: 1000, 0: 1000, 9: 1000, 3: 1000, 1: 1000, 8: 1000, 5: 1000, 4: 1000, 6: 1000}


In [430]:
#EO uses 2.5k members from each Dtrain split
x_train_sample = []
y_train_sample = []
y_onehot_sample = []
index_original_eo = []
mem_per_split = 2500
nonmem = 5000

for i in range(n):
    Xi = globals()[f'x_train_split{i}']
    Yi = globals()[f'y_train_split{i}']
    y_onehot_i = globals()[f'y_onehot{i}']
    split_original_index = globals()[f'index_split{i}']
    
    stratified_choice = StratifiedShuffleSplit(n_splits = 1, test_size = mem_per_split, random_state = 42)
    
    for _, index_choice in stratified_choice.split(Xi, Yi):
        x_train_sample.append(Xi[index_choice])
        y_train_sample.append(Yi[index_choice])
        y_onehot_sample.append(y_onehot_i[index_choice])
        index_original_eo.extend(split_original_index[index_choice])

#EO uses 5k nonmembers (from Dtest)
stratified_choice_test = StratifiedShuffleSplit(n_splits = 1, test_size = nonmem, random_state = 42)
for _, index_choice_test in stratified_choice_test.split(x_test, y_test):
    x_test_eo = x_test[index_choice_test]
    y_test_eo = y_test[index_choice_test]
    index_test_eo = index_choice_test
    y_test_onehot = to_categorical(y_test_eo, num_classes = num_class)

x_train_eo = np.concatenate(x_train_sample + [x_test_eo], axis = 0)
y_train_eo = np.concatenate(y_train_sample + [y_test_eo], axis = 0)
y_onehot_eo = np.concatenate(y_onehot_sample + [y_test_onehot], axis=0)

members = mem_per_split * n
nonmembers = nonmem
membership_labels = np.concatenate([np.ones(members), np.zeros(nonmem)])

globals()[f'x_train_eo'] = x_train_eo
globals()[f'y_train_eo'] = y_train_eo
globals()[f'y_onehot_eo'] = y_onehot_eo
globals()[f'index_eo_original'] = index_original_eo
globals()[f'index_eo_test'] = index_test_eo
globals()[f'membership_labels_eo'] = membership_labels

In [431]:
# Split back into members and nonmembers to verify breakdowns
members = y_train_eo[:mem_per_split * n]
nonmembers = y_train_eo[mem_per_split * n:]

print(f'Expected Member Count = {n * mem_per_split}')
print(f'Actual Member Count = {len(members)}')
print(f'Expected Nonmember Count = {nonmem}')
print(f'Actual Nonmember Count = {len(nonmembers)}')

# Count stratification of classes and total counts
member_counts = Counter(members)
nonmember_counts = Counter(nonmembers)

print(f'Expected Member Count Per Class = {mem_per_split * n / num_class}')
print(f'Actual Member Count = {member_counts}')
print(f'Expected Nonmember Count Per Class = {nonmem / num_class}')
print(f'Actual Nonmember Count = {nonmember_counts}')


Expected Member Count = 12500
Actual Member Count = 12500
Expected Nonmember Count = 5000
Actual Nonmember Count = 5000
Expected Member Count Per Class = 1250.0
Actual Member Count = Counter({6: 1250, 4: 1250, 3: 1250, 9: 1250, 0: 1250, 2: 1250, 1: 1250, 7: 1250, 5: 1250, 8: 1250})
Expected Nonmember Count Per Class = 500.0
Actual Nonmember Count = Counter({8: 500, 9: 500, 3: 500, 1: 500, 6: 500, 7: 500, 5: 500, 0: 500, 2: 500, 4: 500})


In [432]:
#DTestMiashield uses 5k members from Dtrain - must be disjoint with EO
mems = 5000
nonmems = 5000
x_train_mia = []
y_train_mia = []
y_onehot_mia = []
index_original_mia = []

all_training_indices= np.arange(len(x_train))
remaining_training_indices = np.setdiff1d(all_training_indices, index_eo_original)

for i in range(n):
    Xi = globals()[f'x_train_split{i}']
    Yi = globals()[f'y_train_split{i}']
    y_onehot_i = globals()[f'y_onehot{i}']
    split_original_index = globals()[f'index_split{i}']

    Xi_remaining = Xi[np.isin(split_original_index, np.array(remaining_training_indices))]
    Yi_remaining = Yi[np.isin(split_original_index, np.array(remaining_training_indices))]
    index_remaining = split_original_index[np.isin(split_original_index, np.array(remaining_training_indices))]
    
    stratified_choice = StratifiedShuffleSplit(n_splits = 1, test_size = mems // n, random_state = 42)
    
    for _, index_choice in stratified_choice.split(Xi_remaining, Yi_remaining):
        x_train_mia.append(Xi_remaining[index_choice])
        y_train_mia.append(Yi_remaining[index_choice])
        y_onehot_mia.append(y_onehot_i[index_choice])
        index_original_mia.extend(index_remaining[index_choice])

#DtestMiashield uses 5k nonmembers (from Dtest) - must be disjoinst with EO
all_testing_indices= np.arange(len(x_test))
remaining_testing_indices = np.setdiff1d(all_testing_indices, index_eo_test)

x_test_rem = x_test[remaining_testing_indices]
y_test_rem = y_test[remaining_testing_indices]
index_text_mia = remaining_testing_indices
y_test_onehot = to_categorical(y_test_rem, num_classes = num_class)

x_test_mia = np.concatenate(x_train_mia + [x_test_rem], axis = 0)
y_test_mia = np.concatenate(y_train_mia + [y_test_rem], axis = 0)
y_onehot_mia = np.concatenate(y_onehot_mia + [y_test_onehot], axis=0)

membership_labels = np.concatenate([np.ones(mems), np.zeros(nonmem)])

globals()[f'x_test_mia'] = x_test_mia
globals()[f'y_test_mia'] = y_test_mia
globals()[f'y_onehot_mia'] = y_onehot_mia
globals()[f'index_original_mia'] = index_original_mia
globals()[f'index_test_mia'] = index_test_mia
globals()[f'membership_labels_eo'] = membership_labels


In [433]:
# Split back into members and nonmembers to verify breakdowns
members = y_test_mia[:mems]
nonmembers = y_test_mia[mems:]

print(f'Expected Member Count = {mems}')
print(f'Actual Member Count = {len(members)}')
print(f'Expected Nonmember Count = {nonmems}')
print(f'Actual Nonmember Count = {len(nonmembers)}')

# Count stratification of classes and total counts
member_counts = Counter(members)
nonmember_counts = Counter(nonmembers)

print(f'Expected Member Count Per Class = {mems / num_class}')
print(f'Actual Member Count = {member_counts}')
print(f'Expected Nonmember Count Per Class = {nonmems / num_class}')
print(f'Actual Nonmember Count = {nonmember_counts}')

#check for disjointedness with EO
overlap = np.intersect1d(index_original_eo, index_original_mia)
print(f'Expected overlap = 0')
print(f'Actual overlap = {len(overlap)}')


Expected Member Count = 5000
Actual Member Count = 5000
Expected Nonmember Count = 5000
Actual Nonmember Count = 5000
Expected Member Count Per Class = 500.0
Actual Member Count = Counter({4: 500, 3: 500, 1: 500, 9: 500, 5: 500, 8: 500, 2: 500, 0: 500, 6: 500, 7: 500})
Expected Nonmember Count Per Class = 500.0
Actual Nonmember Count = Counter({0: 500, 6: 500, 3: 500, 1: 500, 9: 500, 8: 500, 7: 500, 5: 500, 4: 500, 2: 500})
Expected overlap = 0
Actual overlap = 0


In [434]:
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range = 10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range = .2,
    zoom_range=0.2,
    horizontal_flip=True,
)

val_datagen = ImageDataGenerator(rescale=1./255)

In [435]:
for i in range(0, n):
    trained_model = create_model(input_shape, num_class)
    optimizer = tf.keras.optimizers.Adam(learning_rate=.01)
    loss_function = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
    trained_model.compile(optimizer = optimizer, loss = loss_function, metrics = ['accuracy'])
    trained_model.summary()
    model_data = globals()[f'x_train_split{i}']
    target_data = globals()[f'y_train_split{i}']
    print(target_data.shape)
    datagen = train_datagen
    steps_per_epoch = len(model_data) // batch_size

    globals()[f'history{i}'] = trained_model.fit(datagen.flow(model_data, target_data, batch_size=128, shuffle=True),
                    steps_per_epoch = steps_per_epoch, epochs=epochs, validation_data=(x_test, y_test))
    globals()[f'model{i}'] = trained_model

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


(10000,)
Epoch 1/60


  self._warn_if_super_not_called()


[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 73ms/step - accuracy: 0.1176 - loss: 2.7473 - val_accuracy: 0.1027 - val_loss: 2.6456
Epoch 2/60
[1m 1/78[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 24ms/step - accuracy: 0.1719 - loss: 2.3065

  self.gen.throw(value)


[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.1719 - loss: 2.3065 - val_accuracy: 0.1000 - val_loss: 2.7831
Epoch 3/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 71ms/step - accuracy: 0.1637 - loss: 2.1947 - val_accuracy: 0.1001 - val_loss: 4.5592
Epoch 4/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - accuracy: 0.1406 - loss: 2.1723 - val_accuracy: 0.1001 - val_loss: 4.5728
Epoch 5/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 74ms/step - accuracy: 0.1622 - loss: 2.1384 - val_accuracy: 0.1687 - val_loss: 3.1203
Epoch 6/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - accuracy: 0.2031 - loss: 2.0889 - val_accuracy: 0.1224 - val_loss: 2.6919
Epoch 7/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 78ms/step - accuracy: 0.1947 - loss: 2.1038 - val_accuracy: 0.1008 - val_loss: 12.3866
Epoch 8/60
[1m78/78[0m [32m━━━━━━━━━━━━━━

(10000,)
Epoch 1/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 71ms/step - accuracy: 0.1332 - loss: 2.6319 - val_accuracy: 0.1003 - val_loss: 2.3047
Epoch 2/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.1875 - loss: 2.1021 - val_accuracy: 0.1038 - val_loss: 2.2940
Epoch 3/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 67ms/step - accuracy: 0.1599 - loss: 2.1585 - val_accuracy: 0.1459 - val_loss: 2.4731
Epoch 4/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.1094 - loss: 2.2133 - val_accuracy: 0.1008 - val_loss: 4.0830
Epoch 5/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 67ms/step - accuracy: 0.1778 - loss: 2.0867 - val_accuracy: 0.1003 - val_loss: 3.2140
Epoch 6/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.2266 - loss: 1.9421 - val_accuracy: 0.1000 - val_loss: 3.4597
Epoch 7/60
[1m78/78[0m 

(10000,)
Epoch 1/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 71ms/step - accuracy: 0.1369 - loss: 2.7171 - val_accuracy: 0.1000 - val_loss: 9.5676
Epoch 2/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.1250 - loss: 2.1167 - val_accuracy: 0.1000 - val_loss: 10.1216
Epoch 3/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 67ms/step - accuracy: 0.1864 - loss: 2.1466 - val_accuracy: 0.1095 - val_loss: 4.3765
Epoch 4/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.2031 - loss: 2.0500 - val_accuracy: 0.0997 - val_loss: 10.4576
Epoch 5/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 67ms/step - accuracy: 0.2073 - loss: 2.0690 - val_accuracy: 0.1000 - val_loss: 122.1848
Epoch 6/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.2109 - loss: 2.1655 - val_accuracy: 0.1000 - val_loss: 95.0314
Epoch 7/60
[1m78/78

(10000,)
Epoch 1/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 71ms/step - accuracy: 0.1280 - loss: 2.7337 - val_accuracy: 0.1000 - val_loss: 6.1613
Epoch 2/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.1406 - loss: 2.2357 - val_accuracy: 0.1000 - val_loss: 6.5497
Epoch 3/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 67ms/step - accuracy: 0.1436 - loss: 2.2189 - val_accuracy: 0.1000 - val_loss: 9.5665
Epoch 4/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.2500 - loss: 2.0164 - val_accuracy: 0.1000 - val_loss: 11.0681
Epoch 5/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 68ms/step - accuracy: 0.1717 - loss: 2.1644 - val_accuracy: 0.1011 - val_loss: 5.1553
Epoch 6/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.1797 - loss: 2.2022 - val_accuracy: 0.1015 - val_loss: 5.4221
Epoch 7/60
[1m78/78[0m

(10000,)
Epoch 1/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 72ms/step - accuracy: 0.1220 - loss: 2.8320 - val_accuracy: 0.1000 - val_loss: 2.3058
Epoch 2/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.1953 - loss: 2.1770 - val_accuracy: 0.1000 - val_loss: 2.3059
Epoch 3/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 69ms/step - accuracy: 0.1659 - loss: 2.1579 - val_accuracy: 0.1000 - val_loss: 2.3124
Epoch 4/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.2578 - loss: 2.0385 - val_accuracy: 0.1000 - val_loss: 2.3126
Epoch 5/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 67ms/step - accuracy: 0.1856 - loss: 2.0816 - val_accuracy: 0.1000 - val_loss: 2.3144
Epoch 6/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.1875 - loss: 2.0528 - val_accuracy: 0.1000 - val_loss: 2.3144
Epoch 7/60
[1m78/78[0m 

In [436]:
import os

save_dir = "models"
os.makedirs(save_dir, exist_ok = True)

for i in range(0, n):
    
    model = globals()[f'model{i}']
    
    model_path = os.path.join(save_dir, f'model_{i}.h5')
    model.save(model_path)
    print(f"Saved model {i} to {model_path}")




Saved model 0 to models\model_0.h5
Saved model 1 to models\model_1.h5
Saved model 2 to models\model_2.h5




Saved model 3 to models\model_3.h5
Saved model 4 to models\model_4.h5
