In [322]:
import os

import numpy as np

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Activation, Dropout, BatchNormalization
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam

from collections import Counter

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score

In [323]:
# for i in range(n):
#     for prefix in ['x_train_split', 'y_train_split', 'y_onehot', 'index_split', 'model', 'history']:
#         var_name = f'{prefix}{i}'
#         if var_name in globals():
#             del globals()[var_name]

In [324]:
optimizer = "Adam"
loss_function = "categorical_crossentropy"
epochs = 60
batch_size = 128
learning_rate = .01
img_width = 32
img_height = 32
n = 5
num_class = 10
input_shape = (img_width, img_height, 3)
steps_per_epoch = 10000 // batch_size

In [325]:
#load CIFAR-10 DATA
def load_data():
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    
    #normalize pixel values to 0-1 to simplify training
    x_train = x_train.astype("float32") / 255.0
    x_test = x_test.astype("float32") / 255.0
    
    #flatten and encode labels
    y_train = y_train.flatten()
    y_test =  y_test.flatten()
    
    # y_train = to_categorical(y_train)
    # y_test = to_categorical(y_test)
    
    return x_train, y_train, x_test, y_test

In [326]:
x_train, y_train, x_test, y_test = load_data()

In [327]:
#AlexNet Model setup - includes matching parameters with MIAShield
def create_model(input_shape, num_class):
    model = Sequential()

    #Layer 1 - Conv2D with MaxPooling
    model.add(Conv2D(48, (3, 3), strides = (2, 2), activation = "relu", padding = "same", input_shape = input_shape))
    model.add(MaxPooling2D(pool_size = (2, 2), strides = (2, 2)))
    model.add(BatchNormalization())

    #Layer 2 - Conv2D with MaxPooling 
    model.add(Conv2D(96, (3, 3), activation = "relu", padding = "same"))
    model.add(MaxPooling2D(pool_size = (3, 3), strides = (2, 2)))
    model.add(BatchNormalization())

    #Layer 3 - Conv2D
    model.add(Conv2D(192, (3, 3), activation = "relu", padding = "same"))

    #Layer 4 - Conv2D
    model.add(Conv2D(192, (3, 3), activation = "relu", padding = "same"))

    #Layer 5 - Conv2D with MaxPooling
    model.add(Conv2D(256, (3, 3), activation = "relu", padding = "same"))
    model.add(MaxPooling2D(pool_size = (3, 3), strides = (2, 2)))
    model.add(BatchNormalization())

    #flatten
    model.add(Flatten())

    #Fully Connected layer 1
    model.add(Dense(512, activation = "relu"))
    model.add(Dropout(.50))
            
    #Fully Connected layer 2
    model.add(Dense(256, activation = "relu"))
    model.add(Dropout(.50))

    #Output layer
    model.add(Dense(num_class, activation = "softmax"))

    return model


In [328]:
X = x_train
Y = y_train
idx = np.arange(len(X))

skf = StratifiedKFold(n_splits = n, shuffle = True, random_state = 42)
skf.get_n_splits(X, Y)
    
for i, (_, test_index) in enumerate(skf.split(X,Y)):
    Xi = X[test_index]
    Yi = Y[test_index]
    Y_onehot = to_categorical(Yi, num_classes = num_class)
    index_split = idx[test_index]
    
    globals()[f'x_train_split{i}'] = Xi
    globals()[f'y_train_split{i}'] = Yi
    globals()[f'y_onehot{i}'] = Y_onehot
    globals()[f'index_split{i}'] = index_split

In [329]:
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range = 10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range = .2,
    zoom_range=0.2,
    horizontal_flip=True,
)

val_datagen = ImageDataGenerator(rescale=1./255)

In [330]:
# train_generator = train_datagen.flow(
#     x_,
#     target_size=(img_width, img_height),
#     batch_size=batch_size,
#     class_mode='categorical'
# )

# val_generator = val_datagen.flow_from_directory(
#     y_test,
#     target_size=(img_width, img_height),
#     batch_size=batch_size,
#     class_mode='categorical'
# )

In [331]:
#recombine indices from across all splits
all_indices = np.concatenate([globals()[f'index_split{i}'] for i in range(n)])
print(f"Total number of indices: {len(all_indices)}")

# check for duplicate values to ensure disjointedness
has_duplicates = len(all_indices) != len(np.unique(all_indices))
print("Duplicate Indices Present:", has_duplicates)

# check to ensure full dataset is represented
covers_all = len(all_indices) == len(X)
print("Coverage of Dataset Achieved:", covers_all)

Total number of indices: 50000
Duplicate Indices Present: False
Coverage of Dataset Achieved: True


In [332]:
#verify that label distribution is consistent across all data partitions
for i in range(n):
    labels = globals()[f'y_train_split{i}']
    counts = Counter(labels)
    print(f"Fold {i} label distribution:", dict(counts))

Fold 0 label distribution: {7: 1000, 9: 1000, 4: 1000, 6: 1000, 5: 1000, 0: 1000, 3: 1000, 2: 1000, 1: 1000, 8: 1000}
Fold 1 label distribution: {6: 1000, 9: 1000, 4: 1000, 8: 1000, 7: 1000, 1: 1000, 5: 1000, 3: 1000, 0: 1000, 2: 1000}
Fold 2 label distribution: {9: 1000, 1: 1000, 3: 1000, 6: 1000, 4: 1000, 7: 1000, 2: 1000, 0: 1000, 8: 1000, 5: 1000}
Fold 3 label distribution: {1: 1000, 2: 1000, 3: 1000, 4: 1000, 0: 1000, 9: 1000, 5: 1000, 8: 1000, 6: 1000, 7: 1000}
Fold 4 label distribution: {7: 1000, 2: 1000, 0: 1000, 9: 1000, 3: 1000, 1: 1000, 8: 1000, 5: 1000, 4: 1000, 6: 1000}


In [333]:
for i in range(0, n):
    trained_model = create_model(input_shape, num_class)
    optimizer = tf.keras.optimizers.Adam(learning_rate=.01)
    loss_function = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    trained_model.compile(optimizer = optimizer, loss = loss_function, metrics = ['accuracy'])
    trained_model.summary()
    model_data = globals()[f'x_train_split{i}']
    target_data = globals()[f'y_train_split{i}']
    print(target_data.shape)
    datagen = train_datagen

    globals()[f'history{i}'] = trained_model.fit(datagen.flow(model_data, target_data, batch_size=128, shuffle=False),
                    steps_per_epoch = steps_per_epoch, epochs=epochs, validation_data=(x_test, y_test))
    globals()[f'model{i}'] = trained_model

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


(10000,)
Epoch 1/60


  output, from_logits = _get_logits(
  self._warn_if_super_not_called()


[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 72ms/step - accuracy: 0.1327 - loss: 2.6520 - val_accuracy: 0.1329 - val_loss: 2.2810
Epoch 2/60
[1m 1/78[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 25ms/step - accuracy: 0.1719 - loss: 2.1888

  self.gen.throw(value)


[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.1719 - loss: 2.1888 - val_accuracy: 0.1104 - val_loss: 2.2889
Epoch 3/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 70ms/step - accuracy: 0.1789 - loss: 2.1618 - val_accuracy: 0.0999 - val_loss: 2.3175
Epoch 4/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.1641 - loss: 2.0933 - val_accuracy: 0.0995 - val_loss: 2.3114
Epoch 5/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 69ms/step - accuracy: 0.1719 - loss: 2.1228 - val_accuracy: 0.0944 - val_loss: 2.2944
Epoch 6/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.1719 - loss: 2.0540 - val_accuracy: 0.1033 - val_loss: 2.3336
Epoch 7/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 69ms/step - accuracy: 0.2031 - loss: 2.0635 - val_accuracy: 0.0998 - val_loss: 13.3985
Epoch 8/60
[1m78/78[0m [32m━━━━━━━━━━━━━━

(10000,)
Epoch 1/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 73ms/step - accuracy: 0.1364 - loss: 2.7794 - val_accuracy: 0.1024 - val_loss: 2.3041
Epoch 2/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.2031 - loss: 2.1360 - val_accuracy: 0.0936 - val_loss: 2.3090
Epoch 3/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 70ms/step - accuracy: 0.1914 - loss: 2.0949 - val_accuracy: 0.1001 - val_loss: 4.8367
Epoch 4/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.2422 - loss: 1.9761 - val_accuracy: 0.1001 - val_loss: 4.4851
Epoch 5/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 70ms/step - accuracy: 0.2006 - loss: 2.0405 - val_accuracy: 0.1000 - val_loss: 6.9380
Epoch 6/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.2812 - loss: 2.0621 - val_accuracy: 0.1000 - val_loss: 7.5909
Epoch 7/60
[1m78/78[0m 

(10000,)
Epoch 1/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 78ms/step - accuracy: 0.1362 - loss: 2.6862 - val_accuracy: 0.1000 - val_loss: 2.3033
Epoch 2/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - accuracy: 0.1875 - loss: 2.0923 - val_accuracy: 0.1240 - val_loss: 2.2682
Epoch 3/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 69ms/step - accuracy: 0.1838 - loss: 2.1422 - val_accuracy: 0.0982 - val_loss: 3.3364
Epoch 4/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.1562 - loss: 2.2896 - val_accuracy: 0.1000 - val_loss: 4.8813
Epoch 5/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 71ms/step - accuracy: 0.2007 - loss: 2.0641 - val_accuracy: 0.1000 - val_loss: 15.9994
Epoch 6/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.2734 - loss: 1.9494 - val_accuracy: 0.1000 - val_loss: 14.5696
Epoch 7/60
[1m78/78[

(10000,)
Epoch 1/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 78ms/step - accuracy: 0.1033 - loss: 2.6787 - val_accuracy: 0.1000 - val_loss: 2.3042
Epoch 2/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.0859 - loss: 2.3077 - val_accuracy: 0.1000 - val_loss: 2.3043
Epoch 3/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 71ms/step - accuracy: 0.1288 - loss: 2.2666 - val_accuracy: 0.1000 - val_loss: 2.3114
Epoch 4/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.0938 - loss: 2.2209 - val_accuracy: 0.1000 - val_loss: 2.3116
Epoch 5/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 72ms/step - accuracy: 0.1548 - loss: 2.2038 - val_accuracy: 0.1000 - val_loss: 2.3377
Epoch 6/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - accuracy: 0.1641 - loss: 2.2386 - val_accuracy: 0.1000 - val_loss: 2.3378
Epoch 7/60
[1m78/78[0m 

(10000,)
Epoch 1/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 73ms/step - accuracy: 0.1233 - loss: 2.7756 - val_accuracy: 0.1000 - val_loss: 2.3042
Epoch 2/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.1562 - loss: 2.2961 - val_accuracy: 0.1000 - val_loss: 2.3042
Epoch 3/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 69ms/step - accuracy: 0.1348 - loss: 2.2509 - val_accuracy: 0.1000 - val_loss: 2.3037
Epoch 4/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.1406 - loss: 2.2287 - val_accuracy: 0.1000 - val_loss: 2.3038
Epoch 5/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 70ms/step - accuracy: 0.1728 - loss: 2.1530 - val_accuracy: 0.1000 - val_loss: 8.3861
Epoch 6/60
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.1328 - loss: 2.1408 - val_accuracy: 0.1014 - val_loss: 3.6581
Epoch 7/60
[1m78/78[0m 

In [334]:
import os

save_dir = "models"
os.makedirs(save_dir, exist_ok = True)

for i in range(0, n):
    
    model = globals()[f'model{i}']
    
    model_path = os.path.join(save_dir, f'model_{i}.h5')
    model.save(model_path)
    print(f"Saved model {i} to {model_path}")




Saved model 0 to models\model_0.h5
Saved model 1 to models\model_1.h5
Saved model 2 to models\model_2.h5
Saved model 3 to models\model_3.h5
Saved model 4 to models\model_4.h5
