# Generation of num_NN neural networks trained for num_epochs epochs on the Fashion-MNIST dataset

In [1]:
num_NN: int = 100       # size of every ensemble (networks generated per experiment)
num_epochs: int = 200   # epochs each network is trained for
b_size: int = 32        # batch size handed to model.fit

In [2]:
# Get FASHION-MNIST data: the loader returns the (train, test) image/label pairs
from keras.datasets import fashion_mnist
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()

# data reshaping: flatten every 28x28 image into one 784-long row
X_train = X_train.reshape(X_train.shape[0], 28*28)
X_test = X_test.reshape(X_test.shape[0], 28*28)

# data normalization: divide by the largest training pixel so it maps to 1.
# The test set reuses the training scale instead of its own maximum, so both
# sets always go through exactly the same transformation.
pixel_scale = X_train.max()
X_train = X_train/pixel_scale
X_test = X_test/pixel_scale


In [3]:
# Sanity check: display the shapes of the flattened train and test matrices
X_train.shape, X_test.shape

((60000, 784), (10000, 784))

In [4]:
# Convert the integer class labels of both splits into one-hot rows
from keras.utils import to_categorical
num_classes = 10
y_train_dummy, y_test_dummy = (
    to_categorical(labels, num_classes) for labels in (y_train, y_test)
)

In [5]:
# Compare the first training label with its one-hot encoded counterpart
y_train[0], y_train_dummy[0]

(9, array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1.], dtype=float32))

In [6]:
def create_dir(path):
    """Create the directory in which the file ``path`` is going to be saved.

    Parameters
    ----------
    path : str
        Path of a file about to be written. Only its directory part
        (``os.path.dirname(path)``) is created, never the file itself.

    Returns
    -------
    None

    Raises
    ------
    OSError
        If the directory is missing and cannot be created.
    """
    # Local import: this cell runs before the notebook's import cell, and the
    # previous version also needed `errno`, which was never imported anywhere.
    import os

    directory = os.path.dirname(path)
    # A bare file name has an empty directory part (current working directory):
    # there is nothing to create, and os.makedirs('') would raise.
    if directory and not os.path.exists(directory):
        # exist_ok=True covers the race where the directory is created by
        # someone else between the check above and this call.
        os.makedirs(directory, exist_ok=True)

## Creation of a base model that is saved and reloaded so that networks start from the same initialization

In [None]:
import os
import numpy as np
import pandas as pd
from time import time
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.callbacks import ModelCheckpoint
from keras.models import load_model

# Reference network. Every Dropout layer has rate 0 here; the Dropout ensemble
# later changes the rate of layers 0, 2 and 4 on a reloaded copy of this model.
model = Sequential([Dropout(0, input_dim=X_train.shape[1]),
                    Dense(512, activation='relu'),
                    Dropout(0),
                    Dense(128,activation='relu'),
                    Dropout(0),
                    Dense(num_classes,activation='softmax')])

model.compile(optimizer='sgd',loss='categorical_crossentropy',metrics=['accuracy'])

# Save the freshly initialised network: the ensembles that need a common
# starting point reload it with load_model(path_same_init).
# The file name used to be built with str(i), but no `i` exists when this cell
# runs on a fresh kernel (NameError), so an explicit copy id is used instead.
same_init_copy = 0
path_same_init = 'E:/Fonti di Variazione/test approssimazione salvataggio/NN_epoca0_copia_'+str(same_init_copy)+'.hdf5'
create_dir(path_same_init)
model.save(path_same_init)

## Random Initialization ensemble

In [None]:
# Random Initialization ensemble: each network is built from scratch (new random
# weights) and trained on the same data in the same order (shuffle=False).
evaluated = []            # model.evaluate() result on the test set, one entry per network

for i in range(num_NN):
    # checkpoint file template of the i-th network ({epoch} is filled in by Keras)
    path_parametres = 'E:/Fonti di Variazione/Rand. Init./epoca0-200/Checkpoint/model_NN'+str(i)+'_ep{epoch:d}.hdf5'

    # file receiving the loss and accuracy of the i-th network at the end of each epoch
    path_history = 'E:/Fonti di Variazione/Rand. Init./epoca0-200/loss_accuracy/history'+str(i)+'.xlsx'

    # creation of directories associated with paths
    create_dir(path_parametres)
    create_dir(path_history)

    # save the whole model (not only the weights) at the end of every epoch
    my_checkpoint = [
        ModelCheckpoint(filepath=path_parametres, monitor='val_loss',
                        verbose=0, save_best_only=False, save_weights_only=False,
                        mode='auto', save_freq='epoch')
    ]

    # creation of a network with random initialization
    model = Sequential([Dropout(0, input_dim=X_train.shape[1]),
                        Dense(512, activation='relu'),
                        Dropout(0),
                        Dense(128, activation='relu'),
                        Dropout(0),
                        Dense(num_classes, activation='softmax')])

    model.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy'])

    # epoch-0 snapshot, derived from the checkpoint template so the names cannot drift apart
    model.save(path_parametres.format(epoch=0))

    # my_checkpoint is already a list of callbacks: pass it as is, not wrapped in another list
    history = model.fit(X_train, y_train_dummy, validation_split=0.2, epochs=num_epochs,
                        batch_size=b_size, shuffle=False, callbacks=my_checkpoint)
    pd.DataFrame(history.history).to_excel(path_history)

    evaluated.append(model.evaluate(X_test, y_test_dummy))

path_evaluation = 'E:/Fonti di Variazione/Rand. Init./epoca0-200/Evaluation/evaluated_models.xlsx'
create_dir(path_evaluation)
pd.DataFrame(evaluated).to_excel(path_evaluation)

## Batch Shuffle ensemble

In [None]:
# Batch Shuffle ensemble: every network is reloaded from the same saved
# initialization and trained with shuffle=True.
evaluated = []            # model.evaluate() result on the test set, one entry per network

for i in range(num_NN):
    # checkpoint file template of the i-th network ({epoch} is filled in by Keras)
    path_parametres = 'E:/Fonti di Variazione/Batch Shuffle 32/epoca0-200/Checkpoint/model_NN'+str(i)+'_ep{epoch:d}.hdf5'

    # file receiving the loss and accuracy of the i-th network at the end of each epoch
    path_history = 'E:/Fonti di Variazione/Batch Shuffle 32/epoca0-200/loss_accuracy/history'+str(i)+'.xlsx'

    # creation of directories associated with paths
    create_dir(path_parametres)
    create_dir(path_history)

    # save the whole model (not only the weights) at the end of every epoch
    my_checkpoint = [
        ModelCheckpoint(filepath=path_parametres, monitor='val_loss',
                        verbose=0, save_best_only=False, save_weights_only=False,
                        mode='auto', save_freq='epoch')
    ]

    # networks are always initialized in the same way: reload the saved base model
    model = load_model(path_same_init)

    model.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy'])

    # epoch-0 snapshot, derived from the checkpoint template so the names cannot drift apart
    model.save(path_parametres.format(epoch=0))

    # my_checkpoint is already a list of callbacks: pass it as is, not wrapped in another list
    history = model.fit(X_train, y_train_dummy, validation_split=0.2, epochs=num_epochs,
                        batch_size=b_size, shuffle=True, callbacks=my_checkpoint)
    pd.DataFrame(history.history).to_excel(path_history)

    evaluated.append(model.evaluate(X_test, y_test_dummy))

path_evaluation = 'E:/Fonti di Variazione/Batch Shuffle 32/epoca0-200/Evaluation/evaluated_models.xlsx'
create_dir(path_evaluation)
pd.DataFrame(evaluated).to_excel(path_evaluation)

## Dropout ensemble

In [None]:
# Dropout ensemble: every network is reloaded from the same saved initialization,
# its three Dropout layers are switched to rate 0.5, and it is trained with shuffle=False.
evaluated = []            # model.evaluate() result on the test set, one entry per network

for i in range(num_NN):
    # checkpoint file template of the i-th network ({epoch} is filled in by Keras)
    path_parametres = 'E:/Fonti di Variazione/Dropout 0.5/epoca0-200/Checkpoint/model_NN'+str(i)+'_ep{epoch:d}.hdf5'

    # file receiving the loss and accuracy of the i-th network at the end of each epoch
    path_history = 'E:/Fonti di Variazione/Dropout 0.5/epoca0-200/loss_accuracy/history'+str(i)+'.xlsx'

    # creation of directories associated with paths
    create_dir(path_parametres)
    create_dir(path_history)

    # save the whole model (not only the weights) at the end of every epoch
    my_checkpoint = [
        ModelCheckpoint(filepath=path_parametres, monitor='val_loss',
                        verbose=0, save_best_only=False, save_weights_only=False,
                        mode='auto', save_freq='epoch')
    ]

    # networks are always initialized in the same way: reload the saved base model
    model = load_model(path_same_init)

    # Change the dropout rates of the loaded network BEFORE compiling it.
    # Layers 0, 2 and 4 are the Dropout layers of the base architecture.
    # NOTE(review): this relies on Keras reading `rate` when the training
    # function is built at fit() time - confirm for the installed version.
    for dropout_index in (0, 2, 4):
        model.layers[dropout_index].rate = 0.5

    model.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy'])

    # epoch-0 snapshot, derived from the checkpoint template so the names cannot drift apart
    model.save(path_parametres.format(epoch=0))

    # my_checkpoint is already a list of callbacks: pass it as is, not wrapped in another list
    history = model.fit(X_train, y_train_dummy, validation_split=0.2, epochs=num_epochs,
                        batch_size=b_size, shuffle=False, callbacks=my_checkpoint)
    pd.DataFrame(history.history).to_excel(path_history)

    evaluated.append(model.evaluate(X_test, y_test_dummy))

path_evaluation = 'E:/Fonti di Variazione/Dropout 0.5/epoca0-200/Evaluation/evaluated_models.xlsx'
create_dir(path_evaluation)
pd.DataFrame(evaluated).to_excel(path_evaluation)

## Distorted ensemble

In [None]:
# Distorted ensemble: every network is reloaded from the same saved initialization
# and trained (shuffle=False) on its own randomly distorted copy of the training images.

# specify the arguments
rotation_range_val = 5
width_shift_val = 0.25
height_shift_val = 0.25
shear_range_val = 45
zoom_range_val=[1.0,1.5]
# import relevant library
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Start from an empty list: without this reset the results of the previously run
# ensemble would be written into this ensemble's evaluation file.
evaluated = []

# X_val / y_val were never defined in this notebook. Hold out the trailing 20% of
# the training data, which is the part Keras documents validation_split=0.2 as
# using in the other ensembles, and distort only the remaining images.
# NOTE(review): confirm that this is the validation split that was intended.
val_fraction = 0.2
split_at = int(X_train.shape[0] * (1 - val_fraction))
X_fit, y_fit = X_train[:split_at], y_train[:split_at]
X_val, y_val = X_train[split_at:], y_train[split_at:]

# One-hot encoding of the labels that do not change between networks
y_val_dummy = to_categorical(y_val, num_classes)
y_test_dummy = to_categorical(y_test, num_classes)

# Generator of random distortions. datagen.fit() is not called: the Keras
# documentation only requires it for featurewise_center,
# featurewise_std_normalization or zca_whitening, none of which is enabled here.
datagen = ImageDataGenerator(rotation_range=rotation_range_val, width_shift_range=width_shift_val,
                             height_shift_range=height_shift_val, zoom_range=zoom_range_val,
                             shear_range=shear_range_val)

for i in range(num_NN):
    # checkpoint file template of the i-th network ({epoch} is filled in by Keras)
    path_parametres = 'E:/Fonti di Variazione/Distorted/epoca0-200/Checkpoint/model_NN'+str(i)+'_ep{epoch:d}.hdf5'

    # file receiving the loss and accuracy of the i-th network at the end of each epoch
    path_history = 'E:/Fonti di Variazione/Distorted/epoca0-200/loss_accuracy/history'+str(i)+'.xlsx'

    # creation of directories associated with paths
    create_dir(path_parametres)
    create_dir(path_history)

    # distorted dataset creation: one batch as large as the training part, in the original order
    data_dist = datagen.flow(X_fit.reshape(X_fit.shape[0], 28, 28, 1), y_fit.reshape(y_fit.shape[0], 1),
                             batch_size=X_fit.shape[0], shuffle=False)
    X, y = next(data_dist)

    # data reshaping: back to one 784-long row per image and a flat label vector
    X_dist = X.reshape(X.shape[0], 28*28)
    y_dist = y.reshape(y.shape[0])

    # data normalization
    X_dist = X_dist/X_dist.max()

    # One-hot encoding of the distorted set's labels
    y_dist_dummy = to_categorical(y_dist, num_classes)

    # save the whole model (not only the weights) at the end of every epoch
    my_checkpoint = [
        ModelCheckpoint(filepath=path_parametres, monitor='val_loss',
                        verbose=0, save_best_only=False, save_weights_only=False,
                        mode='auto', save_freq='epoch')
    ]

    # networks are always initialized in the same way: reload the saved base model
    model = load_model(path_same_init)

    model.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy'])

    # epoch-0 snapshot, derived from the checkpoint template so the names cannot drift apart
    model.save(path_parametres.format(epoch=0))

    # my_checkpoint is already a list of callbacks: pass it as is, not wrapped in another list
    history = model.fit(X_dist, y_dist_dummy, validation_data=(X_val, y_val_dummy), epochs=num_epochs,
                        batch_size=b_size, shuffle=False, callbacks=my_checkpoint)
    pd.DataFrame(history.history).to_excel(path_history)

    evaluated.append(model.evaluate(X_test, y_test_dummy))

path_evaluation = 'E:/Fonti di Variazione/Distorted/epoca0-200/Evaluation/evaluated_models.xlsx'
create_dir(path_evaluation)
pd.DataFrame(evaluated).to_excel(path_evaluation)

## Rand. Init. + BS ensemble

In [None]:
# Rand. Init. + BS ensemble: each network is built from scratch (new random
# weights) and trained with shuffle=True.
evaluated = []            # model.evaluate() result on the test set, one entry per network

for i in range(num_NN):
    # checkpoint file template of the i-th network ({epoch} is filled in by Keras)
    path_parametres = 'E:/Fonti di Variazione/Rand. Init. + BS/epoca0-200/Checkpoint/model_NN'+str(i)+'_ep{epoch:d}.hdf5'

    # file receiving the loss and accuracy of the i-th network at the end of each epoch
    path_history = 'E:/Fonti di Variazione/Rand. Init. + BS/epoca0-200/loss_accuracy/history'+str(i)+'.xlsx'

    # creation of directories associated with paths
    create_dir(path_parametres)
    create_dir(path_history)

    # save the whole model (not only the weights) at the end of every epoch
    my_checkpoint = [
        ModelCheckpoint(filepath=path_parametres, monitor='val_loss',
                        verbose=0, save_best_only=False, save_weights_only=False,
                        mode='auto', save_freq='epoch')
    ]

    # creation of a network with random initialization
    model = Sequential([Dropout(0, input_dim=X_train.shape[1]),
                        Dense(512, activation='relu'),
                        Dropout(0),
                        Dense(128, activation='relu'),
                        Dropout(0),
                        Dense(num_classes, activation='softmax')])

    model.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy'])

    # epoch-0 snapshot, derived from the checkpoint template so the names cannot drift apart
    model.save(path_parametres.format(epoch=0))

    # my_checkpoint is already a list of callbacks: pass it as is, not wrapped in another list
    history = model.fit(X_train, y_train_dummy, validation_split=0.2, epochs=num_epochs,
                        batch_size=b_size, shuffle=True, callbacks=my_checkpoint)
    pd.DataFrame(history.history).to_excel(path_history)

    evaluated.append(model.evaluate(X_test, y_test_dummy))

path_evaluation = 'E:/Fonti di Variazione/Rand. Init. + BS/epoca0-200/Evaluation/evaluated_models.xlsx'
create_dir(path_evaluation)
pd.DataFrame(evaluated).to_excel(path_evaluation)

## Rand. Init. + BS + Drop ensemble

In [None]:
# Rand. Init. + BS + Drop ensemble: each network is built from scratch with
# dropout rate 0.5 on its three Dropout layers and trained with shuffle=True.
evaluated = []            # model.evaluate() result on the test set, one entry per network

for i in range(num_NN):
    # checkpoint file template of the i-th network ({epoch} is filled in by Keras)
    path_parametres = 'E:/Fonti di Variazione/Rand. Init. + BS + Drop/epoca0-200/Checkpoint/model_NN'+str(i)+'_ep{epoch:d}.hdf5'

    # file receiving the loss and accuracy of the i-th network at the end of each epoch
    path_history = 'E:/Fonti di Variazione/Rand. Init. + BS + Drop/epoca0-200/loss_accuracy/history'+str(i)+'.xlsx'

    # creation of directories associated with paths
    create_dir(path_parametres)
    create_dir(path_history)

    # save the whole model (not only the weights) at the end of every epoch
    my_checkpoint = [
        ModelCheckpoint(filepath=path_parametres, monitor='val_loss',
                        verbose=0, save_best_only=False, save_weights_only=False,
                        mode='auto', save_freq='epoch')
    ]

    # Creation of a network with random initialization. The dropout rate is set
    # directly in the constructors, so the former block that re-assigned
    # layers[0/2/4].rate = 0.5 afterwards (no model is loaded in this cell) is gone.
    model = Sequential([Dropout(0.5, input_dim=X_train.shape[1]),
                        Dense(512, activation='relu'),
                        Dropout(0.5),
                        Dense(128, activation='relu'),
                        Dropout(0.5),
                        Dense(num_classes, activation='softmax')])

    model.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy'])

    # epoch-0 snapshot, derived from the checkpoint template so the names cannot drift apart
    model.save(path_parametres.format(epoch=0))

    # my_checkpoint is already a list of callbacks: pass it as is, not wrapped in another list
    history = model.fit(X_train, y_train_dummy, validation_split=0.2, epochs=num_epochs,
                        batch_size=b_size, shuffle=True, callbacks=my_checkpoint)
    pd.DataFrame(history.history).to_excel(path_history)

    evaluated.append(model.evaluate(X_test, y_test_dummy))

path_evaluation = 'E:/Fonti di Variazione/Rand. Init. + BS + Drop/epoca0-200/Evaluation/evaluated_models.xlsx'
create_dir(path_evaluation)
pd.DataFrame(evaluated).to_excel(path_evaluation)

## Distorted + Rand. Init. + BS ensemble

In [None]:
# Distorted + Rand. Init. + BS ensemble: each network is built from scratch (new
# random weights) and trained with shuffle=True on its own randomly distorted
# copy of the training images.

# specify the arguments
rotation_range_val = 5
width_shift_val = 0.25
height_shift_val = 0.25
shear_range_val = 45
zoom_range_val=[1.0,1.5]
# import relevant library
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Start from an empty list: without this reset the results of the previously run
# ensemble would be written into this ensemble's evaluation file.
evaluated = []

# X_val / y_val were never defined in this notebook. Hold out the trailing 20% of
# the training data, which is the part Keras documents validation_split=0.2 as
# using in the other ensembles, and distort only the remaining images.
# NOTE(review): confirm that this is the validation split that was intended.
val_fraction = 0.2
split_at = int(X_train.shape[0] * (1 - val_fraction))
X_fit, y_fit = X_train[:split_at], y_train[:split_at]
X_val, y_val = X_train[split_at:], y_train[split_at:]

# One-hot encoding of the labels that do not change between networks
y_val_dummy = to_categorical(y_val, num_classes)
y_test_dummy = to_categorical(y_test, num_classes)

# Generator of random distortions. datagen.fit() is not called: the Keras
# documentation only requires it for featurewise_center,
# featurewise_std_normalization or zca_whitening, none of which is enabled here.
datagen = ImageDataGenerator(rotation_range=rotation_range_val, width_shift_range=width_shift_val,
                             height_shift_range=height_shift_val, zoom_range=zoom_range_val,
                             shear_range=shear_range_val)

for i in range(num_NN):
    # checkpoint file template of the i-th network ({epoch} is filled in by Keras)
    path_parametres = 'E:/Fonti di Variazione/Distorted + Rand. Init. + BS/epoca0-200/Checkpoint/model_NN'+str(i)+'_ep{epoch:d}.hdf5'

    # file receiving the loss and accuracy of the i-th network at the end of each epoch
    path_history = 'E:/Fonti di Variazione/Distorted + Rand. Init. + BS/epoca0-200/loss_accuracy/history'+str(i)+'.xlsx'

    # creation of directories associated with paths
    create_dir(path_parametres)
    create_dir(path_history)

    # distorted dataset creation: one batch as large as the training part, in the original order
    data_dist = datagen.flow(X_fit.reshape(X_fit.shape[0], 28, 28, 1), y_fit.reshape(y_fit.shape[0], 1),
                             batch_size=X_fit.shape[0], shuffle=False)
    X, y = next(data_dist)

    # data reshaping: back to one 784-long row per image and a flat label vector
    X_dist = X.reshape(X.shape[0], 28*28)
    y_dist = y.reshape(y.shape[0])

    # data normalization
    X_dist = X_dist/X_dist.max()

    # One-hot encoding of the distorted set's labels
    y_dist_dummy = to_categorical(y_dist, num_classes)

    # save the whole model (not only the weights) at the end of every epoch
    my_checkpoint = [
        ModelCheckpoint(filepath=path_parametres, monitor='val_loss',
                        verbose=0, save_best_only=False, save_weights_only=False,
                        mode='auto', save_freq='epoch')
    ]

    # creation of a network with random initialization
    model = Sequential([Dropout(0, input_dim=X_train.shape[1]),
                        Dense(512, activation='relu'),
                        Dropout(0),
                        Dense(128, activation='relu'),
                        Dropout(0),
                        Dense(num_classes, activation='softmax')])

    model.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy'])

    # epoch-0 snapshot, derived from the checkpoint template so the names cannot drift apart
    model.save(path_parametres.format(epoch=0))

    # my_checkpoint is already a list of callbacks: pass it as is, not wrapped in another list
    history = model.fit(X_dist, y_dist_dummy, validation_data=(X_val, y_val_dummy), epochs=num_epochs,
                        batch_size=b_size, shuffle=True, callbacks=my_checkpoint)
    pd.DataFrame(history.history).to_excel(path_history)

    evaluated.append(model.evaluate(X_test, y_test_dummy))

path_evaluation = 'E:/Fonti di Variazione/Distorted + Rand. Init. + BS/epoca0-200/Evaluation/evaluated_models.xlsx'
create_dir(path_evaluation)
pd.DataFrame(evaluated).to_excel(path_evaluation)