In [54]:
from __future__ import print_function
import keras
from keras.datasets import cifar10
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
import tensorflow
import os
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import normalize

In [64]:
# Hyper-parameters shared by the training cells of this notebook.
num_classes, num_iterators = 10, 50   # label classes / boosting rounds
batch_size, epochs = 128, 30          # settings passed to fit() for every model

In [56]:
# Load in the data (use this cell if you want the full CIFAR-10 dataset).
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# One-hot encode the integer class labels (num_classes columns each).
y_train, y_test = (keras.utils.to_categorical(labels, num_classes)
                   for labels in (y_train, y_test))

# Cast the images to float32 and divide by 255.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

x_train shape: (50000, 32, 32, 3)
50000 train samples
10000 test samples


In [57]:
# Alternative loader: training data comes from pre-saved .npy arrays, test
# data from the regular CIFAR-10 test split.
# NOTE(review): the "X9Prop0.1" file names presumably mean class 9 was
# undersampled to 10% of its original size -- TODO confirm.
undersample_path = '/home/ec2-user/SageMaker/efs/Data/UnderSamples/'

# Only the test split is kept here; the CIFAR-10 training split is discarded.
(_, _), (x_test, y_test) = cifar10.load_data()

# Images: cast to float32 and divide by 255.
x_train = np.load(undersample_path + 'X9Prop0.1.npy').astype('float32') / 255
x_test = x_test.astype('float32') / 255

# Labels: one-hot encode with num_classes columns.
y_train = keras.utils.to_categorical(
    np.load(undersample_path + 'Y9Prop0.1.npy'), num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

In [58]:
#Set up the model function
def model_func(input_shape=None):
    """Build and compile a fresh, untrained CNN classifier.

    Architecture: two convolutional stages (Conv2D + ReLU twice, 2x2
    max-pooling, 25% dropout) with 32 and then 64 filters, followed by a
    512-unit dense layer with 50% dropout and a softmax layer with
    ``num_classes`` units.

    Args:
        input_shape: Shape of a single input sample (no batch dimension).
            When omitted, ``x_train.shape[1:]`` is read from the notebook's
            global ``x_train`` at call time, which is what this function
            always did before the parameter existed. Passing it explicitly
            avoids depending on whichever data-loading cell ran last.

    Returns:
        A compiled ``Sequential`` model using categorical cross-entropy loss,
        the RMSprop optimizer and the accuracy metric.
    """
    if input_shape is None:
        input_shape = x_train.shape[1:]

    model = Sequential()

    # Convolutional stage 1: 32 filters.
    model.add(Conv2D(32, (3, 3), padding='same',
                     input_shape=input_shape))
    model.add(Activation('relu'))
    model.add(Conv2D(32, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    # Convolutional stage 2: 64 filters.
    model.add(Conv2D(64, (3, 3), padding='same'))
    model.add(Activation('relu'))
    model.add(Conv2D(64, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    # Classifier head.
    model.add(Flatten())
    model.add(Dense(512))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    # initiate RMSprop optimizer
    # NOTE(review): `lr` and `decay` are legacy keyword names; newer Keras
    # releases may expect `learning_rate` and may reject `decay` -- adjust to
    # the installed version if this line raises.
    opt = tensorflow.keras.optimizers.RMSprop(lr=0.0001, decay=1e-6)

    # Let's train the model using RMSprop
    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])
    return model

In [None]:
# GAN-augmented boosting loop. Every round trains a fresh model on the real
# training set plus a new slice of GAN images, then re-weights the real
# samples with the multi-class AdaBoost (SAMME) update:
#     error = sum(w * miss) / sum(w)
#     alpha = log((1 - error) / error) + log(num_classes - 1)
#     w     = w * exp(alpha * miss), renormalised to sum to 1
# NOTE(review): `fakes` (the pool of GAN-generated images) is not defined in
# the cells visible here; it must already exist in the kernel.

fakes_per_round = 4500  # GAN images appended to the training set each round
fake_label = 9          # class index assigned to every GAN image
                        # (presumably the undersampled class -- TODO confirm)
error_eps = 1e-10       # keeps the weighted error strictly inside (0, 1)
store_dir = '/home/ec2-user/SageMaker/efs/Data/GANBoostStore/'

num_train = x_train.shape[0]
actual_vector = np.argmax(y_train, axis=1)  # true class index of each real sample

model_store = []
history_store = []
alpha_store = []
# Initialize as 1/n for all. `1.0` forces float division even on an
# interpreter where `/` between ints truncates.
weight_store = [np.full(num_train, 1.0 / num_train)]
# performance_matrix[c, m]: number of real class-c training samples that the
# m-th model classified correctly.
performance_matrix = np.zeros((num_classes, num_iterators))

for i in range(num_iterators):
    # GAN: take the slice of generated images reserved for this round.
    from_idx = i * fakes_per_round
    to_idx = (i + 1) * fakes_per_round
    gan_fakes = fakes[from_idx:to_idx]
    if len(gan_fakes) != fakes_per_round:
        # A short slice would otherwise surface as a length mismatch
        # between inputs, labels and sample weights inside fit().
        raise ValueError(
            'Expected {} GAN samples for round {}, got {}'.format(
                fakes_per_round, i, len(gan_fakes)))

    # Augment x / y / sample weights with the GAN images. Each GAN image
    # gets the mean of the current real-sample weights.
    augmented_x_train = np.concatenate((x_train, gan_fakes), axis=0)
    augmented_y_train = np.concatenate(
        (actual_vector, np.full(fakes_per_round, fake_label)))
    augmented_y_train = keras.utils.to_categorical(augmented_y_train, num_classes)
    augmented_weight_store = np.concatenate(
        (weight_store[i], np.full(fakes_per_round, np.mean(weight_store[i]))))

    # Create current model, train it, store it and its history.
    curr_model = model_func()
    curr_history = curr_model.fit(
        augmented_x_train, augmented_y_train,
        batch_size=batch_size, epochs=epochs,
        sample_weight=augmented_weight_store,
        validation_data=(x_test, y_test), shuffle=True)
    model_store.append(curr_model)
    history_store.append(curr_history)

    # Predict on the real training samples only (GAN images are not scored).
    prediction = curr_model.predict(x_train)
    prediction_vector = np.argmax(prediction, axis=1)

    # 1.0 where the model is wrong, 0.0 where it is right. Comparing argmax
    # indices (instead of `prediction == row max`) keeps the indicator
    # strictly 0/1 even when a row has tied or NaN scores.
    match_vector = (prediction_vector != actual_vector).astype(np.float64)

    # Weighted error, clamped away from 0 and 1: an error of exactly 0 would
    # give alpha = inf and inf * 0 = NaN would wipe out every weight.
    error = np.sum(weight_store[i] * match_vector) / np.sum(weight_store[i])
    error = min(max(error, error_eps), 1.0 - error_eps)
    # NOTE: alpha is <= 0 whenever error >= 1 - 1/num_classes, i.e. when the
    # model is no better than random guessing.
    alpha = np.log((1 - error) / error) + np.log(num_classes - 1)
    alpha_store.append(alpha)

    # Up-weight misclassified samples and renormalise.
    new_weight = weight_store[i] * np.exp(alpha * match_vector)
    new_weight /= np.sum(new_weight)
    weight_store.append(new_weight)

    # Update our performance matrix: per-class count of correct predictions.
    correct_mask = prediction_vector == actual_vector
    performance_matrix[:, i] += np.bincount(actual_vector[correct_mask],
                                            minlength=num_classes)

    # Checkpoint every 10th round and the last one.
    if i % 10 == 0 or i == num_iterators - 1:
        np.save(store_dir + 'Weights/new_weight_re_GB' + str(i) + '.npy', new_weight)
        np.save(store_dir + 'Alpha/alpha_re_GB' + str(i) + '.npy', alpha_store)
        curr_model.save_weights(store_dir + 'Model/model_re_GB' + str(i) + '.h5')

Train on 50000 samples, validate on 10000 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Train on 50000 samples, validate on 10000 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30




In [None]:
# Change performance matrix to accuracy figures: divide each class row of
# correct-prediction counts by the number of training samples of that class.
# The class dimension is taken from performance_matrix itself rather than a
# hard-coded 10, and the labels are counted once instead of once per class.
class_counts = np.bincount(np.argmax(y_train, axis=1),
                           minlength=performance_matrix.shape[0])
# A class without training samples also has zero correct predictions, so
# dividing by 1 instead of 0 yields accuracy 0 rather than NaN.
normed_perf_matrix = performance_matrix / np.maximum(class_counts, 1)[:, None]

# Normalize for algorithm: dividing by 0.5 maps 50% accuracy to a factor of 1,
# so higher accuracies become factors above 1 and lower ones factors below 1.
normed_perf_matrix /= 0.5

In [None]:
# Make the final prediction: start every (test sample, class) score at 1 and,
# for each stored model, multiply the score of the class that model predicts
# by the model's normalised training accuracy on that class.
final_matrix = np.ones((len(x_test), num_classes))
sample_idx = np.arange(len(x_test))

for i in range(len(model_store)):
    prediction_vector = np.argmax(model_store[i].predict(x_test), axis=1)
    # Each test row appears exactly once in (sample_idx, prediction_vector),
    # so the indexed in-place multiply matches the per-sample loop it replaces.
    final_matrix[sample_idx, prediction_vector] *= normed_perf_matrix[prediction_vector, i]