In [1]:
 ##%config Completer.use_jedi = False 
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import gridspec
import sklearn as skl
import sklearn.preprocessing as skl_pre
import sklearn.linear_model as skl_lin
from keras.models import Sequential
from keras.layers import Flatten, Dense, InputLayer
from keras.losses import SparseCategoricalCrossentropy
from keras.utils import np_utils
import sklearn.mixture as skl_mix
import copy
from keras.datasets import mnist


def matrix_to_vect(mnist_digits):
    """Flatten 28x28 digit images into row vectors.

    Parameters
    ----------
    mnist_digits : array_like
        Image data whose total size is a multiple of 784 (= 28 * 28),
        e.g. an array of shape (n_images, 28, 28).

    Returns
    -------
    numpy.ndarray
        Array of shape (n_images, 784); the leading dimension is inferred.
    """
    flat_shape = (-1, 28 * 28)
    return np.reshape(mnist_digits, flat_shape)

## Load MNIST and convert the images to float32, dividing every pixel by 255 ##
(X_train_28x28, y_train), (X_test_28x28, y_test) = mnist.load_data()
X_train_28x28 = X_train_28x28.astype(np.float32) / 255.0
X_test_28x28 = X_test_28x28.astype(np.float32) / 255.0

## Flattened (n_images, 784) copies, standardized feature by feature ##
X_train = matrix_to_vect(X_train_28x28)
X_test = matrix_to_vect(X_test_28x28)

## The scaler is fitted on the training set only; the very same transformation
## is then applied to both the training and the test set.
scaler = skl_pre.StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

## Image tensors with a trailing channel axis, (n_images, 28, 28, 1), built from
## the rescaled (not standardized) images.
X_train_28x28x1 = np.expand_dims(X_train_28x28, axis=-1)
X_test_28x28x1 = np.expand_dims(X_test_28x28, axis=-1)


Training data split: each model m_i has a corresponding cluster c_i. The model is first given all of the data in c_i; the remainder of its training set is then bagged (resampled), with 80% of the data drawn from the main clustering and 20% drawn from all clusters (inclusive).

Idea: resample the data and train using the generated Gaussians.

In [2]:
## Bagging: build one bootstrap replicate of the training set per ensemble member.
## Each replicate draws as many rows as the training set has, uniformly and WITH
## replacement, so every model sees a different resampling of the same data.
RESAMPLE_SEED = 42  ## fixed seed so that Restart & Run All reproduces the same replicates
k_models = 15       ## number of models in the ensemble

resample_rng = np.random.default_rng(RESAMPLE_SEED)
n_train = X_train_28x28x1.shape[0]
X_range = np.arange(0, n_train)  ## kept for backward compatibility with other cells

X_data_resampled = []
Y_data_resampled = []
for i in range(k_models):
    ## Row indices in [0, n_train), sampled with replacement.
    idx = resample_rng.integers(0, n_train, size=n_train)
    ## Fancy indexing copies the data: each replicate is a full-size array.
    X_data_resampled.append(X_train_28x28x1[idx])
    Y_data_resampled.append(y_train[idx])

In [6]:
## Do LeNet-5 Architecture for EACH data set
##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~##
## LeNet-5 Architecture keras code is taken from RPI FALL 2020 CSCI 4961 - Machine Learning & Optimization notes, by Prof. Alex Gittens ##
## Note that the Architecture itself is from "Gradient Based Learning Applied to Document Recognition", (LeCun et al., 1998)            ##
##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~##
from keras.models import Sequential
from keras.layers import Conv2D, AveragePooling2D, Flatten, Dense, InputLayer
from keras.losses import SparseCategoricalCrossentropy
from keras.utils import np_utils

def _build_lenet():
    """Create and compile one freshly initialised LeNet-style network.

    Returns
    -------
    keras.models.Sequential
        Model taking (28, 28, 1) inputs and producing a 10-way softmax,
        compiled with sparse categorical cross-entropy, the Adam optimizer
        and the accuracy metric.
    """
    layer_stack = [
        InputLayer(input_shape=(28, 28, 1)),
        Conv2D(6, kernel_size=(5,5), strides=(1,1), activation='tanh', padding="same", name="C1"),
        ## NOTE(review): this pooling layer uses stride 1, whereas LeNet-5 subsamples
        ## with stride 2 here. Left unchanged to preserve behaviour -- confirm intent.
        AveragePooling2D(pool_size=(2,2), strides=(1,1), padding='valid', name="A1"),
        Conv2D(16, kernel_size=(5,5), strides=(1,1), activation='tanh', name="C2"),  # default padding is "valid"
        AveragePooling2D(pool_size=(2,2), strides=(2,2), padding='valid', name="A2"),
        Conv2D(120, kernel_size=(5,5), strides=(1,1), activation='tanh', padding='valid', name="C3"),
        Flatten(name="F"),
        Dense(84, activation='tanh', name="D1"),
        Dense(10, activation='softmax', name="D2"),
    ]
    net = Sequential(layer_stack)
    net.compile(loss=SparseCategoricalCrossentropy(), optimizer='adam', metrics=['accuracy'])
    return net


## One independent network per ensemble member.
models = []
for m in range(k_models):
    lenet = _build_lenet()
    models.append(lenet)
    

In [None]:
## Train every ensemble member on its own resampled data set and keep the
## object returned by each fit() call.
hist = []
for m in range(k_models):
    X_m, Y_m = X_data_resampled[m], Y_data_resampled[m]
    history = models[m].fit(X_m, Y_m, epochs=10, batch_size=128, verbose=1)
    hist.append(history)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
E

In [9]:


## Ensemble prediction: average the softmax outputs of all models, then pick the
## most probable class for every test image.
n_classes = 10  ## matches the 10-unit softmax output layer (MNIST digits 0-9)
test_pred = np.zeros([X_test_28x28x1.shape[0], n_classes])
for m in range(k_models):
    test_pred += (models[m]).predict(X_test_28x28x1)  ## (n_test, n_classes) per model
## Normalise by the number of MODELS (not the number of test samples) so that
## test_pred holds the ensemble-average class probabilities.
test_pred /= k_models
Y_test_pred = np.argmax(test_pred, axis=1)  ## predicted class per test image
error = np.mean(Y_test_pred != y_test)      ## fraction of misclassified test images
print(error)

0.0109
