In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from sklearn.metrics import f1_score
from tensorflow import keras

In [None]:
# Dataset / model constants
ndim = 28                      # images are ndim x ndim pixels
input_shape = (ndim, ndim, 1)
num_classes = 10

# Fetch MNIST, already split into a train part and a test part
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Rescale pixels to [0, 1] as float32, then append a trailing channel axis
# so every image has shape (28, 28, 1)
x_train = (x_train.astype("float32") / 255)[..., np.newaxis]
x_test = (x_test.astype("float32") / 255)[..., np.newaxis]

print("x_train shape:", x_train.shape)
print(len(x_train), "train samples")
print(len(x_test), "test samples")

# One-hot encode the integer class labels
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

In [None]:
# Fashion-MNIST images with integer labels; a label is used as an index into class_names
fashion_mnist = tf.keras.datasets.fashion_mnist
fashion_train, fashion_test = fashion_mnist.load_data()
X_train_fs, y_train_fs = fashion_train
X_test_fs, y_test_fs = fashion_test

class_names = [
    'T-shirt/top',
    'Trouser',
    'Pullover',
    'Dress',
    'Coat',
    'Sandal',
    'Shirt',
    'Sneaker',
    'Bag',
    'Ankle boot',
]

In [None]:
# One row of six panels: the first three show MNIST digits, the last three Fashion-MNIST items.
fig, axarr = plt.subplots(1, 6, figsize=(10, 8))
n_digit_panels = len(axarr) - 3

for i, ax in enumerate(axarr):
    if i < n_digit_panels:
        image = x_train[i]
        title = f"Label={y_train[i].argmax()}"
    else:
        # The panel position doubles as the dataset index, so items 3..5 are shown here.
        image = X_train_fs[i]
        title = f"Label={class_names[y_train_fs[i]]}"

    ax.imshow(image.reshape(ndim, ndim), cmap='gist_gray')
    ax.set_title(title)
    ax.yaxis.set_visible(False)
    ax.xaxis.set_visible(False)


In [None]:
def unison_shuffled_copies(a, b):
    """Shuffle two equally long arrays with one shared random permutation.

    Both inputs are indexed with the same permutation array, so element pairs
    that were aligned before the call stay aligned afterwards.

    Returns a tuple ``(shuffled_a, shuffled_b)``. An ``AssertionError`` is raised
    when the two lengths differ.
    """
    n_items = len(a)
    assert n_items == len(b)
    order = np.random.permutation(n_items)
    shuffled_a = a[order]
    shuffled_b = b[order]
    return shuffled_a, shuffled_b

In [None]:
from sklearn.neighbors import KNeighborsClassifier
def run_classifier(x_train_classifer, y_train_classifer, x_test_classifer, y_test_classifer,
                   n_neighbors=10):
    """Fit a k-nearest-neighbours classifier on flattened images and report macro F1.

    Parameters
    ----------
    x_train_classifer, x_test_classifer : array-like
        Image batches with the sample axis first. Every sample is flattened
        into a single feature vector before being handed to the classifier.
    y_train_classifer, y_test_classifer : array-like
        Labels, passed unchanged to ``fit`` and ``f1_score``.
    n_neighbors : int, default 10
        Number of neighbours used by ``KNeighborsClassifier``.

    Returns
    -------
    tuple
        ``(clf_train_f1, clf_test_f1)``: macro-averaged F1 on the training
        data the classifier was fitted on and on the test data.
    """
    # Flatten each batch once. Using -1 lets NumPy infer the feature count, so the
    # function no longer depends on the notebook-level `ndim` global.
    train_features = np.reshape(x_train_classifer, (len(x_train_classifer), -1))
    test_features = np.reshape(x_test_classifer, (len(x_test_classifer), -1))

    clf = KNeighborsClassifier(n_neighbors)
    clf.fit(train_features, y_train_classifer)

    clf_train_f1 = f1_score(y_train_classifer, clf.predict(train_features), average='macro')
    clf_test_f1 = f1_score(y_test_classifer, clf.predict(test_features), average='macro')

    print(f"Train F1-score= {(clf_train_f1):.3e}")
    print(f"Test F1-score= {(clf_test_f1):.3e}")

    return clf_train_f1, clf_test_f1

In [None]:
# Parameters of the Gaussian noise that is added to the encoder output before decoding
step = 1                # not referenced by any of the visible cells
mean, std = 0, 0.05     # passed to np.random.normal as mean and standard deviation
noise_factor = 1.0      # multiplier applied to the sampled noise

In [None]:
# Restore the three saved Keras models from HDF5 files given as relative paths.
# NOTE(review): the file names suggest 50 epochs, batch size 128 and 50000 training
# images, but that cannot be verified from this notebook.
load_model = keras.models.load_model

autoencoder = load_model('mnist_50epoch_128batch_50000train_autoencoder.h5')
encoder = load_model('mnist_50epoch_128batch_50000train_encoder.h5')
decoder = load_model('mnist_50epoch_128batch_50000train_decoder.h5')

In [None]:
img_per_class = 1
# Samples of x_train / y_train from this index onward form the pool from which
# the labelled examples are picked.
labeled_pool_start = 50000

# Integer class of every pool sample (y_train is one-hot encoded at this point).
pool_labels = y_train[labeled_pool_start:].argmax(axis=1)

# Row c receives the pool-relative positions of the first `img_per_class` samples
# of class c, in order of appearance; -1 marks a slot that could not be filled.
labeled_train_data_counter = np.zeros(num_classes)
labeled_train_data_indices = np.full((num_classes, img_per_class), -1)
for class_id in range(num_classes):
    class_positions = np.flatnonzero(pool_labels == class_id)[:img_per_class]
    labeled_train_data_counter[class_id] = len(class_positions)
    labeled_train_data_indices[class_id, :len(class_positions)] = class_positions

# Without this check an unfilled slot would become index `labeled_pool_start - 1`
# after the offset below and silently select an unrelated sample.
if (labeled_train_data_indices < 0).any():
    raise ValueError(
        f"Could not find {img_per_class} sample(s) for every class in "
        f"y_train[{labeled_pool_start}:]"
    )

# Convert pool-relative positions into absolute indices into x_train / y_train.
labeled_train_data_indices = labeled_train_data_indices.astype(int) + labeled_pool_start

# Column-major flattening lists one image of every class before the next image of any class.
indices = labeled_train_data_indices.flatten('F')
x_train_classifer = x_train[indices].copy()
y_train_classifer = y_train[indices].argmax(axis=1)

x_test_classifer = x_test.copy()
y_test_classifer = y_test.argmax(axis=1)

print(x_train_classifer.shape)

# Score history: entry 0 is the classifier fitted on the labelled samples only.
hist_clf_train_f1, hist_clf_test_f1 = [], []

# NOTE(review): with img_per_class=1 the training set has num_classes samples while
# run_classifier uses 10 neighbours by default, so every vote is a tie across all
# classes; confirm this baseline is what is intended.
clf_train_f1, clf_test_f1 = run_classifier(x_train_classifer, y_train_classifer,
                                           x_test_classifer, y_test_classifer)
hist_clf_train_f1.append(clf_train_f1)
hist_clf_test_f1.append(clf_test_f1)

In [None]:
# Display the selected sample indices (absolute indices into x_train, one row per class).
labeled_train_data_indices

In [None]:
def plot_orig_recon_imgs(labeled_img, noised_img, iteration, y_true):
    """Save a two-panel figure comparing a source image with its decoded counterpart.

    Parameters
    ----------
    labeled_img : array-like
        Image shown in the left panel under the title "Original Image".
    noised_img : array-like
        Batch of images; only its first element is shown, in the right panel,
        under the title "Reconstructed Image".
    iteration : int
        Used in the figure title and in the output file name.
    y_true :
        Label shown in the figure title.

    The figure is written to ``plots/iteration<iteration>`` (the ``plots``
    directory is created when missing) and is closed afterwards, so nothing is
    displayed inline and nothing is returned.
    """
    fig, axarr = plt.subplots(1, 2)
    panels = (
        (labeled_img, "Original Image"),
        (noised_img[0], "Reconstructed Image"),
    )
    for ax, (image, title) in zip(axarr, panels):
        ax.imshow(image, cmap='gist_gray')
        ax.set_title(title)
        ax.yaxis.set_visible(False)
        ax.xaxis.set_visible(False)
    fig.suptitle(f"Iteration {iteration}, Image Label: {y_true}")

    try:
        # savefig does not create missing directories, so make sure the target exists.
        os.makedirs("plots", exist_ok=True)
        fig.savefig(f"plots/iteration{iteration}")
    finally:
        # Close this specific figure (not just the "current" one), even when saving
        # fails, so that repeated calls do not accumulate open figures.
        plt.close(fig)

In [None]:
# Loop state kept outside the loop cell: `idx` is the index of the next source image and
# `iteration` counts completed passes. Re-running this cell resets both to zero.
idx = iteration = 0

In [None]:
rejected_counter = 0
accepted_counter = 0

max_iterations = 10000   # the loop stops once `iteration` reaches this value
eval_every = 1000        # re-fit and score the classifier every this many iterations

# Each pass encodes one image of the current training set, adds Gaussian noise to the
# encoding, decodes it and appends the result (with the source's label) to the set.
# `iteration` and `idx` live outside this cell, so an interrupted run resumes when
# the cell is executed again.
while iteration < max_iterations:

    iteration += 1
    print(f"Iteration {iteration}:")

    train_size = x_train_classifer.shape[0]
    print("Train dataset size=", train_size)

    # Sources are visited sequentially through `idx`.
    # NOTE(review): the set grows by one image per pass, so `idx` soon points at
    # previously generated images rather than the initial labelled ones. Confirm this
    # chaining is intended; wrapping `idx` modulo the initial set size would restrict
    # the sources to the labelled images.
    if idx >= train_size:
        raise IndexError(
            f"idx={idx} is outside the training set of size {train_size}; "
            "reset idx/iteration together with the training set before running this cell"
        )
    random_idx = idx
    print(random_idx)
    labeled_img = x_train_classifer[random_idx]
    source_label = y_train_classifer[random_idx]

    # Perturb the encoding of the image and decode the perturbed encoding.
    encoded_data = encoder.predict(np.array([labeled_img]), verbose=0)
    noise_data = noise_factor * np.random.normal(mean, std, size=encoded_data.shape)
    perturbed_data = noise_data + encoded_data
    noised_img = decoder.predict(perturbed_data, verbose=0)

    plot_orig_recon_imgs(labeled_img, noised_img, iteration, source_label)

    # The generated image inherits the label of the image it was derived from.
    x_train_classifer = np.concatenate([x_train_classifer, noised_img], axis=0)
    y_train_classifer = np.concatenate([y_train_classifer, [source_label]])

    if iteration % eval_every == 0:
        clf_train_f1, clf_test_f1 = run_classifier(x_train_classifer, y_train_classifer,
                                                   x_test_classifer, y_test_classifer)
        hist_clf_train_f1.append(clf_train_f1)
        hist_clf_test_f1.append(clf_test_f1)

    idx += 1

# Plots

In [None]:
# Store the augmented training images and labels in one archive under the keys
# "x_train" and "y_train".
np.savez(
    'train_dataset_KNN',
    x_train=x_train_classifer,
    y_train=y_train_classifer,
)

In [None]:
# Entry 0 of the history is the score before the augmentation loop; every further entry
# is recorded each 1000 loop iterations, so entry k belongs to iteration 1000 * k.
# Keep this factor in sync with the evaluation interval used in the loop.
eval_iterations = 1000 * np.arange(len(hist_clf_train_f1))

fig, ax = plt.subplots()
ax.plot(eval_iterations, hist_clf_train_f1)
ax.set_xlabel("Iteration")
ax.set_ylabel("F1-score")
ax.set_title("Classifier Train F1-score")
plt.show()

In [None]:
# Entry 0 of the history is the score before the augmentation loop; every further entry
# is recorded each 1000 loop iterations, so entry k belongs to iteration 1000 * k.
# Keep this factor in sync with the evaluation interval used in the loop.
eval_iterations = 1000 * np.arange(len(hist_clf_test_f1))

fig, ax = plt.subplots()
ax.plot(eval_iterations, hist_clf_test_f1)
ax.set_xlabel("Iteration")
ax.set_ylabel("F1-score")
ax.set_title("Classifier Test F1-score")
plt.show()