In [1]:
import keras
import numpy as np
from keras import backend as K
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D
from keras.regularizers import l2
from keras.wrappers.scikit_learn import KerasClassifier
from keras.models import model_from_yaml
from modAL.models import ActiveLearner
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.python.keras.backend import eager_learning_phase_scope
from tensorflow.keras.models import Model

# tf.logging.set_verbosity(tf.logging.ERROR)

In [2]:
tf.config.list_physical_devices('GPU')

[]

In [3]:
def create_mlp_model():
    model = Sequential()
    model.add(Dense(256, input_dim=20, activation='relu'))
    model.add(Dropout(0.25))
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.25))
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(10, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=["accuracy"])
    return model

In [4]:
def create_keras_model():
    model = Sequential()
    model.add(Conv2D(32, (4, 4), activation='relu'))
    model.add(Conv2D(32, (4, 4), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(10, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=["accuracy"])
    return model

In [5]:
class Sampling(layers.Layer):
    """Uses (z_mean, z_log_var) to sample z, the vector encoding a digit."""

    def call(self, inputs):
        z_mean, z_log_var = inputs
        batch = tf.shape(z_mean)[0]
        dim = tf.shape(z_mean)[1]
        epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
        return z_mean + tf.exp(0.5 * z_log_var) * epsilon

### create the classifier

In [6]:
classifier = KerasClassifier(create_keras_model)

  classifier = KerasClassifier(create_keras_model)


### read training data

In [7]:
(X_train, y_train), (X_test, y_test) = mnist.load_data()

### preprocessing

In [8]:
X_train = X_train.reshape(60000, 28, 28, 1).astype('float32') / 255.
X_test = X_test.reshape(10000, 28, 28, 1).astype('float32') / 255.


In [9]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm

from keras.layers import Input, Dense, Lambda, Layer
from keras.models import Model
from keras import backend as K
from keras import metrics
from keras.datasets import mnist

batch_size = 256
original_dim = 784
latent_dim = 20
intermediate_dim = 512
epochs = 60
epsilon_std = 1.0

# encoder architecture
x = Input(shape=(original_dim,))
encoder_h = Dense(intermediate_dim, activation='relu')(x)
z_mean = Dense(latent_dim)(encoder_h)
z_log_var = Dense(latent_dim)(encoder_h)

# sampling layer from latent distribution
def sampling(args):
    z_mean, z_log_var = args
    epsilon = K.random_normal(shape=(K.shape(z_mean)[0], latent_dim), mean=0., stddev=epsilon_std)
    return z_mean + K.exp(z_log_var / 2) * epsilon

#z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var])
z = Lambda(sampling)([z_mean, z_log_var])

# decoder / generator architecture
decoder_h = Dense(intermediate_dim, activation='relu')
decoder_mean = Dense(original_dim, activation='sigmoid')
h_decoded = decoder_h(z)
x_decoded_mean = decoder_mean(h_decoded)

# Custom loss layer
class CustomVariationalLayer(Layer):
    def __init__(self, **kwargs):
        self.is_placeholder = True
        super(CustomVariationalLayer, self).__init__(**kwargs)

    def vae_loss(self, x, x_decoded_mean):
        xent_loss = original_dim * metrics.binary_crossentropy(x, x_decoded_mean)
        kl_loss = - 0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
        return K.mean(xent_loss + kl_loss)

    def call(self, inputs):
        x = inputs[0]
        x_decoded_mean = inputs[1]
        loss = self.vae_loss(x, x_decoded_mean)
        self.add_loss(loss, inputs=inputs)
        # We won't actually use the output.
        return x

# encoder to generate latent variables from input
encoder = Model(x, z_mean)

encoder.load_weights('mnist_vae_encoder_32.h5')

In [10]:
X_train_enhanced = encoder.predict(X_train.reshape((X_train.shape[0], 784)))
X_test_enhanced = encoder.predict(X_test.reshape((X_test.shape[0], 784)))

z = np.load("z.npy")
X_train_enhanced = z[:60000, :]
X_test_enhanced = z[60000:]

### initial labelled data
We initialize the labelled set with 20 balanced randomly sampled examples

In [11]:
initial_idx = np.array([],dtype=np.int)
for i in range(10):
    idx = np.random.choice(np.where(y_train==i)[0], size=2, replace=False)
    initial_idx = np.concatenate((initial_idx, idx))

X_initial = X_train[initial_idx]
y_initial = y_train[initial_idx]

X_initial_enhanced = X_train_enhanced[initial_idx]

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  initial_idx = np.array([],dtype=np.int)


### initial unlabelled pool

In [12]:
X_pool = np.delete(X_train, initial_idx, axis=0)
y_pool = np.delete(y_train, initial_idx, axis=0)

In [13]:
X_pool_enhanced = np.delete(X_train_enhanced, initial_idx, axis=0)

## Query Strategies

### Uniform
All the acquisition function we will use will be compared to the uniform acquisition function $\mathbb{U}_{[0,1]}$ which will be our baseline that we would like to beat.

In [14]:
def uniform(learner, X, n_instances=1):
    query_idx = np.random.choice(range(len(X)), size=n_instances, replace=False)
    return query_idx, X[query_idx]

### Entropy
Our first acquisition function is the entropy:
$$ \mathbb{H} = - \sum_{c} p_c \log(p_c)$$
where $p_c$ is the probability predicted for class c. This is approximated by:
\begin{align}
p_c &= \frac{1}{T} \sum_t p_{c}^{(t)} 
\end{align}
where $p_{c}^{t}$ is the probability predicted for class c at the t th feedforward pass.

In [15]:
def max_entropy(learner, X, n_instances=1, T=100):
    random_subset = np.random.choice(X.shape[0], 2000, replace=False)
    fn = K.function([learner.estimator.model.layers[0].input,], [learner.estimator.model.layers[-1].output])
    with eager_learning_phase_scope(value=1):
        MC_samples = [fn(X[random_subset])[0] for _ in range(T)]
    MC_samples = np.array(MC_samples)  # [#samples x batch size x #classes]
    expected_p = np.mean(MC_samples, axis=0)
    acquisition = - np.sum(expected_p * np.log(expected_p + 1e-10), axis=-1)  # [batch size]
    idx = (-acquisition).argsort()[:n_instances]
    query_idx = random_subset[idx]
    return query_idx, X[query_idx]

### Variation Ratio
the Variation ratio is computed according to:
\begin{align}

\end{align}

In [16]:
def var_ratio(learner, X, n_instances=1, T=100):
    random_subset = np.random.choice(X.shape[0], 2000, replace=False)
    fn = K.function([learner.estimator.model.layers[0].input,], [learner.estimator.model.layers[-1].output])
    with eager_learning_phase_scope(value=1):
        MC_samples = [fn(X[random_subset])[0] for _ in range(T)]
    MC_samples = np.array(MC_samples)  # [#samples x batch size x #classes]
    preds = np.argmax(a, axis=2)
    mode, count = stats.mode(preds, axis=0)
    acquisition = (1 - count / preds.shape[1]).reshape((-1,))
    idx = (-acquisition).argsort()[:n_instances]
    query_idx = random_subset[idx]
    return query_idx, X[query_idx]

In [17]:
def bald(learner, X, n_instances=1, T=100):
    random_subset = np.random.choice(X.shape[0], 2000, replace=False)
    fn = K.function([learner.estimator.model.layers[0].input,], [learner.estimator.model.layers[-1].output])
    with eager_learning_phase_scope(value=1):
        MC_samples = [fn(X[random_subset])[0] for _ in range(T)]
    MC_samples = np.array(MC_samples)  # [#samples x batch size x #classes]
    expected_entropy = - np.mean(np.sum(MC_samples * np.log(MC_samples + 1e-10), axis=-1), axis=0)  # [batch size]
    expected_p = np.mean(MC_samples, axis=0)
    entropy_expected_p = - np.sum(expected_p * np.log(expected_p + 1e-10), axis=-1)  # [batch size]
    acquisition = entropy_expected_p - expected_entropy
    idx = (-acquisition).argsort()[:n_instances]
    query_idx = random_subset[idx]
    return query_idx, X[query_idx]

### Active Learning Procedure

In [18]:
def active_learning_procedure(query_strategy,
                              X_test,
                              y_test,
                              X_pool,
                              y_pool,
                              X_initial,
                              y_initial,
                              estimator,
                              epochs=50,
                              batch_size=128,
                              n_queries=400,
                              n_instances=10,
                              verbose=0):
    learner = ActiveLearner(estimator=estimator,
                            X_training=X_initial,
                            y_training=y_initial,
                            query_strategy=query_strategy,
                            verbose=verbose
                           )
    perf_hist = [learner.score(X_test, y_test, verbose=verbose)]
    for index in range(n_queries):
        query_idx, query_instance = learner.query(X_pool, n_instances)
        learner.teach(X_pool[query_idx], y_pool[query_idx], epochs=epochs, batch_size=batch_size, verbose=verbose)
        X_pool = np.delete(X_pool, query_idx, axis=0)
        y_pool = np.delete(y_pool, query_idx, axis=0)
        model_accuracy = learner.score(X_test, y_test, verbose=0)
        print('Accuracy after query {n}: {acc:0.4f}'.format(n=index + 1, acc=model_accuracy))
        perf_hist.append(model_accuracy)
    return perf_hist

In [18]:
estimator = KerasClassifier(create_mlp_model)
vae_bald_perf_hist = active_learning_procedure(bald,
                                           X_test_enhanced,
                                           y_test,
                                           X_pool_enhanced,
                                           y_pool,
                                           X_initial_enhanced,
                                           y_initial,
                                           estimator,)
np.save("/content/drive/My Drive/DBAL-master/keras_vae_bald.npy", vae_bald_perf_hist)

  estimator = KerasClassifier(create_mlp_model)


Accuracy after query 1: 0.5711
Accuracy after query 2: 0.5707
Accuracy after query 3: 0.6751
Accuracy after query 4: 0.7169
Accuracy after query 5: 0.7458
Accuracy after query 6: 0.7717
Accuracy after query 7: 0.7771
Accuracy after query 8: 0.7987


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "c:\users\pinar\onedrive\masaüstü\masterthesis\msthesisenv\lib\site-packages\IPython\core\interactiveshell.py", line 3444, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "C:\Users\pinar\AppData\Local\Temp/ipykernel_22880/2361012038.py", line 2, in <module>
    vae_bald_perf_hist = active_learning_procedure(bald,
  File "C:\Users\pinar\AppData\Local\Temp/ipykernel_22880/3064233826.py", line 26, in active_learning_procedure
    model_accuracy = learner.score(X_test, y_test, verbose=0)
  File "c:\users\pinar\onedrive\masaüstü\masterthesis\msthesisenv\lib\site-packages\modAL\models\base.py", line 275, in score
    return self.estimator.score(X, y, **score_kwargs)
  File "c:\users\pinar\onedrive\masaüstü\masterthesis\msthesisenv\lib\site-packages\keras\wrappers\scikit_learn.py", line 313, in score
    outputs = self.model.evaluate(x, y, **kwargs)
  File "c:\users\pinar\onedrive\masaüstü\masterthesis\msthesisenv\lib\site-pac

TypeError: object of type 'NoneType' has no len()

In [None]:
estimator = KerasClassifier(create_keras_model)
entropy_perf_hist = active_learning_procedure(max_entropy,
                                              X_test,
                                              y_test,
                                              X_pool,
                                              y_pool,
                                              X_initial,
                                              y_initial,
                                              estimator,)
np.save("/content/drive/My Drive/DBAL-master/keras_max_entropy.npy", entropy_perf_hist)

entropy_perf_hist = np.load("/content/drive/My Drive/DBAL-master/keras_max_entropy.npy")

In [None]:
estimator = KerasClassifier(create_keras_model)
bald_perf_hist = active_learning_procedure(bald,
                                           X_test,
                                           y_test,
                                           X_pool,
                                           y_pool,
                                           X_initial,
                                           y_initial,
                                           estimator,)

np.save("/content/drive/My Drive/DBAL-master/keras_bald.npy", bald_perf_hist)

bald_perf_hist = np.load("/content/drive/My Drive/DBAL-master/keras_bald.npy")

estimator = KerasClassifier(create_keras_model)
var_ratio_perf_hist = active_learning_procedure(var_ratio,
                                               X_test,
                                               y_test,
                                               X_pool,
                                               y_pool,
                                               X_initial,
                                               y_initial,
                                               estimator,)
np.save("keras_var_ratio.npy", var_ratio_perf_hist)

In [None]:
estimator = KerasClassifier(create_keras_model)
uniform_perf_hist = active_learning_procedure(uniform,
                                              X_test,
                                              y_test,
                                              X_pool,
                                              y_pool,
                                              X_initial,
                                              y_initial,
                                              estimator,)
np.save("/content/drive/My Drive/DBAL-master/keras_uniform.npy", uniform_perf_hist)

uniform_perf_hist = np.load("/content/drive/My Drive/DBAL-master/keras_uniform.npy")

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
plt.xlabel("Number of Instances")
plt.ylabel("Accuracy")
plt.plot(entropy_perf_hist, label="entropy")
plt.plot(bald_perf_hist, label="bald")
plt.plot(uniform_perf_hist, label="uniform")
plt.plot(vae_bald_perf_hist, label="vae-bald")
plt.ylim([0.7,1])
plt.legend()