In [72]:
import numpy as np
import h5py
from tqdm import tqdm_notebook
from sklearn.model_selection import train_test_split
from keras.layers import Input, Dense
from keras.models import Model
from keras.callbacks import EarlyStopping

In [None]:
# Read the features
h5f = h5py.File('features.h5', 'r')
features = h5f['ResNet_features'][:]
h5f.close()

In [None]:
# Split the data up
train_size = 0.8
features_train, features_validate = train_test_split(features,
                                                     train_size=train_size,
                                                     test_size=1-train_size,
                                                     random_state=42)

In [40]:
def train_encoder(x_train, x_validate,
                  n_latent_vars, latent_activation, decoder_activation,
                  optimizer, loss, epochs, batch_size):
    # Make the tensors so that we can define the *coders
    n_dimen = x_train.shape[1]
    input_shape = (n_dimen,)
    input_tensor = Input(shape=input_shape)
    latent_tensor = Dense(n_latent_vars, activation=latent_activation)(input_tensor)
    output_tensor = Dense(n_dimen, activation=decoder_activation)(latent_tensor)

    # Make and train the autoencoder
    encoder = Model(input_tensor, latent_tensor)
    autoencoder = Model(input_tensor, output_tensor)
    autoencoder.compile(optimizer=optimizer, loss=loss)
    stopper = EarlyStopping(patience=3)
    autoencoder.fit(x_train, x_train,
                    epochs=epochs,
                    batch_size=batch_size,
                    shuffle=True,
                    validation_data=(x_validate, x_validate),
                    callbacks=[stopper])
    return autoencoder, encoder

In [80]:
# Train shallow [auto]encoder
n_latent_vars = 16
autoencoder, encoder = train_encoder(x_train=features_train,
                                     x_validate=features_validate,
                                     n_latent_vars=n_latent_vars,
                                     latent_activation='softplus',
                                     decoder_activation='softplus',
                                     optimizer='adam',
                                     loss='mean_squared_error',
                                     epochs=40,
                                     batch_size=16)

Train on 800 samples, validate on 200 samples
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40


In [81]:
# Save the autoencoder and encoder
autoencoder.compile(optimizer='adam', loss='mean_squared_error')
autoencoder.save('autoencoder.h5')
encoder.compile(optimizer='adam', loss='mean_squared_error')
encoder.save('encoder.h5')

In [82]:
# Now let's actually use the encoder on all the images
encoded_features = np.empty((0, n_latent_vars))
for feature in features:
    encoded_features_ = encoder.predict(feature)
    encoded_features = np.append(encoded_features, encoded_features_, axis=0)

ValueError: Error when checking input: expected input_39 to have shape (2048,) but got array with shape (1,)

In [None]:
# And then save the encoded features
h5f = h5py.File('features.h5', 'w')
h5f.create_dataset('ResNet_features', data=features)
h5f.close()