### Pipeline
* Function data_processing: return X_train, y_train, X_test, y_test (numpy arrays)
* Train Full_NN on data and saving weights
* Extract base_CNN with saved_weights
* Function features_extraction(sub_NN, ...): see keras example
* Function output_layer (features, period_snapshot): top_layers training with snapshot of weights 
* Function predictive_distribution (x_test, weight_snapshot_folder, model)

### Imports

In [3]:
from __future__ import print_function
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.models import Model
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Input
from keras import backend as K

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [4]:
# parameters
batch_size = 128
num_classes = 10
epochs = 3

metrics=['accuracy']
loss=keras.losses.categorical_crossentropy
optimizer=keras.optimizers.Adadelta()
SGD=keras.optimizers.SGD

#from mnist import SGLD
#import mnist

### Data Pre-processing

In [5]:
# input image dimensions
img_rows, img_cols = 28, 28

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# subset of mnist
x_train=x_train[:6000]
y_train=y_train[:6000]
x_test=x_test[:1000]
y_test=y_test[:1000]

x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


### pre-training phase on CNN model (From F.Chollet book > 95% accuracy with 12 epochs)

In [6]:
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
                     activation='relu',
                     input_shape=input_shape, name='conv_base1'))
model.add(Conv2D(64, (3, 3), activation='relu', name='conv_base2'))
model.add(MaxPooling2D(pool_size=(2, 2), name='conv_base3'))
model.add(Dropout(0.25, name='conv_base4'))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv_base1 (Conv2D)          (None, 26, 26, 32)        320       
_________________________________________________________________
conv_base2 (Conv2D)          (None, 24, 24, 64)        18496     
_________________________________________________________________
conv_base3 (MaxPooling2D)    (None, 12, 12, 64)        0         
_________________________________________________________________
conv_base4 (Dropout)         (None, 12, 12, 64)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 9216)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 128)               1179776   
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)               0         
__________

In [7]:
model.compile(loss=loss,
              optimizer=optimizer, metrics=metrics)

In [8]:
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test))
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Train on 6000 samples, validate on 1000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Test loss: 0.15278889936208725
Test accuracy: 0.953


In [9]:
model_path='./output/model/mnist_cnn.h5'
model.save(model_path)

### feature_extraction from conv_base model

In [10]:
# extract the base_cnn and load_weights from pre-trained network
output_layer=model.get_layer(index=3)

# avoir si obligé de recréer le modèle
sub_model=Model(inputs=model.input, outputs=output_layer.output) # only take the conv base of the model
print(sub_model.summary())

sub_model.load_weights(model_path,by_name=True)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv_base1_input (InputLayer (None, 28, 28, 1)         0         
_________________________________________________________________
conv_base1 (Conv2D)          (None, 26, 26, 32)        320       
_________________________________________________________________
conv_base2 (Conv2D)          (None, 24, 24, 64)        18496     
_________________________________________________________________
conv_base3 (MaxPooling2D)    (None, 12, 12, 64)        0         
_________________________________________________________________
conv_base4 (Dropout)         (None, 12, 12, 64)        0         
Total params: 18,816
Trainable params: 18,816
Non-trainable params: 0
_________________________________________________________________
None


In [11]:
# extract features from conv_base model
features_train=sub_model.predict(x_train)
print(features_train.shape)
features_test=sub_model.predict(x_test)
print(features_test.shape)

# reshaping
import numpy as np
features_train=np.reshape(features_train, (6000,12*12*64))
features_test=np.reshape(features_test, (1000,12*12*64))
print(features_train.shape)
print(features_test.shape)

(6000, 12, 12, 64)
(1000, 12, 12, 64)
(6000, 9216)
(1000, 9216)


### Last_layer algo

In [12]:
# define top_layers
top_model=Sequential()
# add Flatten
top_model.add(Dense(128, activation='relu'))
top_model.add(Dropout(0.5))
top_model.add(Dense(num_classes, activation='softmax'))

In [13]:
# train top_layers with weights snapshots every snapshot interval
epochs=10
top_model.compile(optimizer=optimizer, loss=loss, metrics=metrics)
mc = keras.callbacks.ModelCheckpoint('./output/weights/weights{epoch:08d}.h5', 
                                     save_weights_only=True, period=2)

top_model.fit(features_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(features_test, y_test),callbacks=[mc])
#score = model.evaluate(features_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
top_model_path='./output/model/mnist_top_layer.h5'
top_model.save(top_model_path)

Train on 6000 samples, validate on 1000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss: 0.15278889936208725
Test accuracy: 0.953


In [14]:
# get each h5 files with weights
import os
import glob
weight_path='./output/weights/'
os.chdir(weight_path)
h5_files = [i for i in glob.glob('*.h5')]

In [15]:
# calculate predictive_score_distribution
n_snapshots=len(h5_files)
predictive_scores_distribution=np.zeros(shape=(n_snapshots,1000,10))
for index, weight_snapshot in zip(range(n_snapshots),h5_files):
    top_model.load_weights(weight_snapshot)
    prediction_snapshot=top_model.predict(features_test)
    predictive_scores_distribution[index]=prediction_snapshot

In [16]:
predictive_scores_distribution.shape

(5, 1000, 10)

In [17]:
# calculate predictive_class_distribution
n_snapshots=5
predictive_class_distribution=np.zeros(shape=(n_snapshots,1000))
for index, weight_snapshot in enumerate(h5_files):
    top_model.load_weights(weight_snapshot)
    prediction_snapshot=top_model.predict_classes(features_test)
    predictive_class_distribution[index]=prediction_snapshot

In [18]:
predictive_class_distribution.shape # np.transpose to permute dim of the numpy array

(5, 1000)

### Functions to create
* function full_model: classic input + names for layers, OUTPUT=model with named layers for conv_base

* function pre_training: input_data, hyperparameters, model, model_path, OUTPUT=history (+ model_saving)
* function extract_features: INPUT=index(for sub_NN), input_data, model_path, OUTPUT=reshaped features
* function train_lastLayer_checkpoints, INPUT=top_model, interval for checkpoint, weight_path, model_path, training_parameters (optimizer, loss, accuracy)// OUTPUT=saved_snapshots
* function predictive_scores_distribution (model, weight_path, n_snapshot), OUTPUT: 3D-array (n_snapshots, n_samples, n_classes)
* function predictive_class_distribution (model, weight_path, n_snapshot), OUTPUT: 2D-array (n_snapshots, n_samples)

#### other alternative
* function split_NN(full_model, index) // OUTPUT: a tuple of the base_model, top_model

----------

### Out-of-distribution sampling

In [20]:
selected_index=[]
for index in range(y_train.shape[0]):
    if np.sum(y_train[index][:5])==1:
        selected_index.append(index)
len(selected_index)

3075

In [21]:
x_train_restricted=x_train[selected_index]
x_train_restricted.shape
y_train_restricted=y_train[selected_index]

In [22]:
ood_index=[]
for index in range(y_test.shape[0]):
    if np.sum(y_train[index][:5])==0:
        ood_index.append(index)
len(ood_index)

490

In [23]:
x_test_ood=x_test[ood_index]
y_test_ood=y_test[ood_index]

In [24]:
#!jupyter nbconvert --to script last_layer_AM_MNIST.ipynb