# Avoiding catastrophic interference with joint training

A neural network is trained consecutively on permuted MNIST data.

In **version 2b**, only a small part of the samples from previous tasks is retained, and soft target values are used for these samples. This algorithm tries to reproduce the Distilled Few-Shot Self Reminder (DFSR) method of the paper [J. Wen et al., Few-Shot Self Reminder to Overcome Catastrophic Forgetting].

In [1]:
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense
from keras import regularizers
from keras import utils
from keras.optimizers import SGD
import numpy as np
import matplotlib.pyplot as plt
#from scipy.misc import toimage
%matplotlib inline 
#import PIL.Image as pil
from progressbar import ProgressBar

Using TensorFlow backend.


In [2]:
# --- Experiment configuration (everything a reader might want to tune) ---
num_epoch = 20    # number of training epochs spent on each task
num_neur = 400    # number of neurons in hidden layers
num_class = 10    # number of output classes
img_line = 784    # length of one flattened image vector
bsize = 32        # mini-batch size passed to model.fit
nb_retained = 5   # number of images per class to retain
nb_repeat = 1000  # number of times to repeat retained samples

# Seed NumPy's global RNG so that the pixel permutations, the choice of the
# retained samples and the shuffling of the mixed training sets are the same
# on every "Restart & Run All".
# NOTE(review): the Keras backend may keep random state of its own; seed it
# too if bit-exact runs are required -- confirm for the backend in use.
seed = 0
np.random.seed(seed)

In [3]:
# Load MNIST through the Keras dataset helper; it yields one (inputs, labels)
# pair for the training split and one for the test split.
mnist_train, mnist_test = mnist.load_data()
X_train, y_train = mnist_train
X_test, y_test = mnist_test

In [4]:
# Flatten every image into a vector of img_line values and rescale the pixel
# values by 1/255. The -1 lets NumPy infer the number of samples instead of
# hard-coding the sizes of the two splits.
# NOTE: X_train / X_test are overwritten here, so re-running this cell without
# re-running the loading cell first would divide the pixels by 255 a second time.
X_train = X_train.reshape(-1, img_line)
X_train = X_train.astype('float32')
X_train /= 255
X_test = X_test.reshape(-1, img_line)
X_test = X_test.astype('float32')
X_test /= 255

# One-hot encode the integer labels; the width follows num_class rather than
# a separate literal, so the targets always match the output layer below.
Y_train = utils.to_categorical(y_train, num_class)
Y_test = utils.to_categorical(y_test, num_class)

# Fully connected classifier: two hidden ReLU layers with num_neur units each
# and a softmax output layer with num_class units. Every layer carries the
# same L2 penalty on its kernel.
model = Sequential()
l2 = 1e-4 # L2 regularization coefficient
model.add(Dense(num_neur, input_dim=img_line, activation="relu",kernel_regularizer=regularizers.l2(l2)))
model.add(Dense(num_neur, activation="relu",kernel_regularizer=regularizers.l2(l2)))
model.add(Dense(num_class, activation="softmax",kernel_regularizer=regularizers.l2(l2)))

# Cross-entropy accepts the one-hot labels as well as the soft targets that
# later cells produce with model.predict.
model.compile(loss="categorical_crossentropy", optimizer=SGD(lr=0.001), metrics=["accuracy"])

## Training network on data A

In [None]:
# Task A: MNIST with one fixed random permutation of the pixel positions,
# applied identically to the training and the test images.
order_all = np.random.permutation(img_line)
tr_a = X_train[:,order_all]
ts_a = X_test[:,order_all]

# score_a holds one row (epoch index, accuracy on test set A) per epoch.
# It is sized for three training phases; only the first num_epoch rows are
# filled in this cell.
score_a = np.zeros((num_epoch*3,2))
history = model.fit(tr_a, Y_train, batch_size=bsize, epochs=num_epoch, validation_data=(ts_a,Y_test), verbose=0)

# Keras >= 2.3 reports metrics=["accuracy"] as 'val_accuracy', older releases
# as 'val_acc'. Use whichever key is present (falling back to the old key
# keeps the original KeyError if neither exists).
acc_key = 'val_accuracy' if 'val_accuracy' in history.history else 'val_acc'
score_a[:num_epoch,0] = range(num_epoch)
score_a[:num_epoch,1] = history.history[acc_key]

# Accuracy on test set A over the epochs of the first phase.
plt.plot(score_a[:num_epoch,0],score_a[:num_epoch,1])
plt.xlabel('Эпоха обучения')
plt.ylabel('Аккуратность')
plt.show()

## Training network on data B with episodes from data A

Pick several samples from dataset A

In [None]:
# Keep nb_retained randomly chosen training samples of every class from task A.
# The seed array uses tr_a's dtype: a plain np.empty() is float64 and would
# silently upcast the retained set, and through it the tiled copy and the
# mixed training set built later.
retained_a = [np.empty([0,img_line], dtype=tr_a.dtype)]
for c in range(num_class): # pick nb_retained samples of each class
    class_rows = np.flatnonzero(Y_train[:,c]==1) # row indices of class c
    # Same RNG call as before (one permutation of the class size); only the
    # selected rows are copied instead of the whole class.
    chosen = np.random.permutation(class_rows.size)[:nb_retained]
    retained_a.append(tr_a[class_rows[chosen],:])
tr_a_part = np.concatenate(retained_a, axis=0)

# Soft targets: the network's current outputs on the retained samples.
y_a_part = model.predict(tr_a_part)

# Repeat the retained samples and their targets nb_repeat times, which
# increases the number of dataset A samples in the later training sets.
tr_a_part = np.tile(tr_a_part,(nb_repeat,1))
y_a_part = np.tile(y_a_part,(nb_repeat,1))

Train network on a mix of datasets A and B

In [None]:
# Task B: MNIST under a second, independently drawn pixel permutation.
pixel_order_b = np.random.permutation(img_line)
tr_b, ts_b = X_train[:, pixel_order_b], X_test[:, pixel_order_b]

# Training set for this phase: all of B plus the repeated samples kept from A
# (hard one-hot labels for B, soft targets for A).
tr_ab = np.concatenate((tr_b, tr_a_part), axis=0)
Y_ab = np.concatenate((Y_train, y_a_part), axis=0)

# Shuffle inputs and targets with one common row permutation so that A and B
# samples are interleaved (no validation split is passed to fit below).
row_order = np.random.permutation(tr_ab.shape[0])
tr_ab, Y_ab = tr_ab[row_order, :], Y_ab[row_order, :]

# score_b rows are (global epoch index, accuracy on test set B); the array is
# sized for this phase and the following one.
score_b = np.zeros((num_epoch*2, 2))
pbar = ProgressBar(maxval=num_epoch).start()
for ep in range(num_epoch):
    # One epoch at a time so both test sets can be evaluated after each epoch.
    model.fit(tr_ab, Y_ab, batch_size=bsize, epochs=1, verbose=0)
    step = ep + num_epoch  # position on the epoch axis shared by all curves
    acc_a = model.evaluate(ts_a, Y_test, verbose=0)[1]  # entry 1: accuracy
    acc_b = model.evaluate(ts_b, Y_test, verbose=0)[1]
    score_a[step, :] = [step, acc_a]
    score_b[ep, :] = [step, acc_b]
    pbar.update(ep + 1)
pbar.finish()

# Curves recorded so far: A over two phases, B over one.
for curve, name in ((score_a[:num_epoch*2], 'A'), (score_b[:num_epoch], 'B')):
    plt.plot(curve[:, 0], curve[:, 1], label=name)
plt.xlabel('Эпоха обучения')
plt.ylabel('Аккуратность')
plt.legend()
plt.show()

## Training network on data C with episodes from datasets A and B

Pick several samples from dataset B

In [None]:
# Keep nb_retained randomly chosen training samples of every class from task B.
# The seed array uses tr_b's dtype: a plain np.empty() is float64 and would
# silently upcast the retained set, and through it the tiled copy and the
# mixed training set built later.
retained_b = [np.empty([0,img_line], dtype=tr_b.dtype)]
for c in range(num_class): # pick nb_retained samples of each class
    class_rows = np.flatnonzero(Y_train[:,c]==1) # row indices of class c
    # Same RNG call as before (one permutation of the class size); only the
    # selected rows are copied instead of the whole class.
    chosen = np.random.permutation(class_rows.size)[:nb_retained]
    retained_b.append(tr_b[class_rows[chosen],:])
tr_b_part = np.concatenate(retained_b, axis=0)

# Soft targets: the network's current outputs on the retained samples.
y_b_part = model.predict(tr_b_part)

# Repeat the retained samples and their targets nb_repeat times, which
# increases the number of dataset B samples in the later training set.
tr_b_part = np.tile(tr_b_part,(nb_repeat,1))
y_b_part = np.tile(y_b_part,(nb_repeat,1))

In [None]:
# Task C: MNIST under a third, independently drawn pixel permutation.
pixel_order_c = np.random.permutation(img_line)
tr_c, ts_c = X_train[:, pixel_order_c], X_test[:, pixel_order_c]

# Training set for this phase: all of C plus the repeated samples kept from
# A and B (hard one-hot labels for C, soft targets for A and B).
tr_abc = np.concatenate((tr_c, tr_a_part, tr_b_part), axis=0)
Y_abc = np.concatenate((Y_train, y_a_part, y_b_part), axis=0)

# Shuffle inputs and targets with one common row permutation so that the three
# sources are interleaved (no validation split is passed to fit below).
row_order = np.random.permutation(tr_abc.shape[0])
tr_abc, Y_abc = tr_abc[row_order, :], Y_abc[row_order, :]

# score_c rows are (global epoch index, accuracy on test set C).
score_c = np.zeros((num_epoch, 2))
pbar = ProgressBar(maxval=num_epoch).start()
for ep in range(num_epoch):
    # One epoch at a time so all three test sets can be evaluated after each.
    model.fit(tr_abc, Y_abc, batch_size=bsize, epochs=1, verbose=0)
    step = ep + num_epoch*2  # position on the epoch axis shared by all curves
    acc_a = model.evaluate(ts_a, Y_test, verbose=0)[1]  # entry 1: accuracy
    acc_b = model.evaluate(ts_b, Y_test, verbose=0)[1]
    acc_c = model.evaluate(ts_c, Y_test, verbose=0)[1]
    score_a[step, :] = [step, acc_a]
    score_b[ep + num_epoch, :] = [step, acc_b]
    score_c[ep, :] = [step, acc_c]
    pbar.update(ep + 1)
pbar.finish()

# Complete accuracy curves of the three tasks on the common epoch axis.
for curve, name in ((score_a, 'A'), (score_b, 'B'), (score_c, 'C')):
    plt.plot(curve[:, 0], curve[:, 1], label=name)
plt.xlabel('Эпоха обучения')
plt.ylabel('Аккуратность')
plt.legend()
plt.show()

In [None]:
# Store the accuracy curves of all three tasks in one NumPy archive; each
# keyword becomes the name of the corresponding array inside the archive.
curves = {'score_a': score_a, 'score_b': score_b, 'score_c': score_c}
np.savez('Distilled_Few-Shots_4', **curves)