In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load the autoreload extension and reload imported modules before each cell runs,
# so edits to local modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import ruggero_detector as rd
import string
import numpy as np
import matplotlib.pyplot as plt

# Keras imports
from keras.layers import multiply, Input, Dense
from keras.layers.recurrent import LSTM
from keras.models import Sequential
from keras.callbacks import ModelCheckpoint
from keras.utils import to_categorical
import keras.backend as K

# 2019-02-15 Multiple Ruggero detector
I want to build a Ruggero detector in the case where there are multiple categories.

We need to prepare the data differently now.

In [None]:
def prepare_data(sentence_length, ntrain, nvalid, ntest, alpha_to_n, easter_eggs, noise=None) :
    """
    Build shuffled, one-hot encoded train / validation / test sets with one
    class per entry of `easter_eggs`.

    For every easter egg, `ntrain + nvalid + ntest` sentences are generated with
    `rd.generate_sentences`; the class label of a sentence is the index of its
    easter egg in `easter_eggs`. All sentences are stacked, shuffled together
    with their labels by `rd.shuffle_data`, and then cut into three consecutive
    slices.

    Parameters
    ----------
    sentence_length : forwarded to `rd.generate_sentences`.
    ntrain, nvalid, ntest : int
        Number of sentences generated *per easter egg* for each split; the
        returned splits hold `len(easter_eggs)` times as many rows.
    alpha_to_n : forwarded to `rd.generate_sentences`.
    easter_eggs : sequence
        One entry per class; each entry is passed as `easter_egg=` to
        `rd.generate_sentences`.
    noise : unused
        Accepted for interface compatibility; the function currently ignores it.

    Returns
    -------
    train_data, train_targets, valid_data, valid_targets, test_data, test_targets
        The `*_data` arrays are one-hot encoded with a trailing class axis of
        identical width in all three splits; the `*_targets` arrays hold the
        integer class labels (not one-hot encoded).
    """
    # number of sentences per class, and number of classes
    N = ntrain + nvalid + ntest
    n = len(easter_eggs)

    # build the sentences, one batch of N per easter egg
    sentences = []
    targets = []
    for i, easter_egg in enumerate(easter_eggs) :
        sentences.append(rd.generate_sentences(N, sentence_length, alpha_to_n,
                                               easter_egg=easter_egg))
        targets.append(i*np.ones(N, dtype=np.int32))

    # stack everything and shuffle sentences and labels together
    data = np.vstack(tuple(sentences))
    targets = np.concatenate(tuple(targets))
    data, targets = rd.shuffle_data(data, targets)

    # Fix the one-hot width once, from the complete data set. Without an explicit
    # `num_classes`, to_categorical infers the width from the largest value it is
    # handed, so the three splits could end up with different widths (and an
    # empty split could not be encoded at all).
    num_classes = int(np.max(data)) + 1

    # boundaries of the train / valid / test slices
    train_end = n*ntrain
    valid_end = n*(ntrain+nvalid)

    def one_hot(block) :
        # same encoding call as before, but with a width shared by all splits
        return to_categorical(np.expand_dims(block, axis = 2), num_classes=num_classes)

    train_data = one_hot(data[ : train_end, :])
    train_targets = targets[ : train_end]
    valid_data = one_hot(data[train_end : valid_end, :])
    valid_targets = targets[train_end : valid_end]
    test_data = one_hot(data[valid_end : , :])
    test_targets = targets[valid_end : ]

    return train_data, train_targets,\
           valid_data, valid_targets,\
           test_data, test_targets

In [None]:
# Data-set configuration: one class per entry of `easter_eggs`
# (None is presumably the "no hidden word" class -- see rd.generate_sentences).
easter_eggs = [None, 'ruggero', 'cortini']
ntrain, nvalid, ntest = 10000, 2000, 2000   # sentences generated per class for each split
sentence_length = 80

# Generate, shuffle and one-hot encode the three splits.
(train_data, train_targets,
 valid_data, valid_targets,
 test_data, test_targets) = prepare_data(sentence_length, ntrain, nvalid, ntest,
                                         rd.alpha_to_n, easter_eggs)

Let's quickly check that everything's working

In [None]:
# Print the first 100 test samples: letter indices, decoded text, class label.
n_preview = 100
for idx in range(n_preview) :
    letter_indices = test_data[idx].argmax(axis=1)   # undo the one-hot encoding
    decoded = rd.decode_sentence(letter_indices, n_to_alpha=rd.n_to_alpha)
    print(letter_indices, decoded, test_targets[idx], sep='\n')

Okay, so now let's build our AI, this time with several neurons in the output layer.

In [None]:
# Network: a single LSTM layer that reads the whole sentence, followed by a
# softmax layer with one output neuron per class.
lstm_units = 32
model = Sequential([
    LSTM(lstm_units, return_sequences=False, input_shape=(None, rd.nletters)),
    Dense(len(easter_eggs), activation='softmax'),
])
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# Keep the weights of the epoch with the best validation result on disk.
checkpointer = ModelCheckpoint(filepath='../data/multi-ruggero-detector.hdf5',
                               save_best_only=True, verbose=1)

# Integer class labels are one-hot encoded on the fly for the categorical loss.
model.fit(train_data, to_categorical(train_targets),
          validation_data=(valid_data, to_categorical(valid_targets)),
          epochs=10,
          batch_size=32,
          callbacks=[checkpointer])

In [None]:
# Loss and accuracy of the trained model on the held-out test split.
score, acc = model.evaluate(
    test_data,
    to_categorical(test_targets),
    verbose=2,
    batch_size=32,
)
for label, value in (('Test score:', score), ('Test accuracy:', acc)) :
    print(label, value)

Okay so this is extremely accurate in distinguishing between the three categories. Let's have a look at the activations of the three output neurons.

In [None]:
def network_response(m, data, include_full=False) :
    """
    Outputs the response of the network as a function of the position in the sentence.
    To speed up things, it passes series of data points in parallel.

    For every prefix length `k`, the first `k` positions of all sentences are
    passed to `m.predict` in a single call and the result is stored at index
    `k-1` along the second axis of the output.

    Parameters
    ----------
    m : object exposing `output_shape` (second entry = number of outputs) and
        `predict(batch)`.
    data : array indexed as `data[sentence, position, feature]`.
    include_full : bool, optional
        If False (default, original behaviour) prefixes of length
        `1 .. sentence_length-1` are evaluated, i.e. the last position is never
        shown to the network. If True the complete sentence is evaluated as
        well, giving `sentence_length` responses per sentence.

    Returns
    -------
    predictions : array of shape `(nsentences, nprefixes, noutputs)` where
        `nprefixes` is `sentence_length-1` (default) or `sentence_length`
        (`include_full=True`).
    """
    # get info on the data that was passed
    nsentences = data.shape[0]
    sentence_length = data.shape[1]
    ndims = m.output_shape[1]

    # longest prefix to evaluate; clamped so that zero-length sentences yield an
    # empty result instead of a negative array dimension
    longest = sentence_length if include_full else sentence_length-1
    longest = max(longest, 0)

    # init the output data structure, then fill it one prefix length at a time
    predictions = np.zeros((nsentences, longest, ndims))
    for length in range(1, longest+1) :
        predictions[:, length-1, :] = m.predict(data[:, :length, :])

    return predictions

In [None]:
# Probe the trained network on the first seven test sentences, recording its
# output after each successive prefix of letters.
predictions = network_response(model, test_data[:7])

In [None]:
# index (within `predictions`) of the sentence to display
i = 5

# One x position per evaluated prefix length. Taken from `predictions` itself so
# the plot cannot drift out of sync with a global `sentence_length` that a later
# cell may have redefined.
positions = np.arange(1, predictions.shape[1] + 1)

fig = plt.figure(figsize=(15,3))
for neuron, color in enumerate(('b', 'r', 'k')) :
    plt.plot(positions, predictions[i, :, neuron], color=color, label='Neuron %d' % neuron)

# Label tick k with the k-th letter of the sentence. The sentence has one more
# letter than there are plotted positions, and newer Matplotlib releases reject
# a label list whose length differs from the number of ticks, so trim it.
sentence = rd.decode_sentence(test_data[i].argmax(axis=1), n_to_alpha=rd.n_to_alpha)
plt.xticks(positions, list(sentence[:len(positions)]))
plt.xlabel('letters read')
plt.ylabel('softmax output')

plt.legend(loc='center right')
plt.show()

Okay, here the behaviour of the system is what we would expect. Let's see what happens when both targets appear in the same sentence.

In [None]:
# NOTE(review): as far as this notebook shows, `encoding` is first assigned in the
# cell that follows this one, so on a fresh kernel (Restart & Run All) this line
# raises NameError. Move it below that cell or delete it.
encoding.shape

In [None]:
# Two hand-written sentences containing both easter eggs, in opposite order.
sentence_1 = 'lkjdfnvkjsdnfvlruggerodlkjbnsdkjbnlsdjkgbnsidkbnsdcortiniodkfnvboslodnfoivnsidfvnisdfnvs'
sentence_2 = 'lkjdfnvkjsdnfvlcortinidlkjbnsdkjbnlsdjkgbnsidkbnsdruggeroodkfnvboslodnfoivnsidfvnisdfnvs'
encoding_1 = rd.encode_sentence(sentence_1, alpha_to_n=rd.alpha_to_n)
encoding_2 = rd.encode_sentence(sentence_2, alpha_to_n=rd.alpha_to_n)
encoding = np.array((to_categorical(encoding_1, num_classes=rd.nletters),
                      to_categorical(encoding_2, num_classes=rd.nletters)))
p = network_response(model, encoding)

# One panel per sentence, one curve per output neuron.
fig, axes = plt.subplots(2, 1, figsize=(15,6))
for ax, sentence, response in zip(axes, (sentence_1, sentence_2), p) :
    # one x position per evaluated prefix length
    positions = np.arange(1, response.shape[0] + 1)
    for neuron, color in enumerate(('b', 'r', 'k')) :
        ax.plot(positions, response[:, neuron], color=color, label='Neuron %d' % neuron)
    # Tick k carries the k-th letter. The sentence has one more letter than
    # there are plotted positions, and newer Matplotlib releases reject a label
    # list whose length differs from the number of ticks, so trim it.
    ax.set_xticks(positions)
    ax.set_xticklabels(list(sentence[:len(positions)]))
    ax.set_ylabel('softmax output')
    # legend on every panel (previously only the last panel received one)
    ax.legend(loc='center right')

plt.show()

So here there is an asymmetry between the two cases. If 'ruggero' comes before 'cortini', the subsequent 'cortini' cannot make the model change its mind. In the opposite order, however, a later 'ruggero' does make the model change its mind. I'll build another data set and try to figure out why this is the case.

In [None]:
# Second data set: same sizes as before, but the third class now hides
# 'cobbick' instead of 'cortini'. Note that this rebinds `easter_eggs`.
easter_eggs = [None, 'ruggero', 'cobbick']
ntrain, nvalid, ntest = 10000, 2000, 2000   # sentences generated per class for each split
sentence_length = 80

# Generate, shuffle and one-hot encode the three splits.
(train_data_2, train_targets_2,
 valid_data_2, valid_targets_2,
 test_data_2, test_targets_2) = prepare_data(sentence_length, ntrain, nvalid, ntest,
                                             rd.alpha_to_n, easter_eggs)

In [None]:
# Same architecture as the first model, trained on the second data set:
# a single LSTM layer followed by a softmax layer with one neuron per class.
lstm_units = 32
model_2 = Sequential([
    LSTM(lstm_units, return_sequences=False, input_shape=(None, rd.nletters)),
    Dense(len(easter_eggs), activation='softmax'),
])
model_2.compile(loss='categorical_crossentropy',
                optimizer='adam',
                metrics=['accuracy'])

# Keep the weights of the epoch with the best validation result on disk.
checkpointer = ModelCheckpoint(filepath='../data/multi-ruggero-detector-2.hdf5',
                               save_best_only=True, verbose=1)

# Integer class labels are one-hot encoded on the fly for the categorical loss.
model_2.fit(train_data_2, to_categorical(train_targets_2),
            validation_data=(valid_data_2, to_categorical(valid_targets_2)),
            epochs=10,
            batch_size=32,
            callbacks=[checkpointer])

In [None]:
# Loss and accuracy of the second model on its held-out test split.
score, acc = model_2.evaluate(
    test_data_2,
    to_categorical(test_targets_2),
    verbose=2,
    batch_size=32,
)
for label, value in (('Test score:', score), ('Test accuracy:', acc)) :
    print(label, value)

In [None]:
# Two hand-written sentences containing both easter eggs, in opposite order.
sentence_1 = 'lkjdfnvkjsdnfvlruggerodlkjbnsdkjbnlsdjkgbnsidkbnsdcobbickodkfnvboslodnfoivnsidfvnisdfnvs'
sentence_2 = 'lkjdfnvkjsdnfvlcobbickdlkjbnsdkjbnlsdjkgbnsidkbnsdruggeroodkfnvboslodnfoivnsidfvnisdfnvs'
encoding_1 = rd.encode_sentence(sentence_1, alpha_to_n=rd.alpha_to_n)
encoding_2 = rd.encode_sentence(sentence_2, alpha_to_n=rd.alpha_to_n)
encoding = np.array((to_categorical(encoding_1, num_classes=rd.nletters),
                      to_categorical(encoding_2, num_classes=rd.nletters)))
p = network_response(model_2, encoding)

# One panel per sentence, one curve per output neuron.
fig, axes = plt.subplots(2, 1, figsize=(15,6))
for ax, sentence, response in zip(axes, (sentence_1, sentence_2), p) :
    # one x position per evaluated prefix length
    positions = np.arange(1, response.shape[0] + 1)
    for neuron, color in enumerate(('b', 'r', 'k')) :
        ax.plot(positions, response[:, neuron], color=color, label='Neuron %d' % neuron)
    # Tick k carries the k-th letter. The sentence has one more letter than
    # there are plotted positions, and newer Matplotlib releases reject a label
    # list whose length differs from the number of ticks, so trim it.
    ax.set_xticks(positions)
    ax.set_xticklabels(list(sentence[:len(positions)]))
    ax.set_ylabel('softmax output')
    # legend on every panel (previously only the last panel received one)
    ax.legend(loc='center right')

plt.show()

Great, so now we know that some sequences behave differently than others.