In [1]:
import random
import numpy as np  
import pandas as pd
from keras.preprocessing import sequence
from keras.preprocessing import text
from keras.models import Sequential, Model
from keras.layers import Input, LSTM, Dense, Bidirectional, BatchNormalization, Dropout, Reshape
from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
from keras import backend as K
from keras import optimizers
from keras.utils import to_categorical

Using TensorFlow backend.


## Build Datasets

In [143]:
# Generate a dataset of strings comprising either odd or even integers, and their corresponding integer labels (1 or 2)

def generate_numstring(seed):
    numbers = [random.randrange(seed, 50, 2) for x in range(6)]
    numbers.sort()
    return " ".join([str(x) for x in numbers])

def build_dataset(n_samples):
    n_samples = n_samples/2
    evens_list = [generate_numstring(0) for x in range(n_samples)]
    evens_labels = [2 for x in range(n_samples)]
    odds_list = [generate_numstring(1) for x in range(n_samples)]
    odds_labels = [1 for x in range(n_samples)]
    strings_pre = evens_list + odds_list
    labels_pre = evens_labels + odds_labels
    merge = list(zip(strings_pre, labels_pre))
    random.shuffle(merge)
    strings, labels = zip(*merge)
    return(strings, labels)

data = build_dataset(100000)
X = data[0]
X2 = ["|"+x[:-1] for x in X]
y = data[1]

for row in range(10):
    print("{} = {}".format(X[row], y[row]))

0 6 18 22 26 32 = 2
5 9 13 15 23 35 = 1
13 17 25 25 29 43 = 1
3 23 37 43 49 49 = 1
6 6 20 36 40 44 = 2
3 7 11 33 41 41 = 1
11 19 19 21 27 31 = 1
8 18 22 26 40 46 = 2
2 6 10 18 34 46 = 2
9 11 13 31 35 49 = 1


In [144]:
# Train a simple character-level LSTM and verify accuracy

# Set Parameters
x_length = 18
training_ratio = .75
training_size = int(len(X)*training_ratio)
num_classes = 3
num_unique_symbols = 14
H = 252
epochs = 100
optimizer = 'rmsprop'
batch_size = 64
learning_rate = .0001

# Encode strings
t = text.Tokenizer(
    char_level=True,
    filters=None,
    lower=True,
    num_words=num_unique_symbols,
    oov_token='unk'
)

# Convert strings to sequences, pad them to uniform length, and divide up training and test sets
t.fit_on_texts(X2)
index_word = {v: k for k, v in t.word_index.items()}
X_seq = t.texts_to_sequences(X)
X2_seq = t.texts_to_sequences(X2)
X_padded = sequence.pad_sequences(X_seq, maxlen=x_length)
X2_padded = sequence.pad_sequences(X2_seq, maxlen=x_length)
X_train = X_padded[:training_size]
X2_train = X2_padded[:training_size]
X_test = X_padded[training_size:]
X2_test = X2_padded[training_size:]
y_train = y[:training_size]
y_test = y[training_size:]

# One-hot encode everything
encoded_X_train = to_categorical(X_train, num_classes=num_unique_symbols)
encoded_X2_train = to_categorical(X2_train, num_classes=num_unique_symbols)
encoded_X_test = to_categorical(X_test, num_classes=num_unique_symbols)
encoded_X2_test = to_categorical(X2_test, num_classes=num_unique_symbols)
encoded_y_train = to_categorical(y_train, num_classes=num_classes)
encoded_y_test = to_categorical(y_test, num_classes=num_classes)

## Classification Model

In [53]:
# A simple LSTM model to classify whether a string is even or odd
opt = optimizers.RMSprop(lr=learning_rate, rho=0.9, epsilon=None, decay=0.0)

model = Sequential()
model.add(LSTM(H, input_shape=(x_length, num_unique_symbols)))
model.add(Dense(num_classes, activation='softmax'))
model.compile(optimizer=opt,
              loss='categorical_crossentropy',
              metrics=['accuracy'])
print(model.summary())
callbacks = [EarlyStopping(monitor='val_loss', patience=2)]
model.fit(encoded_X_train, encoded_y_train, epochs=epochs, callbacks=callbacks, batch_size=batch_size,
          validation_data=(encoded_X_test, encoded_y_test))
scores = model.evaluate(encoded_X_test, encoded_y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_4 (LSTM)                (None, 64)                19712     
_________________________________________________________________
dense_4 (Dense)              (None, 3)                 195       
Total params: 19,907
Trainable params: 19,907
Non-trainable params: 0
_________________________________________________________________
None
Train on 7500 samples, validate on 2500 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Accuracy: 96.68%


## Seq2Seq Autoencoder Model

In [145]:
# Define Parameters for this model
num_unique_symbols = 14
x_length = 18
H = 256
epochs = 20
batch_size = 64
learning_rate = .0001

# define training encoder
encoder_inputs = Input(shape=(x_length, num_unique_symbols))
encoder = LSTM(H, return_state=True)
encoder_outputs, state_h, state_c = encoder(encoder_inputs)
encoder_dense = Dense(num_classes, activation='softmax', name="encoder_final")
encoder_outputs = encoder_dense(encoder_outputs)
encoder_states = [state_h, state_c]

# define training decoder
decoder_inputs = Input(shape=(None, num_unique_symbols))
decoder_lstm = LSTM(H, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states)
decoder_dense = Dense(num_unique_symbols, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

# Combine training inputs into a single training model
model = Model([encoder_inputs, decoder_inputs], [encoder_outputs, decoder_outputs])

# define inference encoder
encoder_model = Model(encoder_inputs, [encoder_outputs] + encoder_states)

# define inference decoder
decoder_state_input_h = Input(shape=(H,))
decoder_state_input_c = Input(shape=(H,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
decoder_outputs, state_h, state_c = decoder_lstm(decoder_inputs, initial_state=decoder_states_inputs)
decoder_states = [state_h, state_c]
decoder_outputs = decoder_dense(decoder_outputs)
decoder_model = Model([decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states)

In [None]:
# Compile and train the model
opt = optimizers.Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['acc'])
model.fit([encoded_X_train, encoded_X2_train], [encoded_y_train, encoded_X_train], epochs=epochs, batch_size=batch_size)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
17216/75000 [=====>........................] - ETA: 3:57 - loss: 0.0445 - encoder_final_loss: 6.0626e-04 - dense_9_loss: 0.0439 - encoder_final_acc: 0.9998 - dense_9_acc: 0.9918

## Translation

In [133]:
# Helper functions

def predict_sequence(infenc, infdec, source, n_steps, cardinality, translate=False):
    '''
    Given a source array, feed it through the autoencoder to predict a string - either itself in the naive case 
    where translation is turned off, or run gradient ascent to convert the source array to a target category,
    and run that through the autoencoder to get the translated version.
    '''
    source_string = one_hot_decode(index_word, source[0])
    # feed the source into the encoder inference model
    encode = infenc.predict(source)
    # make prediction of category for source sequence
    label_prediction_probs = encode[0][0]
    label_prediction = np.argmax(label_prediction_probs)
    source_label_prediction = "odd" if label_prediction == 1 else "even"
    source_label_certainty = label_prediction_probs[label_prediction]
    # start of sequence input
    target_seq = np.array([0.0 for _ in range(cardinality)]).reshape(1, 1, cardinality)
    
    # If set to translate, run gradient ascent to maximize to the target_label
    if translate:
        target_label = 1 if label_prediction==2 else 2
        new_source = translate_sequence(source, infenc, target_label)
        encode = infenc.predict(new_source)
    
    # feed the state into the decoder to make a prediction of the string
    state = encode[1:]
    output = list()
    for t in range(n_steps):
        # predict next char
        yhat, h, c = infdec.predict([target_seq] + state)
        # store prediction
        output.append(yhat[0,0,:])
        # update state
        state = [h, c]
        # update target sequence
        target_seq = yhat
    predicted_sequence = np.array([output])
    # Convert the response back to a string
    decode_string = one_hot_decode(index_word, predicted_sequence[0])
    
    # make prediction of category for predicted response
    decode_prediction = infenc.predict(predicted_sequence)
    label_prediction_probs = decode_prediction[0][0]
    label_prediction = np.argmax(label_prediction_probs)
    decode_label_prediction = "odd" if label_prediction == 1 else "even"
    decode_label_certainty = label_prediction_probs[label_prediction]
    
    return (source_string, source_label_prediction, source_label_certainty,
            decode_string, decode_label_prediction, decode_label_certainty)

def score_similarity(s1, s2):
    '''
    Measure the similarity of two strings character-by-character.  Not a very effective way of scoring this.
    '''
    list_s1 = list(s1)
    list_s2 = list(s2)
    score = 0
    increment = 100/len(s1)
    for char in range(len(list_s1)):
        if char == len(list_s1) or char == len(list_s2):
            break
        elif list_s1[char] == list_s2[char]:
            score += increment
    return score

def one_hot_decode(reverse_dict, encoded_seq):
    '''
    Turn a one-hot encoded array back into a readable string.
    '''
    retn = []
    seq = [np.argmax(vector) for vector in encoded_seq]
    for s in seq:
        if s > 0:
            retn.append(reverse_dict[s])
    return "".join(retn)

def normalize(x):
    # utility function to normalize a tensor by its L2 norm
    return x / (K.sqrt(K.mean(K.square(x))) + K.epsilon())
    #return x / K.max(x)

def translate_sequence(seq, model, target):
    '''
    Run gradient ascent to maximize a sequence to a target category
    '''
    target_probability = .95 # You want the model to be this certain the string is in the target category
    input_txt = model.input
    loss = K.mean(model.output[0][:, target])
    grads = K.gradients(loss, input_txt)[0]
    grads = normalize(grads)
    iterate = K.function([input_txt], [loss, grads])
    
    output_sequence = seq.copy()
    for i in range(20):
        loss_value, grads_value = iterate([output_sequence])
        output_sequence += grads_value
        
        probs = model.predict(output_sequence)[0][0]
        cat = np.argmax(probs)
        top_prob = probs[cat]
        if loss_value <= 0. or (cat==target and top_prob > target_probability):
            break
    return output_sequence

In [131]:
decoder_results = {'Source':[], 'Source Prediction':[], 'Source Certainty':[],
                   'Decoded':[], 'Decoded Prediction':[], 'Decoded Certainty':[], 'Similarity': []}
for _ in range(10):
    target = predict_sequence(encoder_model, decoder_model, encoded_X_test[[_]], x_length, num_unique_symbols)
    decoder_results['Source'].append(target[0])
    decoder_results['Source Prediction'].append(target[1])
    decoder_results['Source Certainty'].append(target[2])
    decoder_results['Decoded'].append(target[3])
    decoder_results['Decoded Prediction'].append(target[4])
    decoder_results['Decoded Certainty'].append(target[5])
    decoder_results['Similarity'].append(score_similarity(target[0], target[3]))


pd.DataFrame.from_dict(decoder_results)[['Source', 'Source Prediction', 'Source Certainty', 
                                         'Decoded', 'Decoded Prediction', 'Decoded Certainty',
                                         'Similarity']]

Unnamed: 0,Source,Source Prediction,Source Certainty,Decoded,Decoded Prediction,Decoded Certainty,Similarity
0,0 4 8 12 36 44,even,0.999992,0 4 8 12 32 44,even,0.999992,91
1,1 7 15 15 21 29,odd,0.999995,1 5 15 15 25 29,odd,0.999996,78
2,6 10 12 14 40 48,even,0.999973,0 10 16 14 42 48,even,0.999988,78
3,2 8 16 28 32 40,even,0.999987,2 8 18 28 30 40,even,0.99999,78
4,4 6 24 30 40 40,even,0.999992,4 6 20 30 40 40,even,0.999995,84
5,9 19 21 23 35 49,odd,0.999994,9 19 21 23 35 45,odd,0.999994,90
6,2 2 16 38 38 42,even,0.999869,2 2 18 38 38 42,even,0.999946,84
7,3 9 23 23 37 49,odd,0.999969,3 9 23 29 39 47,odd,0.999972,72
8,5 9 15 23 41 43,odd,0.999997,5 9 25 31 41 43,odd,0.999996,72
9,5 23 29 33 35 49,odd,0.999986,5 23 29 39 39 45,odd,0.999982,78


In [134]:
decoder_results = {'Source':[], 'Source Prediction':[], 'Source Certainty':[],
                   'Translated':[], 'Translated Prediction':[], 'Translated Certainty':[], 'Score': []}
for _ in range(10):
    target = predict_sequence(encoder_model, decoder_model, encoded_X_test[[_]], x_length, num_unique_symbols, translate=True)
    decoder_results['Source'].append(target[0])
    decoder_results['Source Prediction'].append(target[1])
    decoder_results['Source Certainty'].append(target[2])
    decoder_results['Translated'].append(target[3])
    decoder_results['Translated Prediction'].append(target[4])
    decoder_results['Translated Certainty'].append(target[5])
    decoder_results['Score'].append(score_similarity(target[0], target[3]))

pd.DataFrame.from_dict(decoder_results)[['Source', 'Source Prediction', 'Source Certainty', 
                                         'Translated', 'Translated Prediction', 'Translated Certainty',
                                         'Score']]

Unnamed: 0,Source,Source Prediction,Source Certainty,Translated,Translated Prediction,Translated Certainty,Score
0,0 4 8 12 36 44,even,0.999992,77777739939999 9,odd,0.999997,7
1,1 7 15 15 21 29,odd,0.999995,0000044444444888,even,0.999999,0
2,6 10 12 14 40 48,even,0.999973,777739999999999 9,odd,0.999992,0
3,2 8 16 28 32 40,even,0.999987,777777739999 99 9,odd,0.999995,6
4,4 6 24 30 40 40,even,0.999992,777737939999999 4,odd,0.999994,6
5,9 19 21 23 35 49,odd,0.999994,00040 44 48 48888,even,0.999997,0
6,2 2 16 38 38 42,even,0.999869,777777739999 99 4,odd,0.999995,12
7,3 9 23 23 37 49,odd,0.999969,880 40 40 48 4884,even,0.999998,30
8,5 9 15 23 41 43,odd,0.999997,00 0044444444888,even,0.999999,6
9,5 23 29 33 35 49,odd,0.999986,00 40 44 48848888,even,0.999996,0


## Scratch fields

In [141]:
target_category = 1
input_txt = encoder_model.input
layer_dict = dict([(layer.name, layer) for layer in encoder_model.layers[1:]])
layer_name = 'encoder_final'
layer_input = layer_dict[layer_name].input
loss = K.mean(encoder_model.output[0][:, target_category])
grads = K.gradients(loss, layer_input)[0]
grads = normalize(grads)
iterate = K.function([input_txt], [loss, grads])
step = 1.

In [142]:
output_sequence = encoded_X_test[[0]].copy()
for i in range(3):
    loss_value, grads_value = iterate([output_sequence])
    output_sequence += grads_value * step

    #revised_output = manual_prediction(output_sequence, weights)
    #cat = np.argmax(revised_output)
    #top_prob = revised_output[cat]
    
    #print('Current loss value: {}, predicted category: {}, certainty: {}'
    #      .format(loss_value, cat, top_prob))
    print(loss_value)
    if loss_value <= 0. or (cat==target_category and top_prob > .9):
        # some filters get stuck to 0, we can skip them
        break

ValueError: operands could not be broadcast together with shapes (1,18,14) (1,256) (1,18,14) 

In [140]:
input_txt

<tf.Tensor 'input_29:0' shape=(?, 18, 14) dtype=float32>

In [212]:
# Not sure this is necessary

def softmax(raw_preds):
    '''
    pass raw predictions through softmax activation function
    '''
    out = np.exp(raw_preds) # exponentiate vector of raw predictions
    return out/np.sum(out)

def manual_prediction(h, weights):
    '''
    Take raw model output and manually compute dense layer, softmax and return category prediction
    '''
    w_out = weights[0]
    b_out = weights[1]
    logits = np.matmul(h, w_out)+b_out
    return softmax(logits)[0]

layer_dict = dict([(layer.name, layer) for layer in encoder_model.layers])
weights = layer_dict['encoder_final'].get_weights()
w_out = weights[0]
b_out = weights[1]
encode = encoder_model.predict(encoded_X_test[[0]])
h = encode[1]
logits = np.matmul(h, w_out)+b_out
prediction = encode[0]
manual_prediction(h, weights, 2)

(1, 18, 14)