In [1]:
import numpy as np
import keras
import matplotlib.pyplot as plt
from gensim.corpora import Dictionary

%matplotlib inline

Using TensorFlow backend.


In [2]:
from s2sutils import *

In [4]:
# load models
dictionary = Dictionary.load('chartovec_big.dict')
chartovec_encoder = SentenceToCharVecEncoder(dictionary)

model = keras.models.load_model('s2s_big.h5')

encoder_model = keras.models.load_model('s2s_encoder_big.h5')

decoder_model = keras.models.load_model('s2s_decoder_big.h5')



In [5]:
numchars = len(chartovec_encoder.dictionary)
latent_dim = numchars + 20

print numchars
print latent_dim

93
113


In [6]:
def decode_sequence(input_sent, dictionary, maxlen=20, num_chars=93):
    # Encode the input as state vectors.
    input_seq = np.array([chartovec_encoder.encode_sentence(input_sent, endsig=True).toarray()])
    states_value = encoder_model.predict(input_seq)

    # Generate empty target sequence of length 1.
    target_seq = np.zeros((1, 1, num_chars))
    # Populate the first character of target sequence with the start character.
    target_seq[0, 0, dictionary.token2id['\n']] = 1.

    # Sampling loop for a batch of sequences
    # (to simplify, here we assume a batch of size 1).
    stop_condition = False
    decoded_sentence = ''
    while not stop_condition:
        output_tokens, h, c = decoder_model.predict([target_seq] + states_value)

        # Sample a token
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        sampled_char = dictionary[sampled_token_index]
        decoded_sentence += sampled_char

        # Exit condition: either hit max length
        # or find stop character.
        if (sampled_char == '\n' or len(decoded_sentence) > maxlen):
            stop_condition = True

        # Update the target sequence (of length 1).
        target_seq = np.zeros((1, 1, num_chars))
        target_seq[0, 0, sampled_token_index] = 1.

        # Update states
        states_value = [h, c]

    return decoded_sentence

In [8]:
decode_sequence('Happy Holiday!', dictionary, maxlen=100)

u'first and something to see you to a single and so\n'

In [9]:
decode_sequence('Merry Christmas!', dictionary, maxlen=100)

u'city, and the subject of the soldier of the state of the state of the state of the state of the state'

In [10]:
decode_sequence('I love natural language processing and sequence to sequence model.', dictionary, maxlen=100)

u'"What a soldier to see you and the soldier of the state of the state of the state of the state of the'

In [11]:
decode_sequence('Python or R?', dictionary, maxlen=100)

u'"Sonya, and the superficial men and the\n'

In [14]:
decode_sequence('Merry Christmas', dictionary, maxlen=100)

u'chief and the superficial and the superficial\n'

In [15]:
decode_sequence('Christmas!', dictionary, maxlen=100)

u'life of the superficial and the superficial\n'

In [16]:
decode_sequence('Happy Christmas!', dictionary, maxlen=100)

u'cities, and the subject of the soldier of the state of the state of the state of the state of the sta'

In [17]:
decode_sequence('Sad Christmas!', dictionary, maxlen=100)

u'results of the superficial and the superficial\n'

In [18]:
decode_sequence('This is the best of time.', dictionary, maxlen=100)

u'"The men and the soldier of the soldier of the state of the state of the state of the state of the st'

In [19]:
decode_sequence('state of the soldier', dictionary, maxlen=100)

u'" crum and something to the sound of the state of the state of the state of the state of the state of'

In [20]:
decode_sequence('dfghds sfjksd sdfs qwejq ', dictionary, maxlen=100)

u'"avanced, and the same time and something to the sound of the state of the state of the state of the '