In [1]:
import numpy as np
import theano
import theano.tensor as T
import time
from itertools import chain
from sys import stdout

import lasagne as L

from SimpleRNNLM import SimpleRNNLM, iterate_minibatches
from mt_load import load_mt, get_mt_voc, get_w2v_embs

 https://github.com/Theano/Theano/wiki/Converting-to-the-new-gpu-back-end%28gpuarray%29

Using gpu device 0: GeForce GTX 780 (CNMeM is enabled with initial size: 30.0% of memory, cuDNN 5105)


In [2]:
# Reminder: padding and the <utt_end> token now share one value, so a padded
# position cannot be told apart from an end-of-utterance marker.

pad_value = -1  # <utt_end>'s vector is the last one, which index -1 addresses

In [3]:
# Directory holding the dataset files read by the mt_load helpers.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
mt_path = "/pio/data/data/mtriples/"
# mt_path = "/home/maciek/Desktop/mgr/DATA/MovieTriples_Dataset/"

# Load the three partitions. split=False and trim=200 are forwarded as given;
# their exact meaning is defined in mt_load.load_mt (trim presumably caps the
# sequence length - TODO confirm).
train, valid, test = load_mt(path=mt_path, split=False, trim=200)

# idx_to_w / w_to_idx are used below to translate between word strings and
# integer ids; voc_size and freqs are passed to the model constructor.
idx_to_w, w_to_idx, voc_size, freqs = get_mt_voc(path=mt_path, train_len=len(train))

In [4]:
# Embedding matrix (used as `emb_init` when the model is built) plus a companion
# mask array; presumably pretrained word2vec vectors - see mt_load.get_w2v_embs.
word2vec_embs, word2vec_embs_mask = get_w2v_embs(mt_path)
# Row indices whose first mask column equals 1.
# NOTE(review): w2v_train_mask is not referenced by any other cell visible here.
w2v_train_mask = np.where(word2vec_embs_mask[:,0] == 1)[0]

In [5]:
def update_fn(loss, params, learning_rate=.01):
    """Build Adagrad parameter updates for ``loss``.

    Args:
        loss: expression to minimise; forwarded to Lasagne unchanged.
        params: parameters to update; forwarded to Lasagne unchanged.
        learning_rate: Adagrad step size. Defaults to 0.01, the value that
            used to be hard-coded, so existing two-argument calls behave
            exactly as before.

    Returns:
        Whatever ``L.updates.adagrad`` returns for these arguments.
    """
    return L.updates.adagrad(loss, params, learning_rate=learning_rate)

# Build the language model over the loaded vocabulary: emb_size=300 with the
# embeddings initialised from word2vec_embs, rec_size=300, and mode='ssoft'
# with num_sampled=200 and `freqs` as ssoft_probs (presumably a sampled softmax
# whose noise distribution is the word frequencies - confirm in SimpleRNNLM).
# NOTE(review): update_fn defined in this cell is not passed to the constructor.
net = SimpleRNNLM(voc_size=voc_size,
                  emb_size=300,
                  rec_size=300,
                  mode='ssoft',
                  num_sampled=200,
                  emb_init=word2vec_embs,
                  ssoft_probs=freqs)

Building the model...
Compiling theano functions...
Building a network for generation...
Done


In [9]:
# Load model parameters from an .npz file; the relative name is resolved
# against the notebook's working directory.
net.load_params(fname='w2vInit_300_300_ssoft200unigr_bs50_cut200_nosplit_early5.npz')

In [10]:
# Score the model on the validation partition in batches of 25; the metric is
# whatever SimpleRNNLM.validate returns. print(...) with a single argument
# prints the same text on Python 2 and also parses on Python 3.
print(net.validate(val_data=valid, batch_size=25))

Done 100 batches in 7.26s
Done 200 batches in 14.36s
Done 300 batches in 21.37s
Done 400 batches in 28.45s
Done 500 batches in 35.95s
Done 600 batches in 42.90s
Done 700 batches in 50.16s
Done 800 batches in 57.33s
Done 900 batches in 64.71s
3.38536382723


In [8]:
# Score the model on the test partition in batches of 25; the metric is
# whatever SimpleRNNLM.validate returns. print(...) with a single argument
# prints the same text on Python 2 and also parses on Python 3.
print(net.validate(val_data=test, batch_size=25))

Done 100 batches in 7.33s
Done 200 batches in 14.68s
Done 300 batches in 22.09s
Done 400 batches in 29.32s
Done 500 batches in 36.63s
Done 600 batches in 43.84s
Done 700 batches in 51.01s
Done 800 batches in 58.45s
Done 900 batches in 65.95s
3.95432066999


In [9]:
# Display the current values of every parameter of the training network.
# NOTE(review): this dumps whole arrays into the notebook output; inspecting
# shapes or a small slice would keep the saved notebook lighter.
L.layers.get_all_param_values(net.train_net)

[array([[ 0.1205289 ,  0.06720761,  0.00566821, ..., -0.17278215,
         -0.11692838,  0.41237643],
        [ 0.47035152, -0.25242484,  0.19176167, ..., -0.21812566,
         -0.2973913 , -0.05038602],
        [ 0.10282347,  0.28751189, -0.06337886, ...,  0.33019742,
          0.24158429,  0.27118179],
        ..., 
        [ 0.01506752, -0.88151652, -1.4447726 , ...,  1.23137307,
         -0.34998256,  2.56805658],
        [-0.528534  ,  0.50723785,  0.24611983, ..., -1.21289706,
         -1.44576716,  0.33012688],
        [-0.09265103,  0.07149383, -0.0485729 , ..., -0.07185069,
         -0.06424337, -0.01799409]], dtype=float32),
 array([[ -6.91524521e-03,  -9.38605797e-03,   1.11532293e-01, ...,
          -1.13870077e-01,   1.06219597e-01,  -6.53224625e-03],
        [  2.01458782e-01,  -1.98784858e-01,   3.90578806e-02, ...,
          -9.85075161e-02,  -4.91549149e-02,   2.77870242e-03],
        [  1.60697907e-01,  -2.12140918e-01,  -1.27519518e-01, ...,
          -6.92538917e-02

In [10]:
# Train for one epoch on `train` (batches of 50) with `valid` (batches of 25)
# as validation data. path=None and save_params=False are forwarded as given;
# presumably nothing is written to disk - confirm in SimpleRNNLM.train_model.
net.train_model(num_epochs=1,
                path=None,
                save_params=False,
                train_batch_size=50,
                train_data=train,
                val_batch_size=25,
                val_data=valid)

Done 10 batches in 2.49s	training loss:	6.906690
Done 20 batches in 4.99s	training loss:	6.335346
Done 30 batches in 7.33s	training loss:	6.101349
Done 40 batches in 9.75s	training loss:	5.940737
Done 50 batches in 12.49s	training loss:	5.816519
Done 60 batches in 14.86s	training loss:	5.696163
Done 70 batches in 17.31s	training loss:	5.592001
Done 80 batches in 19.67s	training loss:	5.508519
Done 90 batches in 22.34s	training loss:	5.425793
Done 100 batches in 24.70s	training loss:	5.359820
Done 110 batches in 27.25s	training loss:	5.296851
Done 120 batches in 29.87s	training loss:	5.240117
Done 130 batches in 32.34s	training loss:	5.185036
Done 140 batches in 34.86s	training loss:	5.134850
Done 150 batches in 37.14s	training loss:	5.093040
Done 160 batches in 39.53s	training loss:	5.051136
Done 170 batches in 41.93s	training loss:	5.011679
Done 180 batches in 44.44s	training loss:	4.974402
Done 190 batches in 46.88s	training loss:	4.943038
Done 200 batches in 49.77s	training loss:	4.

In [9]:
def rnd_next_word(probs, size=1):
    """Sample word ids according to the distribution ``probs``.

    The ids offered to the sampler are 0 .. len(probs)-2 followed by -1, so
    the last entry of ``probs`` is reported as -1 rather than as its own
    position.

    Args:
        probs: 1-D array of probabilities, one per word id.
        size: number of draws, forwarded to ``np.random.choice``.

    Returns:
        int32 array of sampled ids.
    """
    regular_ids = np.arange(probs.shape[0] - 1, dtype=np.int32)
    last_id = np.array([-1], dtype=np.int32)
    word_ids = np.concatenate([regular_ids, last_id])
    return np.random.choice(word_ids, size=size, p=probs)

def beam_search(get_probs_fun, beam=10, init_seq='', mode='rr', max_len=100):
    """Generate a token sequence while keeping ``beam`` hypotheses alive.

    Args:
        get_probs_fun: callable that takes an int32 array with one sequence
            per row and returns next-word probabilities, one row per sequence.
        beam: number of hypotheses kept after every step.
        init_seq: whitespace-separated words forming the prefix. Words missing
            from ``w_to_idx`` map to '<unk>'; token 1 is prepended unless the
            prefix already starts with it.
        mode: string whose first two characters are each 's' or 'r'.
            mode[0] picks the first generated word, mode[1] every later one:
            's' keeps the most probable words, 'r' samples them with
            ``rnd_next_word``.
        max_len: generation stops once the sequences hold this many tokens
            (default 100, the previously hard-coded limit).

    Returns:
        1-D int32 array: the best-scoring hypothesis that ends in
        ``pad_value`` or, when none has ended yet, the best-scoring one.

    Raises:
        ValueError: if the first two characters of ``mode`` are not 's'/'r'.
            Such modes used to fail with an unbound local or loop forever.
    """
    if len(mode) < 2 or mode[0] not in ('s', 'r') or mode[1] not in ('s', 'r'):
        raise ValueError("mode must start with two characters from 's'/'r', "
                         "got %r" % (mode,))

    utt = [w_to_idx.get(w, w_to_idx['<unk>']) for w in init_seq.split()]
    if len(utt) == 0 or utt[0] != 1:
        utt = [1] + utt
    utt = np.asarray(utt, dtype=np.int32)[np.newaxis]

    # First generated word: one candidate per beam slot.
    first_probs = get_probs_fun(utt)[0]
    if mode[0] == 's':
        words = first_probs.argpartition(-beam)[-beam:].astype(np.int32)
    else:
        # Sampled ids already use -1 for the last vocabulary entry, and -1
        # indexes that same entry of first_probs below.
        words = rnd_next_word(first_probs, beam)

    # Every hypothesis starts from the log-probability of its first word, so
    # an unlikely opening is not ranked as if it were free.
    scores = np.log(first_probs[words]).astype(np.float64)
    if mode[0] == 's':
        words[words == voc_size - 1] = pad_value

    candidates = np.hstack([utt.repeat(beam, axis=0), words[:, np.newaxis]])

    # Extend until some hypothesis emits the end token or max_len is reached.
    while candidates.shape[1] < max_len and pad_value not in candidates[:, -1]:
        probs = get_probs_fun(candidates)

        if mode[1] == 's':
            # Score every (hypothesis, word) pair.
            tot_scores = np.log(probs) + scores[:, np.newaxis]
            sampled = None
        else:
            # Score `beam` sampled continuations per hypothesis
            # (this doesn't have to be exactly `beam`).
            sampled = np.array([rnd_next_word(row, beam) for row in probs])
            rows = np.arange(sampled.shape[0])[:, np.newaxis]
            tot_scores = scores[:, np.newaxis] + np.log(probs[rows, sampled])

        # Keep the `beam` best pairs; order within the kept set is arbitrary.
        flat = tot_scores.ravel().argpartition(-beam)[-beam:]
        i, j = divmod(flat, tot_scores.shape[1])
        scores = tot_scores[i, j]

        if mode[1] == 's':
            new_words = j.astype(np.int32)
            new_words[new_words == voc_size - 1] = pad_value
        else:
            new_words = sampled[i, j]

        candidates = np.hstack([candidates[i], new_words[:, np.newaxis]])

    finished = candidates[:, -1] == pad_value
    if finished.any():
        # Prefer the best finished hypothesis, not whichever happens to be first.
        finished_rows = np.flatnonzero(finished)
        return candidates[finished_rows[scores[finished_rows].argmax()]]
    return candidates[scores.argmax()]

In [39]:
utt = beam_search(net.get_probs_fn, init_seq='<s> hello , nice day , isn \' t it ? </s>', beam=10, mode='rr')

# Pretty-print the result: words separated by spaces, a line break at every
# '</s>', and the '<s>' markers hidden. The list comprehension and print('')
# behave the same on Python 2 and Python 3.
text = [idx_to_w[i] for i in utt]
for w in text:
    if w == '</s>':
        print('')
    elif w != '<s>':
        stdout.write(w + ' ')

hello , nice day , isn ' t it ? 
no . 
<person> , i ' m sorry . 
<utt_end> 

In [8]:
# Next-word probabilities for a batch holding the single one-token sequence [1]
# (input shape (1, 1), int32).
net.get_probs_fn(np.array([1], dtype=np.int32)[np.newaxis])

array([[  1.57305487e-02,   6.14382998e-06,   3.13691999e-04, ...,
          1.02940156e-07,   7.17821308e-07,   5.06606284e-07]], dtype=float32)

In [11]:
# Next-word probabilities for a batch holding the single one-token sequence [1]
# (input shape (1, 1), int32).
# NOTE(review): identical to an earlier cell; one of the two can be dropped.
net.get_probs_fn(np.array([1], dtype=np.int32)[np.newaxis])

array([[  1.57305487e-02,   6.14382998e-06,   3.13691999e-04, ...,
          1.02940156e-07,   7.17821308e-07,   5.06606284e-07]], dtype=float32)

In [13]:
init_seq = ''
# Every sequence starts with token 1 (presumably '<s>' - confirm against the
# vocabulary); unknown words fall back to '<unk>'. A list comprehension is
# used because `[1] + map(...)` only works where map returns a list.
utt = [1] + [w_to_idx.get(word, w_to_idx['<unk>']) for word in init_seq.split()]
utt = np.asarray(utt, dtype=np.int32)[np.newaxis]

# Sample one word at a time until the end token (pad_value) is drawn or
# 100 words have been generated.
for _ in range(100):
    if utt[0, -1] == pad_value:
        break
    word_probs = net.get_probs_fn(utt)[0]
    next_idx = rnd_next_word(word_probs)
    utt = np.append(utt, next_idx)[np.newaxis].astype(np.int32)

# Show the sampled words without the structural markers.
text = [idx_to_w[token] for token in utt[0]]
' '.join([t for t in text if t not in ['<s>', '</s>', '<utt_end>']])

"he ' s dead . i don ' t want to walk out the post . that ' s why he sick and the dead woman named <person> . <person> . not that prince ! you surgeries now !"