In [8]:
def homework(train_X, valid_X, train_Y, valid_Y, tokenizer_iu):
    import numpy as np
    import copy
    from keras.models import Model
    from keras.layers import Input, Embedding, Dense, LSTM, Flatten
    from keras.layers import Permute, Activation, concatenate, dot, Reshape
    from keras import backend as K

    emb_dim = 256
    hid_dim = 256
    att_dim = 256

    vocab_size = len(tokenizer_iu.word_index) + 1
    seq_len = len(train_Y[0])

    en_in = Input(shape=(9, 9, 512))
    en_f = Flatten()(en_in)
    # decorderのLSTMへの入力に変換（先に、再利用のための「層の変数化」が必要）
    dense_h = Dense(hid_dim)
    dense_c = Dense(hid_dim)
    h_0 = dense_h(en_f)
    c_0 = dense_c(en_f)
    # Attention利用時にそなえてreshape: VGGなら(9*9, 512)へ
    en_map = Reshape((81, 512))(en_f)

    # decorderのインプット
    de_in = Input(shape=(seq_len,))

# 層の定義（再利用用）
    de_embedding = Embedding(vocab_size, emb_dim)
    de_lstm = LSTM(hid_dim, activation='tanh', return_sequences=True, return_state=True)
# 接続
    de_emb = de_embedding(de_in)
    de_out, _, _ = de_lstm(de_emb, initial_state=[h_0, c_0])

    ## Attension
    # 1. スコアの計算 （ドット積）
    sc_dense = Dense(hid_dim)
    en_score = sc_dense(en_map)
    score = dot([de_out, en_score], axes=(2,2))
    # 2. 重み計算 (Attention, softmax関数)
    attention = Activation('softmax')(score) 
    # 3. 文脈ベクトルの計算（エンコーダの出力に重みをかける）
    context = dot([attention, en_map], axes=(2,1)) 
    # 4. 出力ベクトルの計算（文脈とデコーダー出力を結合→Dense層へ）
    out_dens1 = Dense(att_dim, activation='tanh')
    out_dens2 = Dense(vocab_size, activation='softmax')
    concat = concatenate([context, de_out], axis=2)
    at_out = out_dens1(concat)
    output = out_dens2(at_out)

    # モデル構築（入力は符号化器＆復号化器、出力は復号化器のみ）
    model = Model([en_in, de_in], output)
    model.compile(optimizer='rmsprop', loss='sparse_categorical_crossentropy')

    # 学習（Lesson4のを利用、validを別に与える）
    train_target = np.hstack((train_Y[:, 1:], np.zeros((len(train_Y),1), dtype=np.int32)))
    valid_target = np.hstack((valid_Y[:, 1:], np.zeros((len(valid_Y),1), dtype=np.int32)))
    model.fit([train_X, train_Y], np.expand_dims(train_target, -1), batch_size=128, epochs=80, verbose=2, validation_data=([valid_X, valid_Y], np.expand_dims(valid_target, -1)))

    # 予測用。Encoder側。Dence層のみ。
    en_in = Input(shape=(9, 9, 512))
    en_f = Flatten()(en_in)
    en_map = Reshape((81, 512))(en_f)
    h_0 = dense_h(en_f)
    c_0 = dense_c(en_f)
    en_model = Model([en_in], [en_map, h_0, c_0])

    # Decoder側。Lesson 4はAttentionと別だが１つにまとめる。
    h_tm1 = Input(shape=(hid_dim,))
    c_tm1 = Input(shape=(hid_dim,))
    de_in = Input(shape=(1,))
    en_map_in = Input(shape=(81, 512,))
    # DecoderのLSTM
    de_emb = de_embedding(de_in) # 学習済みEmbeddingレイヤーを利用
    de_out, de_state1, de_state2 = de_lstm(de_emb, initial_state=[h_tm1, c_tm1]) # 学習済みLSTMレイヤーを利用
    ## Attension
    en_score = sc_dense(en_map_in)
    score = dot([de_out, en_score], axes=(2,2))
    attention = Activation('softmax')(score) 
    context = dot([attention, en_map_in], axes=(2,1)) 
    concat = concatenate([context, de_out], axis=2)
    at_out = out_dens1(concat)
    output = out_dens2(at_out)

    de_model = Model([de_in, h_tm1, c_tm1, en_map_in], [output, de_state1, de_state2])

    # 出力をIUPAC名に戻す
    def decode_sequence(input_seq, bos_eos, max_output_length = 100):
        input_seq = np.reshape(input_seq, (1, 9, 9, 512))
        map, state1, state2 = en_model.predict(input_seq)

        target_seq = np.array(bos_eos[0])
        output_seq= copy.deepcopy(bos_eos[0]) # ここが重要！
    
        while True:
            output_tokens, state1, state2 = de_model.predict([target_seq, state1, state2, map])
            sampled_token_index = [np.argmax(output_tokens[0, -1, :])]
            output_seq += sampled_token_index
        
            if (sampled_token_index == bos_eos[1] or len(output_seq) > max_output_length):
                break

            target_seq = np.array(sampled_token_index)

        return output_seq

    def save_model(model, name):
        data_dir = '/root/userspace/data/data/'
        if not os.path.isdir(data_dir):
            os.makedirs(data_dir)
        result_dir = os.path.normpath(data_dir)
#        model.save_weights(os.path.join(result_dir, name + '_model.h5'))

    return decode_sequence

In [None]:
import numpy as np
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from nltk.translate.bleu_score import sentence_bleu

import os


def load_dataset():
# 画像変換arrayの読み込み
    train_1 = np.load('/root/userspace/data/data/vgg_array1.npy')
    train_2 = np.load('/root/userspace/data/data/vgg_array2.npy')
    train_3 = np.load('/root/userspace/data/data/vgg_array3.npy')
    train_4 = np.load('/root/userspace/data/data/vgg_array4.npy')
    train_X = np.r_[train_1, train_2, train_3, train_4]
    valid_X = np.load('/root/userspace/data/data/vgg_valid.npy')
    test_X = np.load('/root/userspace/data/data/vgg_test.npy')

# caption読み込み
    whole_texts1 = []
    test_data = open("/root/userspace/data/text/caprion_all.txt", "r")
    for line in test_data:
        whole_texts1.append(line.rstrip("\n"))
    caption_test = []
    test_data = open("/root/userspace/data/text/caprion_test.txt", "r")
    for line in test_data:
        caption_test.append(line)
    caption_valid = []
    test_data = open("/root/userspace/data/text/caprion_valid.txt", "r")
    for line in test_data:
        caption_valid.append(line)
    caption_train = []
    test_data = open("/root/userspace/data/text/caprion_train.txt", "r")
    for line in test_data:
        caption_train.append(line)

    for i in range(len(whole_texts1)):
        whole_texts1[i] = "<s> " + whole_texts1[i].strip() + " </s>"

    tokenizer_iu = Tokenizer(filters="")
    tokenizer_iu.fit_on_texts(whole_texts1)

    for i in range(len(caption_test)):
        caption_test[i] = "<s> " + caption_test[i].strip() + " </s>"
    test_Y = tokenizer_iu.texts_to_sequences(caption_test)
    test_Y = pad_sequences(test_Y, padding='post')
    pad2 = np.zeros((116, 12), dtype = int)
    test_Y = np.c_[test_Y, pad2]

    for i in range(len(caption_valid)):
        caption_valid[i] = "<s> " + caption_valid[i].strip() + " </s>"
    valid_Y = tokenizer_iu.texts_to_sequences(caption_valid)
    valid_Y = pad_sequences(valid_Y, padding='post')
    pad1 = np.zeros((283, 3), dtype = int)
    valid_Y = np.c_[valid_Y, pad1]

    for i in range(len(caption_train)):
        caption_train[i] = "<s> " + caption_train[i].strip() + " </s>"
    train_Y = tokenizer_iu.texts_to_sequences(caption_train)
    train_Y = pad_sequences(train_Y, padding='post')
    train_Y = np.r_[train_Y, train_Y, train_Y, train_Y]

    return train_X, valid_X, test_X, train_Y, valid_Y, test_Y, tokenizer_iu

def compute_bleu(refs, preds):
    return np.mean([sentence_bleu(r, p, emulate_multibleu=True) for r, p in zip(refs, preds)])

def score_homework():
    train_X, valid_X, test_X, train_Y, valid_Y, test_Y, tokenizer_iu = load_dataset()
    decode_sequence = homework(train_X, valid_X, train_Y, valid_Y, tokenizer_iu)

    bos_eos = tokenizer_iu.texts_to_sequences(["<s>", "</s>"])
    output = [decode_sequence(test_X[i][np.newaxis,:], bos_eos, 100) for i in range(len(test_X))]
    
    detokenizer_iu = dict(map(reversed, tokenizer_iu.word_index.items()))
    
    preds = [[detokenizer_iu[i] for i in output[n][1:-1]] for n in range(len(output))]
    refs = [[detokenizer_iu[i] for i in test_Y[n][1:-(np.count_nonzero(test_Y[n]==0)+1)]] for n in range(len(test_Y))]
    refs = [[seq] for seq in refs]
    
    print(compute_bleu(refs, preds))

if __name__ == '__main__':
    score_homework()


Train on 21480 samples, validate on 283 samples
Epoch 1/80
 - 35s - loss: 0.9940 - val_loss: 0.6479
Epoch 2/80
 - 34s - loss: 0.5381 - val_loss: 0.4876
Epoch 3/80
 - 34s - loss: 0.4203 - val_loss: 0.4125
Epoch 4/80
 - 34s - loss: 0.3501 - val_loss: 0.3703
Epoch 5/80
 - 34s - loss: 0.3035 - val_loss: 0.3375
Epoch 6/80
 - 34s - loss: 0.2692 - val_loss: 0.3243
Epoch 7/80
 - 34s - loss: 0.2421 - val_loss: 0.3041
Epoch 8/80
 - 34s - loss: 0.2191 - val_loss: 0.2918
Epoch 9/80
 - 34s - loss: 0.2021 - val_loss: 0.2774
Epoch 10/80
 - 34s - loss: 0.1833 - val_loss: 0.2713
Epoch 11/80
 - 34s - loss: 0.1681 - val_loss: 0.2734
Epoch 12/80
 - 34s - loss: 0.1542 - val_loss: 0.2611
Epoch 13/80
 - 34s - loss: 0.1432 - val_loss: 0.2626
Epoch 14/80
 - 34s - loss: 0.1356 - val_loss: 0.2481
Epoch 15/80
 - 34s - loss: 0.1270 - val_loss: 0.2500
Epoch 16/80
 - 34s - loss: 0.1210 - val_loss: 0.2489
Epoch 17/80
 - 34s - loss: 0.1137 - val_loss: 0.2550
Epoch 18/80
 - 34s - loss: 0.1075 - val_loss: 0.2453
Epoch 1

In [4]:
detokenizer_iu = dict(map(reversed, tokenizer_iu.word_index.items()))

text_no = 0
input_seq = np.reshape(test_X[text_no], (1, 9, 9, 512))
bos_eos = tokenizer_iu.texts_to_sequences(["<s>", "</s>"])

print('生成文:', ' '.join([detokenizer_iu[i] for i in decode_sequence(input_seq, bos_eos)]))
test_Y_array = np.array(test_Y[text_no])[np.nonzero(np.array(test_Y[text_no]))]
test_Y_list = list(test_y_array)
print('正解文:', ' '.join([detokenizer_iu[i] for i in test_y_list]))

NameError: name 'tokenizer_iu' is not defined

In [19]:
detokenizer_iu = dict(map(reversed, tokenizer_iu.word_index.items()))

text_no = 0
input_seq = np.reshape(test_X[text_no], (1, 8, 8, 2048))
bos_eos = tokenizer_iu.texts_to_sequences(["<s>", "</s>"])

print('生成文:', ' '.join([detokenizer_iu[i] for i in decode_sequence(input_seq, bos_eos)]))
test_Y_array = np.array(test_Y[text_no])[np.nonzero(np.array(test_Y[text_no]))]
test_Y_list = list(test_Y_array)
print('正解文:', ' '.join([detokenizer_iu[i] for i in test_Y_list]))

NameError: name 'tokenizer_iu' is not defined