In [1]:
import tensorflow as tf

In [2]:
import json
import requests
import numpy as np


def encoder_inference(enc_input,
                      url='http://localhost:8501/v1/models/encoder:predict'):
    """Run the encoder on a TensorFlow Serving instance through its REST API.

    Args:
        enc_input: numpy array of input token ids. It is converted with
            ``.tolist()`` and sent as the ``encoder_input`` tensor of the
            ``serving_default`` signature.
        url: predict endpoint of the served encoder model.

    Returns:
        The value stored under ``'predictions'`` or, failing that,
        ``'outputs'`` in the server's JSON reply.

    Raises:
        RuntimeError: if the reply contains neither key (for example an
            error payload). The decoded reply is included in the message.
    """
    # build the request body
    request_data = json.dumps({
        "signature_name": "serving_default",
        "inputs": {'encoder_input': enc_input.tolist()}
    })
    headers = {"content-type": "application/json"}

    # HTTP request to the server
    json_response = requests.post(url, data=request_data, headers=headers)

    # decode the JSON reply
    predictions = json.loads(json_response.text)

    # same key precedence as before: 'predictions' first, then 'outputs'
    for key in ('predictions', 'outputs'):
        if key in predictions:
            return predictions[key]

    # Previously this case printed the payload and then crashed with an
    # UnboundLocalError on the return statement; fail with the payload instead.
    raise RuntimeError(f'Encoder error json: {predictions}')

def decoder_inference(dec_input, h_out, c_out, enc_out,
                      url='http://localhost:8501/v1/models/decoder:predict'):
    """Run one decoder step on a TensorFlow Serving instance via REST.

    All four tensor arguments are passed to ``json.dumps`` unchanged, so they
    must already be JSON-serialisable (e.g. nested lists, not numpy arrays).

    Args:
        dec_input: value sent as the ``decoder_input`` tensor.
        h_out: value sent as the ``h_input`` tensor.
        c_out: value sent as the ``c_input`` tensor.
        enc_out: value sent as the ``encoder_output`` tensor.
        url: predict endpoint of the served decoder model.

    Returns:
        The value stored under ``'predictions'`` or, failing that,
        ``'outputs'`` in the server's JSON reply.

    Raises:
        RuntimeError: if the reply contains neither key (for example an
            error payload). The decoded reply is included in the message.
    """
    # build the request body
    request_data = json.dumps({
        "signature_name": "serving_default",
        "inputs": {
            'decoder_input': dec_input,
            'h_input': h_out,
            'c_input': c_out,
            'encoder_output': enc_out,
            }
    })
    headers = {"content-type": "application/json"}

    # HTTP request to the server
    json_response = requests.post(url, data=request_data, headers=headers)

    # decode the JSON reply
    predictions = json.loads(json_response.text)

    # same key precedence as before: 'predictions' first, then 'outputs'
    for key in ('predictions', 'outputs'):
        if key in predictions:
            return predictions[key]

    # Previously this case printed the payload and then crashed with an
    # UnboundLocalError on the return statement; fail with the payload instead.
    raise RuntimeError(f'Decoder error json: {predictions}')

In [71]:
# Export locations. They stay plain strings with trailing slashes because
# file names are appended to them with `+` here and in later cells.
base_dir = './exports/'
enc_dir = base_dir + 'encoder/'
dec_dir = base_dir + 'decoder/'
vocab_dir = base_dir + 'vocabulary/'


def _load_vocab_file(file_name):
    """Read one vocabulary array stored under `vocab_dir`."""
    # NOTE: allow_pickle=True unpickles arbitrary Python objects, so only
    # load vocabulary files that come from a trusted export.
    return np.load(vocab_dir + file_name, allow_pickle=True)


# token -> id mappings: .item() unwraps the single stored object
# (presumably a dict, since it is indexed by token string later on)
input_token2idx = _load_vocab_file('input_token2idx.npy').item()
target_token2idx = _load_vocab_file('target_token2idx.npy').item()
# id -> token lookup is used as loaded and indexed by integer id
target_idx2token = _load_vocab_file('target_idx2token.npy')

print(f'Input vocab size: {len(input_token2idx)}')
print(f'Target vocab size: {len(target_token2idx)}')

Input vocab size: 17113
Target vocab size: 17239


In [18]:
# Quick check of the vocabularies.
# Round-trip <START> through both target mappings using the id that was
# actually looked up (not a hard-coded 18), so the second value printed is
# '<START>' only if token->id and id->token agree.
start_id = target_token2idx['<START>']
print(start_id, target_idx2token[start_id])
# <PAD> ids of the input and target vocabularies, side by side
print(input_token2idx['<PAD>'], target_token2idx['<PAD>'])

18 <START>
16 16


Rewrite the encoder using the functional API

In [21]:
# Build the encoder as a Keras functional model: token ids -> embedding ->
# bidirectional LSTM. Layer names given below ('encoder_input', 'enc_out',
# 'h_output', 'c_output') are the keys used when talking to the served model.
H_SIZE = 2048 # LSTM hidden state size
EMB_SIZE = 256 # embedding size (used for both input and output sequences)
ATT_UNITS = 2048 # attention units
INPUT_VOCAB_SIZE = len(input_token2idx)
max_enc_seq_length = 15 # fixed due to attention mechanics

# layers (creation order matters: it determines auto-generated layer names
# and the positions in `encoder.layers`)
enc_input = tf.keras.Input(shape=(max_enc_seq_length,), dtype=tf.int32, name='encoder_input')
# NOTE(review): the Embedding below is created without mask_zero, so it is
# unclear whether this Masking layer's mask reaches the LSTM - confirm
# against the originally trained encoder before changing anything here.
masking = tf.keras.layers.Masking(mask_value=input_token2idx['<PAD>'])
embed = tf.keras.layers.Embedding(INPUT_VOCAB_SIZE, EMB_SIZE)
lstm1 = tf.keras.layers.LSTM(H_SIZE, return_sequences=True, return_state=True)
# merge_mode='sum' adds the forward and backward sequence outputs, keeping
# the feature size at H_SIZE
lstm1 = tf.keras.layers.Bidirectional(lstm1, merge_mode='sum', name='enc_out')

# wiring
out = masking(enc_input)
out = embed(out)
# the bidirectional wrapper returns the sequence output followed by the
# forward (h_f, c_f) and backward (h_b, c_b) final states
out, h_f, c_f, h_b, c_b = lstm1(out)
# final states of both directions are summed as well
h1 = tf.keras.layers.Add(name='h_output')([h_f, h_b])
c1 = tf.keras.layers.Add(name='c_output')([c_f, c_b])

encoder = tf.keras.Model(inputs=enc_input, outputs=[out, h1, c1])

In [22]:
# Print the layer table (output shapes, parameter counts, connections) of the encoder.
encoder.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
encoder_input (InputLayer)      [(None, 15)]         0                                            
__________________________________________________________________________________________________
masking (Masking)               (None, 15)           0           encoder_input[0][0]              
__________________________________________________________________________________________________
embedding (Embedding)           (None, 15, 256)      4380928     masking[0][0]                    
__________________________________________________________________________________________________
enc_out (Bidirectional)         [(None, 15, 2048), ( 37765120    embedding[0][0]                  
______________________________________________________________________________________________

Тестирование инференса модели (и инициализация весов)

In [36]:
# Smoke-test the functional encoder on a single example: 15 token ids
# (max_enc_seq_length), right-padded with 16, the <PAD> id printed by the
# vocabulary quick check.
input_seq = tf.constant([[6925, 9, 4773, 11, 7169, 677, 17008, 21, 16, 16, 16, 16, 16, 16, 16]])
# No explicit initial state is passed to the encoder here.
enc_out, h_out, c_out = encoder(input_seq)
print(f'Input: {input_seq.shape}')
# Shapes of the sequence output and of the two summed final states.
print(f'outputs: {enc_out.shape}, {h_out.shape}, {c_out.shape}')

Input: (1, 15)
outputs: (1, 15, 2048), (1, 2048), (1, 2048)


Загрузка весов из обученного энкодера

In [84]:
# Inspect the encoder's layers in order (position and type) before loading weights into them.
encoder.layers

[<tensorflow.python.keras.engine.input_layer.InputLayer at 0x7f3847c8bf10>,
 <tensorflow.python.keras.layers.core.Masking at 0x7f38477a2e80>,
 <tensorflow.python.keras.layers.embeddings.Embedding at 0x7f3847f5edc0>,
 <tensorflow.python.keras.layers.wrappers.Bidirectional at 0x7f3847eeaf70>,
 <tensorflow.python.keras.layers.merge.Add at 0x7f3848162340>,
 <tensorflow.python.keras.layers.merge.Add at 0x7f3847f574c0>]

In [85]:
# Restore the trained encoder weights from the exported .npy files.
# Layers are resolved by type / by name instead of by position in
# `encoder.layers`, so the cell does not silently target the wrong layer if
# the graph is ever changed.

# embedding layer: a single weight matrix
embedding_layers = [layer for layer in encoder.layers
                    if isinstance(layer, tf.keras.layers.Embedding)]
if len(embedding_layers) != 1:
    raise ValueError(
        f'expected exactly one Embedding layer, found {len(embedding_layers)}')
weights = np.load(enc_dir + 'encoder_embed.npy')
embedding_layers[0].set_weights([weights])

# bidirectional LSTM (named 'enc_out'): six arrays, passed to set_weights in
# file-index order - presumably kernel / recurrent kernel / bias for the
# forward and then the backward LSTM; the order must match the export.
file_names = [enc_dir + f'encoder_lstm_{i}.npy' for i in range(6)]
weights = [np.load(file_name) for file_name in file_names]
encoder.get_layer('enc_out').set_weights(weights)

Проверка работоспособности загруженной saved_model

In [49]:
# Export the encoder and immediately load it back, so the reloaded copy can
# be compared with the in-memory model. The trailing `1` is presumably the
# model version directory expected by TF Serving - confirm against the
# serving configuration.
path = './Chat-bot/saved_models/encoder_saved/1'
encoder.save(path)
# `model` is kept for the follow-up equality check against `encoder`.
model = tf.keras.models.load_model(path)

INFO:tensorflow:Assets written to: ./Chat-bot/saved_models/encoder_saved/1/assets
INFO:tensorflow:Assets written to: ./Chat-bot/saved_models/encoder_saved/1/assets


In [50]:
# Compare the in-memory encoder with the reloaded copy, output by output.
# Each printed value is True only when the two tensors are element-wise equal.
original_outputs = encoder(input_seq)
reloaded_outputs = model(input_seq)
for original, reloaded in zip(original_outputs, reloaded_outputs):
    tensors_match = tf.math.reduce_all(tf.equal(original, reloaded))
    print(tensors_match.numpy())
# the reloaded copy is not needed after the check
del model

True
True
True


Создадим класс Декодера с вниманием и проинициализируем его

In [52]:
# DECODER
# One decoding step as a Keras functional model: additive attention over the
# encoder outputs, followed by embedding -> LSTM -> Dense over the target
# vocabulary. Creation order matters: it fixes auto-generated layer names and
# the positions in `decoder.layers`.
TARGET_VOCAB_SIZE = len(target_token2idx)
# input declaration
# one token id per example (no dtype is given, so the Keras default applies)
x = tf.keras.Input(shape=(1, ), name='decoder_input')
# previous LSTM state
h_input = tf.keras.Input(shape=(H_SIZE, ), name='h_input')
c_input = tf.keras.Input(shape=(H_SIZE, ), name='c_input')
# full encoder output sequence; its fixed length is why max_enc_seq_length is fixed
enc_output = tf.keras.Input(shape=(max_enc_seq_length, H_SIZE), name='encoder_output')

# layers declaration
# NOTE(review): as in the encoder, the Embedding is created without
# mask_zero - confirm whether this Masking layer has any effect.
dec_masking = tf.keras.layers.Masking(mask_value=target_token2idx['<PAD>'])
dec_embed = tf.keras.layers.Embedding(TARGET_VOCAB_SIZE, EMB_SIZE)
dec_lstm1 = tf.keras.layers.LSTM(H_SIZE, return_sequences=True, return_state=True)
# no activation is set, so this layer's outputs are unnormalised scores
dec_fc = tf.keras.layers.Dense(TARGET_VOCAB_SIZE, name='decoder_output')
att_W1 = tf.keras.layers.Dense(ATT_UNITS, name='att_W1')
att_W2 = tf.keras.layers.Dense(ATT_UNITS, name='att_W2')
att_V = tf.keras.layers.Dense(1, name='att_V')

# attention logic
# (batch, H_SIZE) -> (batch, 1, H_SIZE) so the query broadcasts over encoder steps
query_with_time_axis = tf.expand_dims(h_input, 1)
# one score per encoder position: V(tanh(W1 * query + W2 * encoder_output))
score = att_V(tf.nn.tanh(
            att_W1(query_with_time_axis) + att_W2(enc_output)))
# normalise over the encoder time axis
attention_weights = tf.nn.softmax(score, axis=1)
# weighted sum of the encoder outputs -> (batch, H_SIZE)
context_vector = attention_weights * enc_output
context_vector = tf.reduce_sum(context_vector, axis=1)
# decoder logic
out = dec_masking(x)
out = dec_embed(out)
# prepend the context vector to the token embedding along the feature axis
out = tf.concat([tf.expand_dims(context_vector, 1), out], axis=-1)
out, h1, c1 = dec_lstm1(out, initial_state=(h_input, c_input))
# drop the length-1 time axis: (batch, 1, H_SIZE) -> (batch, H_SIZE)
out = tf.reshape(out, (-1, out.shape[2]))
out = dec_fc(out)

# just to name outputs
# NOTE: this rebinds the notebook-level names h_out / c_out to symbolic Keras
# tensors; re-run an encoder call before using them as real states again.
h_out = tf.keras.layers.Lambda(lambda x: x, name='h_output')(h1)
c_out = tf.keras.layers.Lambda(lambda x: x, name='c_output')(c1)

# create decoder model
decoder = tf.keras.Model(inputs=[x, h_input, c_input, enc_output], outputs=[out, h_out, c_out])

In [53]:
# Print the layer table (output shapes, parameter counts, connections) of the decoder.
decoder.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
h_input (InputLayer)            [(None, 2048)]       0                                            
__________________________________________________________________________________________________
tf.expand_dims (TFOpLambda)     (None, 1, 2048)      0           h_input[0][0]                    
__________________________________________________________________________________________________
encoder_output (InputLayer)     [(None, 15, 2048)]   0                                            
__________________________________________________________________________________________________
att_W1 (Dense)                  (None, 1, 2048)      4196352     tf.expand_dims[0][0]             
____________________________________________________________________________________________

In [54]:
# Check the decoder's output tensors: each entry shows its shape and the
# layer that created it ('decoder_output', 'h_output', 'c_output').
decoder.outputs

[<KerasTensor: shape=(None, 17239) dtype=float32 (created by layer 'decoder_output')>,
 <KerasTensor: shape=(None, 2048) dtype=float32 (created by layer 'h_output')>,
 <KerasTensor: shape=(None, 2048) dtype=float32 (created by layer 'c_output')>]

Запускаем энкодер и декодер в связке (инициализация весов декодера)

In [91]:
# Run the encoder and one decoder step back to back on a single example.
input_seq = tf.constant([[6925, 9, 4773, 11, 7169, 677, 17008, 21, 16, 16, 16, 16, 16, 16, 16]])

# Encoder states get their own names so the "Encoder outputs" line below
# reports the encoder's states. Previously h_out / c_out had already been
# overwritten by the decoder when that line was printed.
enc_out, enc_h, enc_c = encoder(input_seq)

# Decoding starts from the <START> token; its id is looked up instead of
# being hard-coded as 18.
dec_input = tf.expand_dims([target_token2idx['<START>']], 0)

# After this call h_out / c_out hold the decoder's new state, as before.
predictions, h_out, c_out = decoder([dec_input, enc_h, enc_c, enc_out])
predicted_id = tf.argmax(predictions[0]).numpy()
print('argmax prediction:', predicted_id)
print(f'Encoder outputs: {enc_out.shape}, {enc_h.shape}, {enc_c.shape}')
print(f'Decoder outputs: {predictions.shape}, {h_out.shape}, {c_out.shape}')

argmax prediction: 10358
Encoder outputs: (1, 15, 2048), (1, 2048), (1, 2048)
Decoder outputs: (1, 17239), (1, 2048), (1, 2048)


Загружаем веса обученного декодера

In [69]:
# Print position, name and class of every decoder layer, to see where each
# layer sits in `decoder.layers`.
for i, layer in enumerate(decoder.layers):
    print(f'{i:2}: {layer.name:22}{layer.__class__}')

 0: h_input               <class 'tensorflow.python.keras.engine.input_layer.InputLayer'>
 1: tf.expand_dims        <class 'tensorflow.python.keras.layers.core.TFOpLambda'>
 2: encoder_output        <class 'tensorflow.python.keras.engine.input_layer.InputLayer'>
 3: att_W1                <class 'tensorflow.python.keras.layers.core.Dense'>
 4: att_W2                <class 'tensorflow.python.keras.layers.core.Dense'>
 5: tf.__operators__.add  <class 'tensorflow.python.keras.layers.core.TFOpLambda'>
 6: tf.math.tanh          <class 'tensorflow.python.keras.layers.core.TFOpLambda'>
 7: att_V                 <class 'tensorflow.python.keras.layers.core.Dense'>
 8: tf.nn.softmax         <class 'tensorflow.python.keras.layers.core.TFOpLambda'>
 9: tf.math.multiply      <class 'tensorflow.python.keras.layers.core.TFOpLambda'>
10: decoder_input         <class 'tensorflow.python.keras.engine.input_layer.InputLayer'>
11: tf.math.reduce_sum    <class 'tensorflow.python.keras.layers.core.TFOpLambda'

In [83]:
# Restore the trained decoder weights from the exported .npy files.
#
# Layers are resolved by name (or by type for the unnamed Embedding / LSTM)
# instead of by position in `decoder.layers`, so the cell keeps targeting the
# right layers if the graph gains, loses or reorders an op.

def _only_layer_of_type(model, layer_type):
    """Return the single layer of `layer_type` in `model`; fail loudly otherwise."""
    found = [layer for layer in model.layers if isinstance(layer, layer_type)]
    if len(found) != 1:
        raise ValueError(
            f'expected exactly one {layer_type.__name__} layer, found {len(found)}')
    return found[0]


def _load_decoder_arrays(stem, count):
    """Load `<dec_dir><stem>_<i>.npy` for i in range(count), in index order."""
    return [np.load(dec_dir + f'{stem}_{i}.npy') for i in range(count)]


# embedding layer: one matrix, stored without an index suffix
weights = np.load(dec_dir + 'decoder_embed.npy')
_only_layer_of_type(decoder, tf.keras.layers.Embedding).set_weights([weights])

# (file stem, number of arrays, target layer), in the original loading order:
# LSTM, output Dense, then the three attention Dense layers
weight_sources = [
    ('decoder_lstm', 3, _only_layer_of_type(decoder, tf.keras.layers.LSTM)),
    ('decoder_fc', 2, decoder.get_layer('decoder_output')),
    ('decoder_attW1', 2, decoder.get_layer('att_W1')),
    ('decoder_attW2', 2, decoder.get_layer('att_W2')),
    ('decoder_attV', 2, decoder.get_layer('att_V')),
]
for stem, count, target_layer in weight_sources:
    weights = _load_decoder_arrays(stem, count)
    target_layer.set_weights(weights)

### Проверяем, что декодер сохраняется и остается работоспособным после загрузки

In [87]:
# Export the decoder and immediately load it back, so the reloaded copy can
# be compared with the in-memory model. The trailing `1` is presumably the
# model version directory expected by TF Serving - confirm against the
# serving configuration.
path = './Chat-bot/saved_models/decoder_saved/1'
decoder.save(path)
model = tf.keras.models.load_model(path)
# `model` is kept for the follow-up equality check against `decoder`.

INFO:tensorflow:Assets written to: ./Chat-bot/saved_models/decoder_saved/1/assets
INFO:tensorflow:Assets written to: ./Chat-bot/saved_models/decoder_saved/1/assets


In [88]:
# Compare the in-memory decoder with the reloaded copy, output by output.
# Each printed value is True only when the two tensors are element-wise equal.
data = [dec_input, h_out, c_out, enc_out]
original_outputs = decoder(data)
reloaded_outputs = model(data)
for original, reloaded in zip(original_outputs, reloaded_outputs):
    tensors_match = tf.math.reduce_all(tf.equal(original, reloaded))
    print(tensors_match.numpy())
# the reloaded copy is not needed after the check
del model

True
True
True


Запуск инференса chat-bot для токена <START> на тестовом предложении

In [90]:
# One encoder pass and one decoder step through TensorFlow Serving, starting
# from the <START> token, on the same test sentence as the in-process run.
input_seq = np.array([[6925, 9, 4773, 11, 7169, 677, 17008, 21, 16, 16, 16, 16, 16, 16, 16]])

# encoder pass over HTTP; the reply is keyed by the encoder's output names
encoder_outputs = encoder_inference(input_seq)
enc_out, h_out, c_out = (
    encoder_outputs[key] for key in ('enc_out', 'h_output', 'c_output')
)

# first decoder input is the <START> token
start_id = target_token2idx['<START>']
dec_input = [[start_id]]

decoder_outputs = decoder_inference(dec_input, h_out, c_out, enc_out)

# index of the highest-scoring target token
dec_out = np.array(decoder_outputs['decoder_output'])
print(dec_out.argmax())


10358


Предсказание модели из TF Serving совпадает с предсказанием модели при запуске в связке (см. инициализацию весов декодера).

In [179]:
# Generate a reply token by token through TensorFlow Serving, sampling each
# token from the temperature-scaled softmax of the decoder's logits.
input_seq = np.array([[6925, 9, 4773, 11, 7169, 677, 17008, 21, 16, 16, 16, 16, 16, 16, 16]])

# Limit the output length to avoid cycled output.
# Model was trained on max decoder length = 16.
MAX_REPLY_TOKENS = 16
# Logits are divided by the temperature before the softmax: values > 1
# flatten the distribution, values < 1 sharpen it. Must be non-zero.
temperature = 1.0

result = ''

# encoder pass over HTTP
encoder_outputs = encoder_inference(input_seq)

enc_out = encoder_outputs['enc_out']
h_out = encoder_outputs['h_output']
c_out = encoder_outputs['c_output']
dec_input = [[target_token2idx['<START>']]] # token <START>

# NOTE: sampling is unseeded, so every run can produce a different reply;
# call np.random.seed(...) beforehand if a reproducible output is needed.
for _ in range(MAX_REPLY_TOKENS):

    decoder_outputs = decoder_inference(dec_input, h_out, c_out, enc_out)
    predictions = np.array(decoder_outputs['decoder_output'])
    # carry the decoder state over to the next step
    h_out = decoder_outputs['h_output']
    c_out = decoder_outputs['c_output']

    # Softmax over the first (only) batch row. Subtracting the maximum first
    # leaves the probabilities unchanged but keeps exp() from overflowing on
    # large logits, which would otherwise produce inf/nan probabilities.
    logits = predictions[0] / temperature
    logits = logits - np.max(logits)
    pred_softmax = np.exp(logits)
    pred_softmax /= np.sum(pred_softmax)

    # sample a token id; the id range comes from the logits themselves
    predicted_id = np.random.choice(len(pred_softmax), p=pred_softmax)
    token = target_idx2token[predicted_id]
    if token == '<END>':
        break

    result += token + ' '

    # the predicted ID is fed back into the model (plain int so it is JSON-serialisable)
    dec_input = [[int(predicted_id)]]

result

'no particular reason . . . nice . . . '