In [1]:
import numpy as np
import json
import tensorflow as tf
# Let TensorFlow allocate GPU memory on demand instead of reserving it all up
# front. The setting is applied to every visible GPU because TensorFlow
# requires memory growth to be configured identically across devices, and the
# loop is simply a no-op on CPU-only machines (the original `[0]` index raised
# IndexError there).
gpus = tf.config.experimental.list_physical_devices('GPU')
for device in gpus:
    tf.config.experimental.set_memory_growth(device, True)

# Kept for backward compatibility: first GPU, or None when none is available.
gpu = gpus[0] if gpus else None

### *Helper Functions*

In [2]:
def load_json(filename):
    """Read a JSON document from disk and return the decoded Python object.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file (dict, list, ...).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON is UTF-8 by specification; pin the encoding so non-ASCII text
    # decodes the same way regardless of the platform's locale default.
    with open(filename, 'r', encoding='utf-8') as f:
        data = json.load(f)
    return data

In [3]:
def encode_input(input_text, sequence_length, input_encoder):
    """One-hot encode a batch of strings, padding each row with spaces.

    Args:
        input_text: Sequence of strings to encode.
        sequence_length: Number of time steps in every encoded row. Strings
            longer than this raise ``IndexError``.
        input_encoder: Mapping from character to its one-hot index. It must
            contain ``' '``, which is used as the padding character.

    Returns:
        A float32 array of shape
        ``(len(input_text), sequence_length, len(input_encoder))`` in which
        every time step has exactly one entry set to 1.0.

    Raises:
        KeyError: If a character (or the space used for padding) is missing
            from ``input_encoder``.
    """
    x = np.zeros((len(input_text), sequence_length, len(input_encoder)), dtype="float32")

    for i, inp in enumerate(input_text):
        for t, char in enumerate(inp):
            x[i, t, input_encoder[char]] = 1.0
        # Pad from the end of the text. Using len(inp) rather than the leaked
        # loop index keeps empty strings correct: they become all-padding rows
        # instead of raising NameError or reusing the previous row's index.
        x[i, len(inp):, input_encoder[' ']] = 1.0
    return x

In [4]:
def encode_target(target_text, sequence_length, target_encoder):
    """One-hot encode target strings as a decoder input / decoder target pair.

    ``x`` holds each string as-is; ``y`` holds the same characters shifted one
    time step earlier, so ``y[i, t]`` is the character at position ``t + 1``
    of ``target_text[i]``. Both are padded with spaces to ``sequence_length``.

    Args:
        target_text: Sequence of strings to encode.
        sequence_length: Number of time steps in every encoded row. Strings
            longer than this raise ``IndexError``.
        target_encoder: Mapping from character to its one-hot index. It must
            contain ``' '``, which is used as the padding character.

    Returns:
        A tuple ``(x, y)`` of float32 arrays, each of shape
        ``(len(target_text), sequence_length, len(target_encoder))``.

    Raises:
        KeyError: If a character (or the space used for padding) is missing
            from ``target_encoder``.
    """
    x = np.zeros((len(target_text), sequence_length, len(target_encoder)), dtype="float32")
    y = np.zeros((len(target_text), sequence_length, len(target_encoder)), dtype="float32")

    for i, inp in enumerate(target_text):
        for t, char in enumerate(inp):
            x[i, t, target_encoder[char]] = 1.0
            if t > 0:
                # The target at step t - 1 is the character seen at step t.
                y[i, t - 1, target_encoder[char]] = 1.0
        # Derive the padding start from the text length instead of the leaked
        # loop index, so empty strings yield all-padding rows rather than
        # raising NameError or reusing the previous row's index.
        length = len(inp)
        x[i, length:, target_encoder[" "]] = 1.0
        # y is one step shorter than x; clamp so an empty string pads from 0.
        y[i, max(length - 1, 0):, target_encoder[" "]] = 1.0
    return x, y

### *Data Preprocessing*

In [5]:
# Load the text pairs and the two character-to-index lookup tables.
# `data` is indexed below by the keys 'input_text' and 'target_text'.
data = load_json(
    'data/data.json'
)
input_encoder = load_json(
    'data/input-encoder.json'
)
target_encoder = load_json(
    'data/target-encoder.json'
)

In [6]:
# Length of the longest input and target entry; these become the padded
# sequence lengths passed to the encoding helpers.
max_inplength = max(map(len, data['input_text']))
max_tarlength = max(map(len, data['target_text']))
print(max_inplength, max_tarlength)

13 59


In [7]:
# One-hot encode the input texts, padded to the longest input length.
encoder_inputs = encode_input(
    input_text=data['input_text'],
    sequence_length=max_inplength,
    input_encoder=input_encoder,
)

In [8]:
# One-hot encode the target texts into the decoder's inputs and its
# one-step-shifted targets, both padded to the longest target length.
decoder_inputs, decoder_targets = encode_target(
    target_text=data['target_text'],
    sequence_length=max_tarlength,
    target_encoder=target_encoder,
)

### *Model*

In [9]:
# Encoder: a single LSTM over variable-length one-hot input sequences.
# `return_state=True` makes the layer return its final states alongside the
# output.
encoder_input = tf.keras.Input(
    shape=(None, len(input_encoder)),
    name='encoder_input',
)
encoder = tf.keras.layers.LSTM(
    units=256,
    return_state=True,
    name='encoder_layer',
)
encoder_output, state_h, state_c = encoder(encoder_input)

# The two final encoder states, bundled for use as an LSTM initial state.
encoder_states = [state_h, state_c]

In [10]:
# Decoder: an LSTM initialised from the encoder's final states that emits one
# output per time step, followed by a softmax over the target vocabulary.
decoder_input = tf.keras.Input(
    shape=(None, len(target_encoder)),
    name='decoder_input',
)

decoder = tf.keras.layers.LSTM(
    units=256,
    return_sequences=True,
    return_state=True,
    name='decoder_layer',
)
# The decoder's own final states are not needed here, so they are discarded.
decoder_output, _, _ = decoder(decoder_input, initial_state=encoder_states)

dense = tf.keras.layers.Dense(
    units=len(target_encoder),
    activation="softmax",
    name='output_layer',
)

# Replace the raw LSTM sequence with per-step character probabilities.
decoder_output = dense(decoder_output)

In [11]:
# Tie the two inputs to the decoder's softmax output in a single model.
model = tf.keras.Model(
    inputs=[encoder_input, decoder_input],
    outputs=decoder_output,
)

In [12]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
encoder_input (InputLayer)      [(None, None, 68)]   0                                            
__________________________________________________________________________________________________
decoder_input (InputLayer)      [(None, None, 89)]   0                                            
__________________________________________________________________________________________________
encoder_layer (LSTM)            [(None, 256), (None, 332800      encoder_input[0][0]              
__________________________________________________________________________________________________
decoder_layer (LSTM)            [(None, None, 256),  354304      decoder_input[0][0]              
                                                                 encoder_layer[0][1]          

In [13]:
# Configure training: Adam with a learning rate of 1e-4, categorical
# cross-entropy against the one-hot targets, and accuracy as the metric.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [14]:
# Train for 50 epochs in batches of 32, with 20% of the samples held out for
# validation. Left as the cell's last expression so the returned History
# object is still displayed.
model.fit(
    x=[encoder_inputs, decoder_inputs],
    y=decoder_targets,
    batch_size=32,
    epochs=50,
    validation_split=0.2,
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x7f381060cc90>

In [15]:
# Persist the trained model to disk.
model.save(filepath='Eng2Fre.h5')