# Machine Translation

This is used to translate text from one language to another one, for example.

## Seq2Seq and Attention Model

Sequence 2 Sequence model can be found here

https://github.com/google/seq2seq

# Encoder Decoder with Attention

In [None]:
import tensorflow as tf

In [None]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

In [3]:
import numpy as np
from tensorflow.python.keras.models import Sequential, Model
from tensorflow.python.keras.layers import Embedding, Reshape, Activation, Input, Lambda, Dense, GRU, LSTM, CuDNNLSTM, CuDNNGRU, Dropout, TimeDistributed, RepeatVector
from tensorflow.python.keras.layers.merge import Dot
from tensorflow.python.keras.utils import np_utils
from tensorflow.python.keras.utils.data_utils import get_file
from tensorflow.python.keras.preprocessing.text import Tokenizer
from tensorflow.python.keras.preprocessing.sequence import skipgrams, pad_sequences
from tensorflow.python.keras.utils.np_utils import to_categorical
from tensorflow.python.keras.callbacks import TensorBoard, EarlyStopping, ModelCheckpoint

In [4]:
# function to generate random integer values between 0 and n. the number of timesteps is n
def gen_seq(length, n):
    return [np.random.randint(0,n-1) for _ in range(length)]

In [5]:
def onehot_encoder(seq, n):
    encod = []
    for s in seq:
        v = [0 for _ in range(n)]
        v[s] = 1
        encod.append(v)
    return np.array(encod)

In [6]:
def onehot_decoder(encod_seq):
    return [np.argmax(idx) for idx in encod_seq]

In [7]:
sequence = gen_seq(6,30)
print(sequence)
print(onehot_encoder(sequence,30))
print(onehot_decoder(onehot_encoder(sequence,30)))

[25, 15, 15, 6, 6, 4]
[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]
[25, 15, 15, 6, 6, 4]


### for this example we will create 2 sequences. Sequence of input and sequence of output. 

In [8]:
def generate_pair(n_in,n_out,n_total):
    # generating random sequences
    seq_in = gen_seq(n_in, n_total)
    seq_out = seq_in[:n_out] + [0 for _ in range(n_in-n_out)]
    
    X = onehot_encoder(seq_in, n_total)
    y = onehot_encoder(seq_out, n_total)
    
    # reshaping as 3D tensor
    X  = X.reshape((1, X.shape[0], X.shape[1]))
    y  = y.reshape((1, y.shape[0], y.shape[1]))
    
    return X,y

In [9]:
X, y = generate_pair(6, 3, 30)
print('X=%s, y=%s' % (onehot_decoder(X[0]), onehot_decoder(y[0])))

X=[7, 18, 27, 15, 12, 5], y=[7, 18, 27, 0, 0, 0]


# Creating Model

In [10]:
n_features = 50
n_timesteps_in = 5
n_timesteps_out = 3

#### This approach the idea is to learn sequences, that's why we won't need a embbeding layer.

In [11]:
model = Sequential()

In [12]:
model.add(CuDNNLSTM(150,input_shape=(n_timesteps_in, n_features)))

In [13]:
model.add(RepeatVector(n_timesteps_in))

In [14]:
model.add(CuDNNLSTM(150, return_sequences=True)) # returning all hidden states for all timesteps, not only the last one.

In [15]:
model.add(TimeDistributed(Dense(n_features,activation='softmax')))

In [16]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
cu_dnnlstm (CuDNNLSTM)       (None, 150)               121200    
_________________________________________________________________
repeat_vector (RepeatVector) (None, 5, 150)            0         
_________________________________________________________________
cu_dnnlstm_1 (CuDNNLSTM)     (None, 5, 150)            181200    
_________________________________________________________________
time_distributed (TimeDistri (None, 5, 50)             7550      
Total params: 309,950
Trainable params: 309,950
Non-trainable params: 0
_________________________________________________________________


In [17]:
model.compile(loss='categorical_crossentropy',optimizer='adam', metrics=['acc'])

In [18]:
for epoch in range(5000):
    
    X,y = generate_pair(n_timesteps_in, n_timesteps_out, n_features)
    
    model.fit(X,y, epochs=3, verbose=1)

InvalidArgumentError: No OpKernel was registered to support Op 'CudnnRNNV2' used by {{node sequential/cu_dnnlstm/CudnnRNNV2}} with these attrs: [seed=0, dropout=0, T=DT_FLOAT, input_mode="linear_input", direction="unidirectional", rnn_mode="lstm", seed2=0, is_training=true]
Registered devices: [CPU]
Registered kernels:
  device='GPU'; T in [DT_DOUBLE]
  device='GPU'; T in [DT_FLOAT]
  device='GPU'; T in [DT_HALF]

	 [[sequential/cu_dnnlstm/CudnnRNNV2]] [Op:__inference_train_function_2397]

In [None]:
epochs = 100
correct = 0

In [None]:
# Testing the model with new 100 new randomly generated integer sequences
for _ in range(epochs):
    X,y = generate_pair(n_timesteps_in, n_timesteps_out, n_features)
    pred = model.predict(X)
    if array_equal(onehot_decoder(y[0]), onehot_decoder(pred[0])):
        correct += 1
print('Accuracy: %.2f%%' % (float(correct)/float(epochs)*100.0))

# Attention Mechanism

https://github.com/datalogue/keras-attention

No Keras has it's own mechanism

https://keras.io/api/layers/attention_layers/attention/