The dataset is taken from the site: http://www.manythings.org/anki/

There are many datasets which could have been taken. I selected English to French as it had enough samples to train the model effectively.

In [1]:
import pandas as pd
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
import io

from keras.models import Model
from keras.layers import Dense, Embedding, Input
from keras.layers import LSTM, Bidirectional, GlobalMaxPool1D, Dropout,Concatenate
from keras.layers import RepeatVector, Concatenate, Activation, Dot, Lambda          # New Layers
import keras.backend as K
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.optimizers import Adam
from sklearn.metrics import roc_auc_score

import warnings
warnings.filterwarnings("ignore")

Using TensorFlow backend.


In [3]:
from google.colab import files
uploaded = files.upload()

Saving fra.txt to fra.txt


In [0]:
# Softmax function to calculate alpha values for the context in attention

# Expected shape is  (batch_size,TimeStep,Dimension)

def softmax_over_time(x):
  assert (K.ndim(x) > 2)
  e = K.exp(x - K.max(x,axis = 1, keepdims = True))
  s = K.sum(e,axis=1,keepdims = True)
  return e/s

In [0]:
df = pd.read_table('/content/fra.txt',header = None)

In [6]:
#Dataset for the model

df.columns = ['English','French']
df.head()

Unnamed: 0,English,French
0,Go.,Va !
1,Hi.,Salut !
2,Hi.,Salut.
3,Run!,Cours !
4,Run!,Courez !


In [0]:
# Setting Parameters

BATCH_SIZE = 32            # Batch size for the training set. After each BATCH_SIZE the weights will be updated
EPOCHS = 10                # Number of times we will train the model
LSTM_units = 512           # Output nits for the LSTM Layer
MAX_SEQUENCE_LENGTH = 100  # Maximum number of words in a single sentence
VOCAB_SIZE = 20000         # Vocab size for the dataset
EMBEDDING_DIM = 50         # Embedding units to represent a single word in English
EMBEDDING_DIM_FRENCH = 100 # Embedding units to represent a single word in French

In [0]:
english_text_input = df['English'][:15000].values                                 # Input for the training Encoder
french_text_input = df['French'][:15000].apply(lambda x:'<sos> ' + x).values      # Input for the training Decoder
french_text_output = df['French'][:15000].apply(lambda x:x + ' <eos>').values     # Output for the training Decoder

# We are using <sos> and <eos> as we will be using Teacher Forcing.

In [9]:
print("Number of samples for the training data: ",len(english_text_input))  # 170190 samples

Number of samples for the training data:  15000


In [10]:
# Tokenize the English Text
tokenizer_english = Tokenizer(num_words=VOCAB_SIZE)
sentences = tokenizer_english.fit_on_texts(english_text_input)
english_sequences_input = tokenizer_english.texts_to_sequences(english_text_input)
english_sequences_input[:5]

[[18], [668], [668], [146], [146]]

The words are now converted into numbers

In [11]:
# Get the word to index Mapping
word2idx_english = tokenizer_english.word_index
print('Unique english words: ',len(word2idx_english))    # Identified 14384 unique letters

Unique english words:  2921


In [12]:
#Tokenize the French Text
tokenizer_french = Tokenizer(num_words=VOCAB_SIZE,filters = '')
sentences = tokenizer_french.fit_on_texts(french_text_input + french_text_output)  # We use both the dataset so that <sos> and <eos> are also included in the tokenize set of words
french_sequences_input = tokenizer_french.texts_to_sequences(french_text_input)
french_sequences_output = tokenizer_french.texts_to_sequences(french_text_output)
print('Input: ',french_sequences_input[:5])
print('Output: ',french_sequences_output[:5])

Input:  [[1, 58, 6], [1, 1208, 6], [1], [1], [1]]
Output:  [[58, 6, 2], [1208, 6, 2], [2], [2], [2]]


In [13]:
# Get the word to index Mapping

word2idx_french = tokenizer_french.word_index
print('Unique french words: ',len(word2idx_french))    # Identified 14384 unique letters


Unique french words:  16547


In [0]:
# Getting Max length for both the texts
max_english = max(len(s) for s in english_sequences_input)   # Max sequence length in English
max_french = max(len(s) for s in french_sequences_input)     # Max sequence length in French
num_words_french = len(word2idx_french) + 1                  # Possible outputs for the french language

In [15]:
# Padding the sequences

encode_english_input = pad_sequences(english_sequences_input,maxlen = max_english,padding = 'post')
decode_french_input = pad_sequences(french_sequences_input,maxlen = max_french,padding = 'post')
decode_french_output = pad_sequences(french_sequences_output,maxlen = max_french,padding = 'post')

# Size of the input and output

print('Size of Encode Input: ',encode_english_input.shape)
print('Size of Decode Input: ',decode_french_input.shape)
print('Size of Decode Output: ',decode_french_output.shape)


Size of Encode Input:  (15000, 5)
Size of Decode Input:  (15000, 12)
Size of Decode Output:  (15000, 12)


In [16]:
# Loading pretrained word vector

word2vec = {}
with open(os.path.join('/content/drive/My Drive/Colab Notebooks/Dataset/glove.6B.50d.txt')) as f:
  for line in f:
    values = line.split()
    word = values[0]
    vec = np.asarray(values[1:], dtype='float32')
    word2vec[word] = vec
print('Found %s word vectors.' % len(word2vec))




Found 400000 word vectors.


In [17]:
num_words = min(VOCAB_SIZE, len(word2idx_english) + 1)

print('Number of words',num_words)

embedding_matrix = np.zeros((num_words, EMBEDDING_DIM))  # Creating the embedding matrix with each word having dimension of 50
print('Shape of Embedding Matrix',embedding_matrix.shape)

for word, i in word2idx_english.items():
  if i < VOCAB_SIZE:
    embedding_vector = word2vec.get(word)
    if embedding_vector is not None:
      # words not found in embedding index will be all zeros.
      embedding_matrix[i] = embedding_vector



Number of words 2922
Shape of Embedding Matrix (2922, 50)


In [18]:
embedding_matrix

array([[ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 1.18910000e-01,  1.52549997e-01, -8.20730031e-02, ...,
        -5.75119972e-01, -2.66710013e-01,  9.21209991e-01],
       [-1.09190005e-03,  3.33240002e-01,  3.57430011e-01, ...,
        -4.56970006e-01, -4.89690006e-02,  1.13160002e+00],
       ...,
       [ 2.15690002e-01, -9.00229990e-01,  6.82510018e-01, ...,
         4.65460002e-01,  1.81079999e-01, -1.22239999e-01],
       [ 9.63559985e-01, -5.39669991e-01,  2.77429998e-01, ...,
        -3.87650013e-01,  1.31150007e-01,  6.29419982e-01],
       [-4.29910004e-01,  5.82780004e-01, -8.21919963e-02, ...,
        -6.38769984e-01, -6.83719963e-02, -8.71749997e-01]])

In [0]:
# from google.colab import drive
# drive.mount('/content/drive')

# MODEL CREATION

In [20]:
# Embedding Layer

print('Number of words ',num_words)

embedding_layer = Embedding(input_dim = num_words, output_dim = EMBEDDING_DIM, weights = [embedding_matrix],input_length = max_english)


W0807 15:48:41.614988 140107071637376 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.



Number of words  2922


In [21]:
# Creating the target variable

decoder_target_onehot = np.zeros((len(english_text_input),max_french,num_words_french),dtype = 'float32')
print('Shape: ',decoder_target_onehot.shape)   

# 10000 represents each sequence
# 11 represents max length in english
# 11903 represents max length in french

Shape:  (15000, 12, 16548)


In [0]:
# Assiging the values to the output

for i,d in enumerate(decode_french_output):
  for t,word in enumerate(d):
    decoder_target_onehot[i,t,word] = 1

## Adding Layers

In [23]:
## Setting the encoder
## Embedding Layer

encoder_input = Input(shape = (max_english,))

print('Encoder input size before embedding',encoder_input.shape)

x = embedding_layer(encoder_input)

print('Encoder input size after embedding',x.shape)    # So each of the english words are now represented by 50 wordvector 


W0807 15:48:51.164558 140107071637376 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0807 15:48:51.177212 140107071637376 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0807 15:48:51.189575 140107071637376 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:174: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.

W0807 15:48:51.190344 140107071637376 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:181: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.



Encoder input size before embedding (?, 5)
Encoder input size after embedding (?, 5, 50)


In [24]:
## Setting the encoder
## Bidirectional LSTM Layer

encoder_lstm = Bidirectional(LSTM(LSTM_units,return_sequences = True,dropout = 0.2))
encoder_lstm_2 = LSTM(LSTM_units*2,return_sequences = True,dropout = 0.2)

print('Input size before LSTM Layer',x.shape)

encoder_outputs = encoder_lstm(x)      # Would have all the hidden states present
encoder_outputs = encoder_lstm_2(encoder_outputs) 

print('Input size after LSTM Layer',encoder_outputs.shape)    # So each of the 50 wordvector is now represented by size of 512


Input size before LSTM Layer (?, 5, 50)


W0807 15:48:56.633262 140107071637376 deprecation.py:506] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:3445: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Input size after LSTM Layer (?, ?, 1024)


In [25]:
## Setting the decoder
## Embedding Layer

decoder_input = Input(shape = (max_french,))

print('Decoder input size before embedding',decoder_input.shape)

decoder_embedding = Embedding(num_words_french,EMBEDDING_DIM_FRENCH)
decoder_x = decoder_embedding(decoder_input)

print('Decoder input size after embedding',decoder_x.shape)    # So each of the english words are now represented by 100 wordvector 


Decoder input size before embedding (?, 12)
Decoder input size after embedding (?, 12, 100)


In [0]:
### ATTENTION ###

attention_repeat_layer = RepeatVector(max_english)   # Repeat a vector max_english times
attention_concatenate_layer = Concatenate(axis = -1) # Concatenate previous state with each hidden layer

attention_dense_layer_1 = Dense(1024,activation = 'tanh')  # A dense layer for the input [previous_state,hidden_layer]
# attention_dense_layer_2 = Dense(1024,activation = 'tanh')
# attention_dense_layer_3 = Dense(1024,activation = 'tanh')
# attention_dense_layer_4 = Dense(1024,activation = 'tanh')
# attention_dense_layer_5 = Dense(512,activation = 'tanh')
attention_dense_layer_6 = Dense(1,activation = softmax_over_time)


attention_dot = Dot(axes = 1)  # Dot product for alpha and the hidden layer


In [0]:
def attention_step(hidden_state,previous_state):
  
  # previous_state is of size LSTM_units
  
  previous_state = attention_repeat_layer(previous_state)
  # previous_state will be of size (Tx,LSTM_units) now
  
  # hidden state is of size (Tx,LSTM_units*2)
  
  x = attention_concatenate_layer([hidden_state,previous_state])
  # x will be of size (Tx,LSTM_units*2(from hidden state) + LSTM_units(from previous state))
  
  x = attention_dense_layer_1(x)
  # x will be of size (Tx,10)
  
#   x = attention_dense_layer_2(x)
  
#   x = attention_dense_layer_3(x)
  
#   x = attention_dense_layer_4(x)
  
#   x = attention_dense_layer_5(x)
  
  alpha = attention_dense_layer_6(x)
  # alpha will be of size (Tx,1)
  
  
  context = attention_dot([alpha,hidden_state])    
  
  
  # shape of context will be (1,LSTM_units*2)
  
  """
  Size of shape: (?, ?, LSTM_units*2(shape from encoder_outputs))
  Previous state shape before RepeatVector function:  (?, LSTM_units(shape from initial_s))
  Previous state shape after RepeatVector function:  (?, 5(Tx), 256)
  Shape after concatenation:  (?, 5(Tx), 768(LSTM_units*2+LSTM_units))
  Shape after 1 Dense Layer:  (?, 5(Tx), 10(from dense layer 1))
  Shape of alpha:  (?, 5(Tx), 1)
  Shape of context:  (?, 1, 512(LSTM_units*2: Explained below))
  
  """
  
  """
  WORKING OF DOT PRODUCT
  
  @ Assuming Tx is 5 and LSTM_units is 2 so 
  
  alpha = [
          1
          2
          3
          4
          5
          ]
  
  hidden_state = [
          1 2 3 4
          1 2 3 4
          1 2 3 4
          1 2 3 4
          1 2 3 4
  ] 
  
  Now (alpha).(hidden_state) 
   
          [       [            
          1       1 2 3 4      [1*1 + 2*1 + 3*1 + 4*1 + 5*1,1*2 + 2*2 + 3*2 + 4*2 + 5*2,1*3 + 2*3 + 3*3 + 4*3 + 5*3,1*4 + 2*4 + 3*4 + 4*4 + 5*4] = [15.30,45,60]
          2       1 2 3 4      
          3    .  1 2 3 4   =  
          4       1 2 3 4      
          5       1 2 3 4      
          ]             ]
  
  Hence the shape of context will be (1,LSTM_units*2) = (1,4)
  
  """
  return context

In [0]:
decoder_lstm = LSTM(LSTM_units,return_state = True)
decoder_dense_1 = Dense(LSTM_units*2,activation = 'relu')
# decoder_dense_2 = Dense(LSTM_units*3,activation = 'relu')
# decoder_dense_3 = Dense(LSTM_units*4,activation = 'relu')
# decoder_dense_4 = Dense(LSTM_units*5,activation = 'relu')
# decoder_dense_5 = Dense(LSTM_units*5,activation = 'relu')

decoder_dense = Dense(num_words_french,activation = 'softmax')


In [0]:
initial_s = Input(shape = (LSTM_units,))       # Initial hidden state to be provided to the decoder LSTM
initial_c = Input(shape = (LSTM_units,))       # Initial cell state to be provided to the decoder LSTM

context_last_word_concat_layer = Concatenate(axis = 2) # Concatenate the context vector and the previous output generated to provide as input



In [0]:
s = initial_s
c = initial_c

In [31]:
outputs = [] # To collect output for each single input sentence

for t in range(max_french):        # So that we collect upto Ty(max output length) values for each input statement
  
  context = attention_step(encoder_outputs,s)
  # Get the context with shape (1,LSTM_units*2)
  
  selector = Lambda(lambda x: x[:,t:t+1])    # Correct input to provide for teacher forcing
  xt = selector(decoder_x)                   # Selecting the correct input
  
  # Shape of xt is (1,EMBEDDING_DIM_FRENCH)
  
  decoder_lstm_input = context_last_word_concat_layer([context,xt])
  
  print("Shape of decoder input (concatenate of context and input):",decoder_lstm_input.shape)
  # Shape of xt is (1,LSTM_units*2(shape of context)+EMBEDDING_DIM_FRENCH)
  
  o,s,c = decoder_lstm(decoder_lstm_input,initial_state = [s,c])       # We get new s and c 
  
  print("Shape of decoder output:",o.shape)   #(LSTM_units)
  
  decoder_outputs = decoder_dense_1(o)
#   decoder_outputs = decoder_dense_2(decoder_outputs)
#   decoder_outputs = decoder_dense_3(decoder_outputs)
#   decoder_outputs = decoder_dense_4(decoder_outputs)   # now decoder_outputs is the probability
#   decoder_outputs = decoder_dense_5(decoder_outputs)
  
  decoder_outputs = decoder_dense(decoder_outputs)
  
  print("Shape of decoder output(after dense):",decoder_outputs.shape) #(max_french)
  
  outputs.append(decoder_outputs) 
  

  

Shape of decoder input (concatenate of context and input): (?, 1, 1124)
Shape of decoder output: (?, 512)
Shape of decoder output(after dense): (?, 16548)
Shape of decoder input (concatenate of context and input): (?, 1, 1124)
Shape of decoder output: (?, 512)
Shape of decoder output(after dense): (?, 16548)
Shape of decoder input (concatenate of context and input): (?, 1, 1124)
Shape of decoder output: (?, 512)
Shape of decoder output(after dense): (?, 16548)
Shape of decoder input (concatenate of context and input): (?, 1, 1124)
Shape of decoder output: (?, 512)
Shape of decoder output(after dense): (?, 16548)
Shape of decoder input (concatenate of context and input): (?, 1, 1124)
Shape of decoder output: (?, 512)
Shape of decoder output(after dense): (?, 16548)
Shape of decoder input (concatenate of context and input): (?, 1, 1124)
Shape of decoder output: (?, 512)
Shape of decoder output(after dense): (?, 16548)
Shape of decoder input (concatenate of context and input): (?, 1, 1124

In [32]:
outputs # OUTPUT IS OF SIZE Ty,?,16548 but we do not need that we need to have Ty as the second dimension so we will do a stack and transpose function

[<tf.Tensor 'dense_4/Softmax:0' shape=(?, 16548) dtype=float32>,
 <tf.Tensor 'dense_4_1/Softmax:0' shape=(?, 16548) dtype=float32>,
 <tf.Tensor 'dense_4_2/Softmax:0' shape=(?, 16548) dtype=float32>,
 <tf.Tensor 'dense_4_3/Softmax:0' shape=(?, 16548) dtype=float32>,
 <tf.Tensor 'dense_4_4/Softmax:0' shape=(?, 16548) dtype=float32>,
 <tf.Tensor 'dense_4_5/Softmax:0' shape=(?, 16548) dtype=float32>,
 <tf.Tensor 'dense_4_6/Softmax:0' shape=(?, 16548) dtype=float32>,
 <tf.Tensor 'dense_4_7/Softmax:0' shape=(?, 16548) dtype=float32>,
 <tf.Tensor 'dense_4_8/Softmax:0' shape=(?, 16548) dtype=float32>,
 <tf.Tensor 'dense_4_9/Softmax:0' shape=(?, 16548) dtype=float32>,
 <tf.Tensor 'dense_4_10/Softmax:0' shape=(?, 16548) dtype=float32>,
 <tf.Tensor 'dense_4_11/Softmax:0' shape=(?, 16548) dtype=float32>]

In [0]:
def stack_and_transpose(x):
  return K.permute_dimensions(K.stack(x),pattern = (1,0,2))

In [34]:
stacker = Lambda(stack_and_transpose)
outputs = stacker(outputs)
outputs                        # Appropriate shape

<tf.Tensor 'lambda_13/transpose:0' shape=(?, 12, 16548) dtype=float32>

In [0]:
model = Model([encoder_input,decoder_input,initial_s,initial_c],outputs)

In [36]:
model.compile(optimizer = 'rmsprop', loss = 'categorical_crossentropy',metrics = ['accuracy'])

W0807 15:49:05.231151 140107071637376 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.



In [0]:
z = np.zeros((15000,LSTM_units))  # To pass as initial s and c for the decoder

In [38]:
r = model.fit([encode_english_input,decode_french_input,z,z],decoder_target_onehot,batch_size = BATCH_SIZE,epochs = EPOCHS,validation_split = 0.1)

W0807 15:49:05.611253 140107071637376 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Train on 13500 samples, validate on 1500 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [0]:
encoder_model = Model(encoder_input,encoder_outputs)

In [40]:
encoder_outputs_as_input = Input(shape = (max_english,LSTM_units*2,))
decoder_single_input = Input(shape = (1,))
decoder_single_input_emb = decoder_embedding(decoder_single_input)

print("Shape of the encoder input for the testing decoder: ",encoder_outputs_as_input.shape)
print("Shape of the decoder input for the testing decoder before embedding: ",decoder_single_input.shape)
print("Shape of the decoder input for the testing decoder after embedding: ",decoder_single_input_emb.shape)


Shape of the encoder input for the testing decoder:  (?, 5, 1024)
Shape of the decoder input for the testing decoder before embedding:  (?, 1)
Shape of the decoder input for the testing decoder after embedding:  (?, 1, 100)


In [41]:
context = attention_step(encoder_outputs_as_input,initial_s)

print('Shape of context: ',context.shape)

Shape of context:  (?, 1, 1024)


In [42]:
decoder_lstm_input = context_last_word_concat_layer([context,decoder_single_input_emb])

print('Shape of decoder lstm input for testing: ',decoder_lstm_input.shape)

Shape of decoder lstm input for testing:  (?, 1, 1124)


In [43]:
o,s,c = decoder_lstm(decoder_lstm_input,initial_state = [initial_s,initial_c])

decoder_outputs = decoder_dense_1(o)
# decoder_outputs = decoder_dense_2(decoder_outputs)
# decoder_outputs = decoder_dense_3(decoder_outputs)
# decoder_outputs = decoder_dense_4(decoder_outputs)   # now decoder_outputs is the probability
# decoder_outputs = decoder_dense_5(decoder_outputs)

decoder_outputs = decoder_dense(decoder_outputs)

print('Decoder output:',decoder_outputs.shape)

Decoder output: (?, 16548)


In [0]:
decoder_model = Model([decoder_single_input,encoder_outputs_as_input,initial_s,initial_c],[decoder_outputs,s,c])

In [0]:
idx2word_english = {v:k for k,v in word2idx_english.items()}
idx2word_french = {v:k for k,v in word2idx_french.items()}


In [0]:
def decode_sentences(input_sequence):
  
  encoder_outputs = encoder_model.predict(input_sequence)
  
  
  target = np.zeros((1,1))
  
  target[0,0] = word2idx_french['<sos>']
  
  
  eos = word2idx_french['<eos>']
  
  s = np.zeros((1,LSTM_units))
  c = np.zeros((1,LSTM_units))
  
  output_sequence = []
  
  for _ in range(max_french):
    
    o,s,c = decoder_model.predict([target,encoder_outputs,s,c])

    idx = np.argmax(o.flatten())
    
    if eos == idx:
      break
      
    word = ''
    
    if idx > 0:
      
      word = idx2word_french[idx]
      output_sequence.append(word)
      
    target[0,0] = idx
  
  return ' '.join(output_sequence)
    
  
  

In [47]:
for _ in range(20):
  
  i = np.random.choice(len(english_text_input))
  
  input_sequence = encode_english_input[i:i+1]
  
  translation = decode_sentences(input_sequence)
  
  print('-')
  
  print('Input: ', english_text_input[i])
  print('Expected: ', french_text_input[i])
  print('Translation: ', translation)
  
    
    
  

-
Input:  I love astronomy.
Expected:  <sos> J'adore l'astronomie.
Translation:  j'adore la musique.
-
Input:  I'm selfish.
Expected:  <sos> Je suis égoïste.
Translation:  je suis
-
Input:  You may go.
Expected:  <sos> Tu peux y aller.
Translation:  tu peux y aller.
-
Input:  That was a test.
Expected:  <sos> C'était un examen.
Translation:  c'était un mensonge.
-
Input:  Do it again!
Expected:  <sos> Faites-le de nouveau !
Translation:  faites-le de nouveau !
-
Input:  I wasn't kidding.
Expected:  <sos> Je n'étais pas en train de blaguer.
Translation:  je n'ai pas faim.
-
Input:  Buy me a drink.
Expected:  <sos> Payez-moi un coup !
Translation:  donnez-moi un verre !
-
Input:  Do it delicately.
Expected:  <sos> Fais-le délicatement.
Translation:  faites-le de !
-
Input:  No taxi stopped.
Expected:  <sos> Aucun taxi ne s'est arrêté.
Translation:  personne n'est d'un mensonge.
-
Input:  I feel ashamed.
Expected:  <sos> Je me sens honteux.
Translation:  je me sens mal.
-
Input:  Who will