<a href="https://colab.research.google.com/github/maaniaxs/Deep-Learning/blob/main/Language_Translation_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# BUILDING MODEL_ZERO
# eng-to-spanish Translation with Keras_NLP

In [None]:
# KerasNLP provides building blocks for NLP (model layers, tokenizers, metrics, etc.) and makes it convenient to construct NLP pipelines

In [None]:
#import keras_nlp
import numpy as np
import pathlib
import random
import tensorflow as tf
from tensorflow import keras
#from tensorflow_text.tools.wordpiece_vocab import bert_vocab_from_dataset as bert_vocab

In [None]:
!pip install keras_nlp
import keras_nlp
from tensorflow_text.tools.wordpiece_vocab import bert_vocab_from_dataset as bert_vocab

In [None]:
BATCH_S = 64             # Let's also define our parameters/hyperparameters.
MAX_SEQ_LENGTH = 40
ENG_VOCAB_SIZE = 15000
SPA_VOCAB_SIZE = 15000
EMBED_DIM = 256
INTERMEDIATE_DIM = 2048
NUM_HEADS = 8

In [None]:
#Downloading the data. We'll be working with an English-to-Spanish translation dataset provided by Anki. Let's download it
text_file = keras.utils.get_file(
    fname="spa-eng.zip",
    origin="http://storage.googleapis.com/download.tensorflow.org/data/spa-eng.zip",
    extract=True )
text_file = pathlib.Path(text_file).parent / "spa-eng" / "spa.txt"

Downloading data from http://storage.googleapis.com/download.tensorflow.org/data/spa-eng.zip


In [None]:
#Parsing the data
# Each line contains an English sentence and its corresponding Spanish sentence. The English sentence is the source sequence and Spanish one 
# is the target sequence. Before adding the text to a list, we convert it to lowercase
with open(text_file) as f:
    lines = f.read().split("\n")[:-1]
text_pairs = []
for line in lines:
    eng, spa = line.split("\t")
    eng = eng.lower()
    spa = spa.lower()
    text_pairs.append((eng, spa))

In [None]:
len(text_pairs), text_pairs[0:5]

(118964,
 [('go.', 've.'),
  ('go.', 'vete.'),
  ('go.', 'vaya.'),
  ('go.', 'váyase.'),
  ('hi.', 'hola.')])

In [None]:
for _ in range(5):
    print(random.choice(text_pairs))     #Here's what our sentence pairs look like:

In [None]:
# Now, let's split the sentence pairs into a training set, a validation set, and a test set.
random.shuffle(text_pairs)
num_val_samples = int(0.15 * len(text_pairs))
num_train_samples = len(text_pairs) - 2 * num_val_samples
train_pairs = text_pairs[:num_train_samples]
val_pairs = text_pairs[num_train_samples : num_train_samples + num_val_samples]
test_pairs = text_pairs[num_train_samples + num_val_samples :]

print(f"{len(text_pairs)} total pairs")
print(f"{len(train_pairs)} training pairs")
print(f"{len(val_pairs)} validation pairs")
print(f"{len(test_pairs)} test pairs")

118964 total pairs
83276 training pairs
17844 validation pairs
17844 test pairs


In [None]:
# Tokenizing the data
# We'll define two tokenizers - one for the source language (English), and the other for the target language (Spanish). 
# We'll be using keras_nlp.tokenizers.WordPieceTokenizer to tokenize the text. keras_nlp.tokenizers.WordPieceTokenizer takes a WordPiece 
# vocabulary and has functions for tokenizing the text, and detokenizing sequences of tokens.
def train_word_piece(text_samples, vocab_size, reserved_tokens):
    bert_vocab_args = dict(
        vocab_size=vocab_size, # The target vocabulary size
        reserved_tokens=reserved_tokens, # Reserved tokens that must be included in the vocabulary
        bert_tokenizer_params={"lower_case": True},)  # Arguments for `text.BertTokenizer`
    
    word_piece_ds = tf.data.Dataset.from_tensor_slices(text_samples)
    vocab = bert_vocab.bert_vocab_from_dataset( word_piece_ds.batch(1000).prefetch(2), **bert_vocab_args)
    return vocab

In [None]:
# "[PAD]" - Padding token. Padding tokens are appended to the input sequence length when the input sequence length is shorter than the maximum sequence length.
# "[UNK]" - Unknown token.
# "[START]" - Token that marks the start of the input sequence.
# "[END]" - Token that marks the end of the input sequence.
reserved_tokens = ["[PAD]", "[UNK]", "[START]", "[END]"]
eng_samples = [text_pair[0] for text_pair in train_pairs]
eng_vocab = train_word_piece(eng_samples, ENG_VOCAB_SIZE, reserved_tokens)
spa_samples = [text_pair[1] for text_pair in train_pairs]
spa_vocab = train_word_piece(spa_samples, SPA_VOCAB_SIZE, reserved_tokens)

In [None]:
len(spa_samples), spa_samples[:3], len(eng_vocab)

(83276,
 ['ella es amable con él.',
  'mira aquel edificio.',
  'tom pudo estar disfrazado.',
  'el almuerzo de tom incluye un sándwich y una manzana.',
  'el que no está satisfecho con poco, no está satisfecho con nada.',
  'papá noel estaba parado en el jardín.'],
 3620)

In [None]:
print("English Tokens: ", eng_vocab[100:110])        #Let's see some tokens!
print("Spanish Tokens: ", spa_vocab[100:110])

English Tokens:  ['re', 'how', 'll', 'did', 'very', 'as', 'had', 'all', 'here', 'about']
Spanish Tokens:  ['del', 'estaba', 'quiero', 'tengo', 'fue', 'aqui', 'casa', 'cuando', 'hacer', '##n']


In [None]:
# Now, let's define the tokenizers. We will configure the tokenizers with the the vocabularies trained above
eng_tokenizer = keras_nlp.tokenizers.WordPieceTokenizer( vocabulary=eng_vocab, lowercase=False )

spa_tokenizer = keras_nlp.tokenizers.WordPieceTokenizer( vocabulary=spa_vocab, lowercase=False )

In [None]:
eng_input_ex = text_pairs[1][0]            # For Example
eng_tokens_ex = eng_tokenizer.tokenize(eng_input_ex)
print("English-sentence: ", eng_input_ex)
print("Tokens: ", eng_tokens_ex)
print("Text after detokenizing: ", eng_tokenizer.detokenize(eng_tokens_ex))
print()
spa_input_ex = text_pairs[1][1]
spa_tokens_ex = spa_tokenizer.tokenize(spa_input_ex)
print("Spanish-sentence: ", spa_input_ex)
print("Tokens: ", spa_tokens_ex)
print("Text after detokenizing: ", spa_tokenizer.detokenize(spa_tokens_ex))

English-sentence:  the student body is opposed to the new rules.
Tokens:  tf.Tensor([  57  607 1291   61 2862   58   57  187  852   12], shape=(10,), dtype=int32)
Text after detokenizing:  tf.Tensor(b'the student body is opposed to the new rules .', shape=(), dtype=string)

Spanish-sentence:  el cuerpo estudiantil está en contra de las nuevas normas.
Tokens:  tf.Tensor([  64 1632 1824 4383   73   68  509   63   87 2910 4599   15], shape=(12,), dtype=int32)
Text after detokenizing:  tf.Tensor(b'el cuerpo estudiantil esta en contra de las nuevas normas .', shape=(), dtype=string)


In [None]:
#Format datasets,     Next, we'll format our datasets.
# At each training step, the model will seek to predict target words N+1 (and beyond) using the source sentence and the target words 0 to N.
# 1. inputs is a dictionary with the keys encoder_inputs and decoder_inputs. encoder_inputs is the tokenized source sentence and decoder_inputs is 
#   the target sentence "so far", that is to say, the words 0 to N used to predict word N+1 (and beyond) in the target sentence.
# 2. target is the target sentence offset by one step: it provides the next words in the target sentence -- what the model will try to predict.
# 3. We will add special tokens, "[START]" and "[END]", to the input Spanish sentence after tokenizing the text. 
#   We will also pad the input to a fixed length. This can be easily done using

In [None]:
def preprocess_batch(eng, spa):
    batch_size = tf.shape(spa)[0]      # ENG_VOCAB_SIZE = 15000, SPA_VOCAB_SIZE = 15000,  MAX_SEQUENCE_LENGTH = 40
    eng = eng_tokenizer(eng)
    spa = spa_tokenizer(spa)
    # PADDING `eng` to `MAX_SEQUENCE_LENGTH`.
    eng_start_end_packer = keras_nlp.layers.StartEndPacker(
        sequence_length=MAX_SEQ_LENGTH,
        pad_value = eng_tokenizer.token_to_id("[PAD]"),) #eng_tokenizer.token_to_id("[PAD]") = 0
    eng = eng_start_end_packer(eng)

    # Add special tokens (`"[START]"` and `"[END]"`) to `spa` and padding it as well.
    spa_start_end_packer = keras_nlp.layers.StartEndPacker(
        sequence_length=MAX_SEQ_LENGTH + 1,  # 40+1
        start_value=spa_tokenizer.token_to_id("[START]"),  # eng_tokenizer.token_to_id("[START]") = 2
        end_value=spa_tokenizer.token_to_id("[END]"),    # eng_tokenizer.token_to_id("[END]") = 3
        pad_value=spa_tokenizer.token_to_id("[PAD]"), )   # eng_tokenizer.token_to_id("[PAD]") = 0
    spa = spa_start_end_packer(spa)

    return ({ "encoder_inputs": eng, "decoder_inputs": spa[:, :-1] }, spa[:, 1:],)

def make_dataset(pairs):
    # eng_texts, spa_texts = zip(pairs)  # it's not work
    eng_texts, spa_texts = zip(*pairs)
    eng_texts = list(eng_texts)
    spa_texts = list(spa_texts)
    dataset = tf.data.Dataset.from_tensor_slices((eng_texts, spa_texts))
    dataset = dataset.batch(BATCH_S)
    dataset = dataset.map(preprocess_batch, num_parallel_calls=tf.data.AUTOTUNE)
    return dataset.shuffle(2048).prefetch(16).cache()

train_ds = make_dataset(train_pairs)
val_ds = make_dataset(val_pairs)

In [None]:
train_ds

<CacheDataset element_spec=({'encoder_inputs': TensorSpec(shape=(None, 40), dtype=tf.int32, name=None), 'decoder_inputs': TensorSpec(shape=(None, 40), dtype=tf.int32, name=None)}, TensorSpec(shape=(None, 40), dtype=tf.int32, name=None))>

In [None]:
eng_tokenizer.token_to_id("[END]"), eng_tokenizer.token_to_id("[PAD]"), eng_tokenizer.token_to_id("[START]"), 

(3, 0, 2)

In [None]:
# Let's take a quick look at the sequence shapes (we have batches of 64 pairs, and all sequences are 40 steps long):
for inputs, targets in train_ds.take(1):
    print(f'inputs["encoder_inputs"].shape: {inputs["encoder_inputs"].shape}')
    print(f'inputs["decoder_inputs"].shape: {inputs["decoder_inputs"].shape}')
    print(f"targets.shape: {targets.shape}")

inputs["encoder_inputs"].shape: (64, 40)
inputs["decoder_inputs"].shape: (64, 40)
targets.shape: (64, 40)


# Building The Model_1
* English to french

In [None]:
# Encoder
encoder_inputs = keras.Input(shape=(None,), dtype="int64", name="encoder_inputs")     # MAX_SEQ_LENGTH = 40
x = keras_nlp.layers.TokenAndPositionEmbedding(vocabulary_size=ENG_VOCAB_SIZE,
    sequence_length=MAX_SEQ_LENGTH, embedding_dim=256, mask_zero=True,)(encoder_inputs)  # EMBED_DIM = 256, 

encoder_outputs = keras_nlp.layers.TransformerEncoder(
    intermediate_dim=2048, num_heads=NUM_HEADS)(inputs=x)     # INTERMEDIATE_DIM = 2048, NUM_HEADS = 8 
encoder = keras.Model(encoder_inputs, encoder_outputs)

# Decoder
decoder_inputs = keras.Input(shape=(None,), dtype="int64", name="decoder_inputs")
encoded_seq_inputs = keras.Input(shape=(None, 256), name="decoder_state_inputs")

x = keras_nlp.layers.TokenAndPositionEmbedding(
    vocabulary_size=SPA_VOCAB_SIZE, sequence_length=MAX_SEQ_LENGTH,
    embedding_dim=256, mask_zero=True,)(decoder_inputs)
  
x = keras_nlp.layers.TransformerDecoder(
    intermediate_dim=2048, num_heads=NUM_HEADS )(x, encoded_seq_inputs)     #(decoder_sequence=x, encoder_sequence=encoded_seq_inputs)
x = keras.layers.Dropout(0.3)(x)   #x = keras.layers.Dropout(0.5)(x)
decoder_outputs = keras.layers.Dense(SPA_VOCAB_SIZE, activation="softmax")(x)
decoder = keras.Model( [decoder_inputs, encoded_seq_inputs,], decoder_outputs,)

decoder_outputs = decoder([decoder_inputs, encoder_outputs])

transformer = keras.Model([encoder_inputs, decoder_inputs], decoder_outputs, name="transformer",)

In [None]:
#transformer.summary()
transformer.compile("rmsprop", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
transformer.fit(train_ds, epochs=2, validation_data=val_ds)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7fa0b7626820>

In [None]:
def decode_sequences(input_sentences):
    batch_size = tf.shape(input_sentences)[0]
    # Tokenize the encoder input.
    encoder_input_tokens = eng_tokenizer(input_sentences).to_tensor(shape=(None, MAX_SEQ_LENGTH))
    # Define a function that outputs the next token's probability given the
    # input sequence.
    def token_probability_fn(decoder_input_tokens):
        return transformer([encoder_input_tokens, decoder_input_tokens])[:, -1, :]

    # Set the prompt to the "[START]" token.
    prompt = tf.fill((batch_size, 1), spa_tokenizer.token_to_id("[START]"))

    generated_tokens = keras_nlp.utils.greedy_search(
        token_probability_fn, prompt, max_length = 40,
        end_token_id=spa_tokenizer.token_to_id("[END]"),)
    generated_sentences = spa_tokenizer.detokenize(generated_tokens)
    return generated_sentences

test_eng_texts = [pair[0] for pair in test_pairs[:6]]
for i in range(4):
    input_sentence = random.choice(test_eng_texts)
    translated = decode_sequences(tf.constant([input_sentence]))
    translated = translated.numpy()[0].decode("utf-8")
    translated = ( translated.replace("[PAD]", "")
        .replace("[START]", "")
        .replace("[END]", "")
        .strip() )
    print(f"** Example {i} **")
    print(input_sentence)
    print(translated)
    print()

** Example 0 **
i don't want to go to boston with you.
no quiero ir a boston contigo .

** Example 1 **
they own a lot of land.
ellos soloon un monton de arumpresentan .

** Example 2 **
that's unnecessary.
eso es despresentar .

** Example 3 **
that's unnecessary.
eso es despresentar .



# BUILDING  MODEL_2


In [None]:
!pip install keras_nlp

In [3]:
import pathlib
import numpy as np
import random
import string
import re
import keras
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.layers import TextVectorization
import keras_nlp

In [5]:
#Downloading the data. We'll be working with an English-to-Spanish translation dataset provided by Anki. Let's download it
text_file = keras.utils.get_file( fname="spa-eng.zip",
    origin="http://storage.googleapis.com/download.tensorflow.org/data/spa-eng.zip", extract=True )
text_file = pathlib.Path(text_file).parent / "spa-eng" / "spa.txt"

Downloading data from http://storage.googleapis.com/download.tensorflow.org/data/spa-eng.zip


In [39]:
# Each line contains an English sentence and its corresponding Spanish sentence. The English sentence is the source sequence and Spanish one 
#  is the target sequence.

text = text_file.read_text(encoding='utf-8')
lines = text.splitlines()

eng = [l.split('\t')[0] for l in lines]
spa = [l.split('\t')[1] for l in lines]
text_pairs = [l.split('\t') for l in lines]

In [40]:
len(eng), len(spa)

(118964, 118964)

In [41]:
text_pairs[10:40:4]

[['Fire!', '¡Disparad!'],
 ['Jump!', '¡Salta!'],
 ['Stop!', '¡Pare!'],
 ['Go on.', 'Continúe.'],
 ['I try.', 'Lo intento.'],
 ['Smile.', 'Sonríe.'],
 ['Go now.', 'Ve ahora mismo.'],
 ['He ran.', 'Él corrió.']]

In [47]:
!wget  http://www.manythings.org/anki/fra-eng.zip

--2023-01-16 17:41:50--  http://www.manythings.org/anki/fra-eng.zip
Resolving www.manythings.org (www.manythings.org)... 173.254.30.110
Connecting to www.manythings.org (www.manythings.org)|173.254.30.110|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 6720195 (6.4M) [application/zip]
Saving to: ‘fra-eng.zip’


2023-01-16 17:41:50 (31.7 MB/s) - ‘fra-eng.zip’ saved [6720195/6720195]



In [48]:
!unzip --qq '/content/fra-eng.zip'

In [53]:
with open('/content/fra.txt') as f:
  f_file = f.read()

f_lines = f_file.splitlines()
text_pairs = [l.split('\t')[:2] for l in f_lines]

In [55]:
text_pairs[9]

['Run!', 'File !']

In [56]:
# Now, let's split the sentence pairs into a training set, a validation set, and a test set.
random.shuffle(text_pairs)
num_val_samples = int(0.15 * len(text_pairs))
num_train_samples = len(text_pairs) - 2 * num_val_samples
train_pairs = text_pairs[:num_train_samples]
val_pairs = text_pairs[num_train_samples : num_train_samples + num_val_samples]
test_pairs = text_pairs[num_train_samples + num_val_samples :]

print(f"{len(text_pairs)} total pairs")
print(f"{len(train_pairs)} training pairs")
print(f"{len(val_pairs)} validation pairs")
print(f"{len(test_pairs)} test pairs")

197463 total pairs
138225 training pairs
29619 validation pairs
29619 test pairs


In [91]:
# Vectorizing the text data
#textVectorization layer.
strip_chars = string.punctuation + "¿"
strip_chars = strip_chars.replace("[", "")
strip_chars = strip_chars.replace("]", "")

vocab_size, sequence_length, batch_size = 15000, 20, 96

def custom_standardization(input_string):
    lowercase = tf.strings.lower(input_string)
    return tf.strings.regex_replace(lowercase, "[%s]" % re.escape(strip_chars), "")

eng_vectorization = TextVectorization(
    max_tokens= vocab_size, output_mode="int", output_sequence_length=20,)
fra_vectorization = TextVectorization(max_tokens= 30000, output_mode="int",
    output_sequence_length = 20 + 1, standardize = custom_standardization,)

train_eng_texts = [pair[0] for pair in train_pairs]
train_fra_texts = [pair[1] for pair in train_pairs]
eng_vectorization.adapt(train_eng_texts)
fra_vectorization.adapt(train_fra_texts)

In [88]:
# checking french vocabulary size
fra_vect = TextVectorization(max_tokens= None, output_mode="int",
    output_sequence_length = 20 + 1, standardize = custom_standardization,)
fra_vect.adapt(train_fra_texts)
len(fra_vect.get_vocabulary())

31110

In [59]:
len(eng_vectorization.get_vocabulary()), len(fra_vectorization.get_vocabulary())

(14285, 15000)

In [92]:
def format_dataset(eng, fra):
    eng = eng_vectorization(eng)
    fra = fra_vectorization(fra)
    return ({"encoder_inputs": eng, "decoder_inputs": fra[:, :-1],}, fra[:, 1:])

def make_dataset(pairs):
    eng_texts, fra_texts = zip(*pairs)
    eng_texts = list(eng_texts)
    fra_texts = list(fra_texts)
    dataset = tf.data.Dataset.from_tensor_slices((eng_texts, fra_texts))
    dataset = dataset.batch(batch_size)
    dataset = dataset.map(format_dataset)
    return dataset.shuffle(2048).prefetch(16).cache()

train_ds = make_dataset(train_pairs)
val_ds = make_dataset(val_pairs)

In [80]:
# Making a PositionalEmbedding layer Class
class PositionalEmbedding(layers.Layer):
    def __init__(self, sequence_length, vocab_size, embed_dim, **kwargs):
        super(PositionalEmbedding, self).__init__(**kwargs)
        self.token_embeddings = layers.Embedding( input_dim=vocab_size, output_dim=embed_dim)
        self.position_embeddings = layers.Embedding( input_dim=sequence_length, output_dim=embed_dim)
        self.sequence_length = sequence_length
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim

    def call(self, inputs):
        length = tf.shape(inputs)[-1]
        positions = tf.range(start=0, limit=length, delta=1)
        embedded_tokens = self.token_embeddings(inputs)
        embedded_positions = self.position_embeddings(positions)
        return embedded_tokens + embedded_positions

    def compute_mask(self, inputs, mask=None):
        return tf.math.not_equal(inputs, 0)

In [93]:
# THIRD MODEL = 
embed_dim = 256
latent_dim = 2048
num_heads = 8

encoder_inputs = keras.Input(shape=(None,), dtype="int64", name="encoder_inputs")
x = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(encoder_inputs)
encoder_outputs = keras_nlp.layers.TransformerEncoder( latent_dim, num_heads)(x)
encoder = keras.Model(encoder_inputs, encoder_outputs)

decoder_inputs = keras.Input(shape=(None,), dtype="int64", name="decoder_inputs")
encoded_seq_inputs = keras.Input(shape=(None, embed_dim), name="decoder_state_inputs")
x = PositionalEmbedding(sequence_length, 30000, embed_dim)(decoder_inputs)
x = keras_nlp.layers.TransformerDecoder( latent_dim, num_heads)(x, encoded_seq_inputs)
x = layers.Dropout(0.3)(x)
decoder_outputs = layers.Dense(30000, activation="softmax")(x)
decoder = keras.Model([decoder_inputs, encoded_seq_inputs], decoder_outputs)

decoder_outputs = decoder([decoder_inputs, encoder_outputs])
transformer = keras.Model( [encoder_inputs, decoder_inputs], decoder_outputs, name="transformer")

In [65]:
transformer.summary()

Model: "transformer"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 encoder_inputs (InputLayer)    [(None, None)]       0           []                               
                                                                                                  
 positional_embedding (Position  (None, None, 256)   4101120     ['encoder_inputs[0][0]']         
 alEmbedding)                                                                                     
                                                                                                  
 decoder_inputs (InputLayer)    [(None, None)]       0           []                               
                                                                                                  
 transformer_encoder (Transform  (None, None, 256)   1315072     ['positional_embedding[

In [98]:
#transformer.summary()
transformer.compile( "rmsprop", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

transformer.fit(train_ds, epochs=2, validation_data=val_ds,)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7f47d8c890d0>

In [95]:

fra_vocab = fra_vectorization.get_vocabulary()
fra_index_lookup = dict(zip(range(len(fra_vocab)), fra_vocab))
max_decoded_sentence_length = 20

def decode_sequence(input_sentence):
    tokenized_input_sentence = eng_vectorization([input_sentence])
    decoded_sentence = "[start]"
    for i in range(max_decoded_sentence_length):
        tokenized_target_sentence = fra_vectorization([decoded_sentence])[:, :-1]
        predictions = transformer([tokenized_input_sentence, tokenized_target_sentence])

        sampled_token_index = np.argmax(predictions[0, i, :])
        sampled_token = fra_index_lookup[sampled_token_index]
        decoded_sentence += " " + sampled_token

        if sampled_token == "[end]":
            break
    return decoded_sentence

In [103]:
eng_texts = [l[0] for l in text_pairs]

for _ in range(10):
  input_sent = random.choice(eng_texts)
  translated = decode_sequence(input_sent)
  translate = translated.replace('[start]','')
  print( input_sent,':',translate)

Is this Tom's car? :  cette voiture  de                
Tom forgot to pay his rent. :  a oublié de payer son ici              
Don't even think about it. :  pas même à ce sujet               
They all objected to his proposal. :  toute sa proposition                 
I want to go back to doing what I was doing before you interrupted me. :  je veux aller à faire ce que je voulais que vous me voulez que je me faire un coup je
He drowned in the river. :  dans la rivière                 
Will you please come with me? :  je vous prie                 
Did you hear what we said? :  ce que nous avons dit               
I swore I'd never do that. :  je navais jamais fait ça               
Did you take a shower today? :  une douche aujourdhui                 
