### Build a Model Whose BERT Input Is an Embedding (Not Token IDs)

In [1]:
import tensorflow as tf
from transformers import AutoTokenizer
from transformers import TFBertForMaskedLM
from transformers import TFCamembertForMaskedLM

In [2]:
### Hyperparameters
# Shared sizes used by every cell below; change them here only.

# Width of the one-hot token vectors and of the masked-LM logits
# (presumably the CamemBERT vocabulary size - confirm against the tokenizer).
vocabSize = 32005
# Number of sequences per batch; the Keras inputs below are built with this fixed batch size.
batchSize = 1
# Number of tokens per sequence; the sample sentence must tokenize to exactly this many tokens.
sequenceSize = 32
# Width of the embeddings fed to the BERT layer through `inputs_embeds`.
hiddenDimension = 768

In [3]:
### Tokenize a sample sentence and pack its token ids into a batch of one

sentence = "Elle se situe au cœur d'un vaste bassin sédimentaire aux sols fertiles et au climat tempéré Elle se situe au cœur d'un vaste bassin"
tokenizer = AutoTokenizer.from_pretrained("jplu/tf-camembert-base", do_lower_case=True)

# Two explicit steps (sub-word tokens, then vocabulary ids); no special
# tokens are added by this path.
tokens = tokenizer.tokenize(sentence)
tokens_ids = tokenizer.convert_tokens_to_ids(tokens)

# Prepend the batch axis: (sequence,) -> (1, sequence).
tensor = tf.convert_to_tensor(tokens_ids)[tf.newaxis, :]
tensor

<tf.Tensor: shape=(1, 32), dtype=int32, numpy=
array([[  109,    48,  3685,    36,   766,    18,    11,    59,  2615,
         3633,    52, 12279, 19464,    68,  7498, 24377,    10,    14,
           36,  3287, 22208,   141,   109,    48,  3685,    36,   766,
           18,    11,    59,  2615,  3633]], dtype=int32)>

In [4]:
# ### Check BERT output

# bertLayer = TFCamembertForMaskedLM.from_pretrained("jplu/tf-camembert-base")
# bertLayer(tensor)

In [5]:
# ### generate a random tensor
# randTensor = tf.random.uniform(shape=[batchSize, sequenceSize, hiddenDimension])

In [6]:
# ### Check BERT output

# bertLayer(None, inputs_embeds=randTensor)

In [7]:
# inp = tf.keras.Input(shape=(32), dtype='int32')
# x = TFCamembertForMaskedLM.from_pretrained("jplu/tf-camembert-base")(inp)[0]
# x = tf.keras.layers.Reshape((sequenceSize*vocabSize,))(x)
# out = tf.keras.layers.Dense(2, activation='softmax')(x)

# model = tf.keras.Model(inp, out)

In [8]:
# inp = tf.keras.Input(shape=(sequenceSize, hiddenDimension), batch_size=batchSize, dtype='float32')
# x = bertLayer(None, inputs_embeds=inp)[0]
# x = tf.keras.layers.Reshape((sequenceSize*vocabSize,))(x)
# out = tf.keras.layers.Dense(2, activation='softmax')(x)

# model = tf.keras.Model(inp, out)

In [9]:
# model(randTensor)

### Build a Model With a Linear Projection at the Beginning

In [10]:
# All-zero float batch with the same shape as a one-hot encoded sentence;
# it is only used to execute (and thereby build) the layers and model below.
tensor = tf.zeros((batchSize, sequenceSize, vocabSize))

In [11]:
### Build a bias-free linear layer that maps one-hot token vectors
### (width vocabSize) to the embedding width expected by `inputs_embeds`

# The output width comes from the shared `hiddenDimension` hyperparameter
# instead of a repeated literal, so it cannot drift from the width used for
# `randTensor` and for the BERT layer input.
# NOTE(review): the kernel uses Keras' default initializer; it is NOT loaded
# from the pretrained embedding table, so a one-hot input does not reproduce
# the pretrained token embeddings.
linearProj = tf.keras.layers.Dense(hiddenDimension, input_shape=(vocabSize,), use_bias=False)
linearProj(tensor)  # execute the layer once so its weights are created

<tf.Tensor: shape=(1, 32, 768), dtype=float32, numpy=
array([[[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]], dtype=float32)>

In [12]:
### Random stand-in for a batch of already-embedded tokens
# Shape: (batchSize, sequenceSize, hiddenDimension). Unseeded, so the values
# differ on every run.
randTensor = tf.random.uniform((batchSize, sequenceSize, hiddenDimension))

In [13]:
# Load the pretrained CamemBERT masked-LM model from the "jplu/tf-camembert-base" checkpoint.
bertLayer = TFCamembertForMaskedLM.from_pretrained("jplu/tf-camembert-base")
# Smoke test: pass None as the first positional argument (no token ids) and
# feed embeddings directly through `inputs_embeds`. The first element of the
# returned tuple holds logits of shape (batchSize, sequenceSize, vocabSize).
bertLayer(None, inputs_embeds=randTensor)

(<tf.Tensor: shape=(1, 32, 32005), dtype=float32, numpy=
 array([[[18.29583   , -4.882077  ,  6.7804275 , ..., -6.1650743 ,
          -3.6798992 ,  0.74201536],
         [-2.2358382 , -4.3069005 ,  9.268164  , ..., -5.5183606 ,
          -3.5315247 , -4.1858497 ],
         [-2.215941  , -4.8831463 ,  3.6801977 , ..., -6.437811  ,
          -4.0220213 , -3.5282693 ],
         ...,
         [-2.3257854 , -4.4981136 ,  9.450423  , ..., -4.9840775 ,
          -4.489959  , -3.028567  ],
         [-2.9122112 , -5.208738  ,  5.2395935 , ..., -4.728059  ,
          -3.7446043 , -2.3865955 ],
         [-0.60326767, -3.964406  , 13.500547  , ..., -5.69354   ,
          -3.5518374 , -2.7256484 ]]], dtype=float32)>,)

In [14]:
### Assemble the experimental model:
### one-hot tokens -> linear projection -> BERT (via inputs_embeds) -> 2-way softmax

# Head layers are instantiated up front and applied in the pipeline below.
flatten = tf.keras.layers.Reshape((sequenceSize * vocabSize,))
classifier = tf.keras.layers.Dense(2, activation='softmax')

# Fixed-shape input: one batch of one-hot encoded tokens.
inp = tf.keras.Input(
    shape=(sequenceSize, vocabSize),
    batch_size=batchSize,
    dtype='float32',
)

x = linearProj(inp)                       # (batch, seq, hiddenDimension)
x = bertLayer(None, inputs_embeds=x)[0]   # masked-LM logits: (batch, seq, vocabSize)
x = flatten(x)                            # (batch, seq * vocabSize)
out = classifier(x)                       # (batch, 2)

modelExp = tf.keras.Model(inputs=inp, outputs=out)

In [15]:
# Smoke test: run the assembled model on the current `tensor`, which is the
# all-zero placeholder batch when the cells are executed in order.
modelExp(tensor)

<tf.Tensor: shape=(1, 2), dtype=float32, numpy=array([[3.5208745e-05, 9.9996483e-01]], dtype=float32)>

### Try to Build the Input for the Experimental Model

In [18]:
### Build the model input by one-hot encoding the sentence
tokens = tokenizer.tokenize(sentence)
tokens_ids = tokenizer.convert_tokens_to_ids(tokens)

# modelExp was built with a fixed (batchSize, sequenceSize, vocabSize) input,
# so fail early with a clear message if the sentence does not fit it.
assert len(tokens_ids) == sequenceSize, (
    "expected %d tokens, got %d" % (sequenceSize, len(tokens_ids))
)
# tf.one_hot emits an all-zero row for an out-of-range index instead of
# raising, which would silently drop that token from the input.
assert all(0 <= token_id < vocabSize for token_id in tokens_ids), (
    "token id outside [0, vocabSize)"
)

# One-hot encode to (sequenceSize, vocabSize), then add the leading batch axis.
tensor = tf.convert_to_tensor(tokens_ids)
tensor = tf.expand_dims(tf.one_hot(tensor, vocabSize), 0)
tensor

<tf.Tensor: shape=(1, 32, 32005), dtype=float32, numpy=
array([[[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]], dtype=float32)>

In [20]:
### Run the experimental model on the one-hot encoded sentence
# The result is the softmax output of the final Dense(2) layer, one row per
# batch element. No training step appears in this notebook, so the projection
# and classifier weights are still at their initial values.
modelExp(tensor)

<tf.Tensor: shape=(1, 2), dtype=float32, numpy=array([[9.821886e-06, 9.999902e-01]], dtype=float32)>