<a href="https://colab.research.google.com/github/kartoone/cosc470/blob/main/examples/llm/tf-keras-embeddings.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Setup

In [2]:
import keras
from keras import layers
from keras import ops

You can also create a Sequential model incrementally via the `add()` method:

In [3]:
model = keras.Sequential()
model.add(keras.Input(shape=(5,)))  # 5 words in our vocab
model.add(layers.Dense(2, activation="linear")) # two numbers to represent each word
model.add(layers.Dense(5, activation="softmax")) # 5 words in our vocab
model.summary()

Note that the `Input` object is not displayed as part of `model.layers`, since
it isn't a layer:

In [4]:
model.compile(loss="categorical_crossentropy", optimizer="adam")

In [5]:
import numpy as np

weight_embedding_model = keras.Model(
    inputs=model.inputs,
    outputs=[layer.output for layer in model.layers],
)

# Call feature extractor on test input.
x = np.array([[0., 0., 0., 1., 0.]])
print(x)
features = weight_embedding_model(x)
print(features)



[[0. 0. 0. 1. 0.]]
[<tf.Tensor: shape=(1, 2), dtype=float32, numpy=array([[-0.17316723,  0.44316173]], dtype=float32)>, <tf.Tensor: shape=(1, 5), dtype=float32, numpy=
array([[0.18476278, 0.1709449 , 0.22722332, 0.19123158, 0.22583748]],
      dtype=float32)>]


Expected: ['keras_tensor']
Received: inputs=Tensor(shape=(1, 5))


Now we need to train ... our training dataset is word for the input and expected word for the output. But the notion of a word is our 1-hot encoding for each word in our vocab ... from the video here is the vocab:

In [6]:
vocab = ["Troll 2","gymkata","is","great","<EOS>"]
# vocabonehot = [[1,0,0,0,0],[0,1,0,0,0],[0,0,1,0,0],[0,0,0,1,0],[0,0,0,0,1]]
from tensorflow.keras.utils import to_categorical
vocabonehot = to_categorical(np.arange(len(vocab)));

# based on just two sentences: "Troll 2 is great" and "Gymkata is great", here is our training dataset
# update this to word with all the sentences you were given
training_inputs = np.array([vocabonehot[0],vocabonehot[1],vocabonehot[2],vocabonehot[2],vocabonehot[2],vocabonehot[3]])
training_outputs = np.array([vocabonehot[2],vocabonehot[2],vocabonehot[3],vocabonehot[3],vocabonehot[3],vocabonehot[4]])
model.fit(training_inputs, training_outputs, batch_size=2, epochs=1000);
features = weight_embedding_model(np.array([vocabonehot[0], vocabonehot[1], vocabonehot[2], vocabonehot[3], vocabonehot[4]]))
print(features)


Epoch 1/1000
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - loss: 1.3618
Epoch 2/1000
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - loss: 1.3379 
Epoch 3/1000
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 1.4259
Epoch 4/1000
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 1.3762
Epoch 5/1000
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 1.4694
Epoch 6/1000
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - loss: 1.2654
Epoch 7/1000
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - loss: 1.4224
Epoch 8/1000
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 1.4166
Epoch 9/1000
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 1.4104
Epoch 10/1000
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 1.302

Expected: ['keras_tensor']
Received: inputs=Tensor(shape=(5, 5))


## Positional encoding

We can do it manually (1st code cell) or Keras has a layer we can just use that takes care of all the details (2nd code cell).


In [15]:
import numpy as np
np.set_printoptions(linewidth=150)
seq_length = 10 # maximum sequence length ... i.e,. we have tiny vocabulary and even tinier sentences
num_hiddens = 2 # we are using two weights in our token embedding
sinx = np.sin(np.arange(1, seq_length))
cosx = np.cos(np.arange(1, seq_length))
#print(sinx)
#print(cosx)

ourembeddings = features[0]
#print(ourembeddings)

# example sentence1: Troll 2 is great ... equivalent to [0 2 3]
# example sentence2: Gymkata is great ... equivalent to [1 2 3]
# example sentence3: great is great ... equivalent to [3 2 3]
sentence1 = np.array([ourembeddings[0], ourembeddings[2], ourembeddings[3]])
sentence2 = np.array([ourembeddings[1], ourembeddings[2], ourembeddings[3]])
sentence3 = np.array([ourembeddings[3], ourembeddings[2], ourembeddings[0]])
sentence4 = np.array([ourembeddings[3], ourembeddings[2], ourembeddings[1]])
sentence5 = np.array([ourembeddings[3], ourembeddings[2], ourembeddings[3]])
#print(sentence1)
#print(sentence2)
#print(sentence3)
#print(sentence4)
#print(sentence5)

positionsvals = np.array([[sinx[0], cosx[0]], [sinx[1], cosx[1]], [sinx[2], cosx[2]]])
#print(positionsvals)

sentencewithpositions1 = sentence1 + positionsvals
sentencewithpositions2 = sentence2 + positionsvals
sentencewithpositions3 = sentence3 + positionsvals
sentencewithpositions4 = sentence4 + positionsvals
sentencewithpositions5 = sentence5 + positionsvals
print(np.array([sentence1, sentencewithpositions1]))
print(np.array([sentence2, sentencewithpositions2]))
print(np.array([sentence3, sentencewithpositions3]))
print(np.array([sentence4, sentencewithpositions4]))
print(np.array([sentence5, sentencewithpositions5]))



[[[-1.81073689  2.03550768]
  [-1.92438602 -1.31159711]
  [ 1.04118395  1.4141407 ]]

 [[-0.96926591  2.57580998]
  [-1.0150886  -1.72774395]
  [ 1.18230396  0.4241482 ]]]
[[[-0.95909899  1.9877938 ]
  [-1.92438602 -1.31159711]
  [ 1.04118395  1.4141407 ]]

 [[-0.11762801  2.52809611]
  [-1.0150886  -1.72774395]
  [ 1.18230396  0.4241482 ]]]
[[[ 1.04118395  1.4141407 ]
  [-1.92438602 -1.31159711]
  [-1.81073689  2.03550768]]

 [[ 1.88265493  1.95444301]
  [-1.0150886  -1.72774395]
  [-1.66961689  1.04551518]]]
[[[ 1.04118395  1.4141407 ]
  [-1.92438602 -1.31159711]
  [-0.95909899  1.9877938 ]]

 [[ 1.88265493  1.95444301]
  [-1.0150886  -1.72774395]
  [-0.81797899  0.99780131]]]
[[[ 1.04118395  1.4141407 ]
  [-1.92438602 -1.31159711]
  [ 1.04118395  1.4141407 ]]

 [[ 1.88265493  1.95444301]
  [-1.0150886  -1.72774395]
  [ 1.18230396  0.4241482 ]]]


In [None]:
import keras_hub

layer = keras_hub.layers.PositionEmbedding(sequence_length=10)


In [19]:
import keras
import keras_hub

seq_length = 10
vocab_size = 5
embed_dim = 2
inputs = keras.Input(shape=(seq_length,))
token_embeddings = keras.layers.Embedding(
    input_dim=vocab_size, output_dim=embed_dim
)(inputs)
position_embeddings = keras_hub.layers.PositionEmbedding(
    sequence_length=seq_length
)(token_embeddings)
outputs = token_embeddings + position_embeddings
model = keras.Model(inputs=inputs, outputs=outputs, name="keras_ce_model")

vocab = ["What","is","great","<EOS>"]

print(vocabonehot)

vocabonehot = [[1,0,0,0,0],[0,1,0,0,0],[0,0,1,0,0],[0,0,0,1,0],[0,0,0,0,1]]

# based on just two sentences: "Troll 2 is great" and "Gymkata is great", here is our training dataset
training_inputs = np.array([vocabonehot[0],vocabonehot[1],vocabonehot[2],vocabonehot[2],vocabonehot[2],vocabonehot[3]])
training_outputs = np.array([vocabonehot[2],vocabonehot[2],vocabonehot[3],vocabonehot[3],vocabonehot[3],vocabonehot[4]])
model.fit(training_inputs, training_outputs, batch_size=2, epochs=1000);


[[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1.]]


ValueError: You must call `compile()` before using the model.