## Evolution of Embedding Layer

### TODO:
- [ ] Add callback 
- [ ] Store training steps
- [ ] Extract embedding weights
- [ ] Calculate SVD
- [ ] Project it onto some human-perceivable space
- [ ] Present the evolution (rotation) of vectors

In [1]:
import tensorflow as tf
from tensorflow import keras
from keras import layers

import numpy as np

In [2]:
# Vocabulary size passed to the IMDB loader as `num_words`.
num_tokens = 10000
(x_train_raw, y_train), (x_test_raw, y_test) = keras.datasets.imdb.load_data(
    num_words=num_tokens,
)

In [3]:
# Word index shipped with the IMDB dataset; display how many entries it has.
words_dict = keras.datasets.imdb.get_word_index()
len(words_dict)

88584

In [4]:
# Shapes of the raw splits: one entry per review.
x_train_raw.shape, x_test_raw.shape

((25000,), (25000,))

In [5]:
# Entries (reviews) have different lengths, so they must be padded before batching.
len(x_train_raw[0]), len(x_train_raw[1])

(218, 189)

In [6]:
# Bring every review to a fixed length of `max_len` tokens.
max_len = 250
x_train = keras.preprocessing.sequence.pad_sequences(
    sequences=x_train_raw,
    maxlen=max_len,
)
x_test = keras.preprocessing.sequence.pad_sequences(
    sequences=x_test_raw,
    maxlen=max_len,
)


In [7]:

# After padding, both splits are rectangular: (num_reviews, max_len).
x_train.shape, x_test.shape

((25000, 250), (25000, 250))

In [8]:
# Convert the NumPy arrays into tf.data.Dataset objects (done in the next cell).

In [9]:
# Number of examples per batch.
batch_size = 32

# Wrap the (features, labels) pairs as datasets of individual examples.
train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train))
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test))

# Shuffle with a buffer as large as the whole (still unbatched) dataset,
# then group the examples into batches.
train_ds = train_ds.shuffle(buffer_size=train_ds.cardinality()).batch(batch_size)
test_ds = test_ds.shuffle(buffer_size=test_ds.cardinality()).batch(batch_size)

In [10]:
# Number of batches in each dataset now that they have been batched.
train_ds.cardinality(), test_ds.cardinality()

(<tf.Tensor: shape=(), dtype=int64, numpy=782>,
 <tf.Tensor: shape=(), dtype=int64, numpy=782>)

In [59]:
def create_mode(max_len=max_len,
                num_tokens=num_tokens,
                embedding_size=16):
  """Create a small feed-forward binary classifier on top of an embedding layer.

  Architecture: Embedding -> global average pooling -> Dense(32, relu)
  -> Dropout(0.2) -> Dense(1, sigmoid).

  Args:
    max_len: Currently unused; the input is declared with `shape=(None,)`,
      so sequences of any length are accepted. Kept so existing calls that
      pass it keep working.
    num_tokens: Vocabulary size, i.e. the number of rows of the embedding
      matrix.
    embedding_size: Dimensionality of each embedding vector.

  Returns:
    An uncompiled `keras.Model` that maps int64 token ids to one value in
    (0, 1) per example.
  """
  inputs = layers.Input(shape=(None,), dtype=tf.int64)
  x = layers.Embedding(num_tokens, embedding_size)(inputs)
  x = layers.GlobalAvgPool1D()(x)
  x = layers.Dense(units=32, activation='relu')(x)
  x = layers.Dropout(0.2)(x)
  # Sigmoid keeps the output in (0, 1), which is what a binary cross-entropy
  # loss computed on probabilities expects. A relu here is unbounded above
  # and can get stuck at exactly 0, which breaks that loss.
  outputs = layers.Dense(units=1, activation='sigmoid')(x)

  model = keras.Model(inputs=inputs, outputs=outputs)

  return model

In [60]:
# Build the model with the default hyper-parameters.
model = create_mode()

In [61]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric.
model.compile(
    optimizer='adam',
    loss="binary_crossentropy",
    metrics=['accuracy'],
)

In [62]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "model_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_6 (InputLayer)        [(None, None)]            0         
                                                                 
 embedding_5 (Embedding)     (None, None, 16)          160000    
                                                                 
 global_average_pooling1d_5   (None, 16)               0         
 (GlobalAveragePooling1D)                                        
                                                                 
 dense_10 (Dense)            (None, 32)                544       
                                                                 
 dropout_5 (Dropout)         (None, 32)                0         
                                                                 
 dense_11 (Dense)            (None, 1)                 33        
                                                           

In [65]:
# Train on the training batches and evaluate on the test batches after every epoch.
epochs = 2
model.fit(
    x=train_ds,
    epochs=epochs,
    validation_data=test_ds,
)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7fc6edf46150>

In [74]:
# Find the embedding layer by type rather than by its auto-generated name:
# names such as 'embedding_5' depend on how many models were built in the
# current kernel session, so a name lookup fails after a restart.
embed_l = next(
    layer for layer in model.layers if isinstance(layer, layers.Embedding)
)

In [75]:
# Weight matrix of the embedding layer.
embd_mtx = embed_l.embeddings

In [77]:
# Expected to be (num_tokens, embedding_size).
embd_mtx.shape

TensorShape([10000, 16])

In [83]:
# Product of the transposed embedding matrix with itself (E^T @ E).
mtx_embd = tf.linalg.matmul(
    a=embd_mtx,
    b=embd_mtx,
    transpose_a=True,
)

In [84]:
from numpy import linalg as lng

In [89]:
# Reduced SVD of the embedding matrix. With the default full_matrices=True,
# NumPy would build a square U with one row and one column per token
# (10000 x 10000 here); the reduced form keeps U at the shape of the
# embedding matrix, which is all a projection needs.
svd = lng.svd(embd_mtx, full_matrices=False)

In [88]:
# The SVD result holds three arrays: U, the singular values, and V^H.
len(svd)

3

In [None]:
# Shape of U, the left singular vectors.
svd[0].shape