## Evolution of Embedding Layer

### TODO:
- [ ] Add callback 
- [ ] Store training steps
- [ ] Extract embedding weights
- [ ] Calculate SVD
- [ ] Project it on some human percivable space
- [ ] Present the evolution (rotation) of vectors

In [1]:
import tensorflow as tf
from tensorflow import keras
from keras import layers

import numpy as np

In [2]:
num_tokens = 10000
(x_train_raw, y_train ), (x_test_raw, y_test) = keras.datasets.imdb.load_data(num_words=num_tokens)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [3]:
words_dict = keras.datasets.imdb.get_word_index()
len(words_dict)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json


88584

In [4]:
x_train_raw.shape, x_test_raw.shape

((25000,), (25000,))

In [5]:
# Entries (reviews) have different length 
len(x_train_raw[0]), len(x_train_raw[1])

(218, 189)

In [6]:
max_len = 250
x_train = keras.preprocessing.sequence.pad_sequences(x_train_raw, maxlen=max_len)
x_test = keras.preprocessing.sequence.pad_sequences(x_test_raw, maxlen=max_len)


In [7]:

x_train.shape, x_test.shape

((25000, 250), (25000, 250))

In [8]:
# How to convert numpy array to a dataset tensor

In [9]:
train_ds = tf.data.Dataset.from_tensor_slices ((x_train, y_train))
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test))

train_ds = train_ds.shuffle(train_ds.cardinality())
test_ds = test_ds.shuffle(test_ds.cardinality())
batch_size = 32
train_ds = train_ds.batch(batch_size)
test_ds = test_ds.batch(batch_size)

In [10]:
train_ds.cardinality(), test_ds.cardinality()

(<tf.Tensor: shape=(), dtype=int64, numpy=782>,
 <tf.Tensor: shape=(), dtype=int64, numpy=782>)

In [11]:
def create_mode(max_len=max_len, 
                num_tokens=num_tokens,
                embedding_size=16):
  
  """ Create a sequencial vanilla deeplearning model.
  """
  inputs = layers.Input(shape=(None,), dtype=tf.int64)
  x = layers.Embedding(num_tokens, embedding_size)(inputs)
  x = layers.GlobalAvgPool1D()(x)
  x = layers.Dense(units=32, activation='relu')(x)
  x = layers.Dropout(0.2)(x)
  outputs = layers.Dense(units=1, activation='relu')(x)

  model = keras.Model(inputs=inputs, outputs=outputs)

  return model

In [12]:
model = create_mode()

In [13]:
model.compile(optimizer='adam',
              loss="binary_crossentropy",
              metrics=['accuracy'])

In [14]:
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, None)]            0         
                                                                 
 embedding (Embedding)       (None, None, 16)          160000    
                                                                 
 global_average_pooling1d (G  (None, 16)               0         
 lobalAveragePooling1D)                                          
                                                                 
 dense (Dense)               (None, 32)                544       
                                                                 
 dropout (Dropout)           (None, 32)                0         
                                                                 
 dense_1 (Dense)             (None, 1)                 33        
                                                             

In [15]:
callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath='path/to/my/model_{epoch}',
        save_freq='epoch')
]

In [16]:
epochs=10
model.fit(train_ds,
          validation_data=test_ds,
          epochs=epochs,
          callbacks=callbacks)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fb18cf575d0>

In [17]:
embed_l = model.get_layer('embedding_5')

ValueError: ignored

In [None]:
embd_mtx = embed_l.embeddings

In [None]:
embd_mtx.shape

In [None]:
mtx_embd = tf.matmul(embd_mtx, embd_mtx, transpose_a=True)

In [None]:
from numpy import linalg as lng

In [None]:
svd = lng.svd(embd_mtx)

In [None]:
len(svd)

In [None]:
svd[0].shapem 