<a href="https://colab.research.google.com/github/juanpajedrez/tensorflow_learning/blob/main/Notebook_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
import numpy as np

In [2]:
# Two integer matrices whose inner dimensions agree (A has 4 columns,
# B has 4 rows), so the matrix product of A and B is defined.
A = np.array(
    [
        [2, 6, 5, 2],
        [2, -2, 2, 3],
        [1, 5, 4, 0],
    ]
)

B = np.array(
    [
        [2, 9, 0, 3, 0],
        [3, 6, 8, -2, 2],
        [1, 3, 5, 0, 1],
        [3, 0, 2, 0, 5],
    ]
)

In [3]:
# Show both shapes, one per line.
print(A.shape, B.shape, sep="\n")

(3, 4)
(4, 5)


In [4]:
# Matrix product of A and B, written with the @ operator.
print(A @ B)

[[33 69 77 -6 27]
 [ 9 12  0 10 13]
 [21 51 60 -7 14]]


In [5]:
# Same product with einsum: index j appears in both inputs but not in the
# output, so it is summed over, leaving axes (i, k).
matmul_einsum = np.einsum("ij, jk -> ik", A, B)
print(matmul_einsum)

[[33 69 77 -6 27]
 [ 9 12  0 10 13]
 [21 51 60 -7 14]]


### Let's look at how to use einsum for element-wise multiplication

In [6]:
# Element-wise multiplication needs both operands to have the same shape,
# so B is redefined here as a 3x4 matrix to match A.
A = np.array(
    [
        [2, 6, 5, 2],
        [2, -2, 2, 3],
        [1, 5, 4, 0],
    ]
)

B = np.array(
    [
        [2, 9, 0, 3],
        [3, 6, 8, -2],
        [1, 3, 5, 0],
    ]
)
print(A.shape, B.shape, sep="\n")

(3, 4)
(3, 4)


In [7]:
# Element-wise (Hadamard) product using the explicit numpy ufunc.
print(np.multiply(A, B))

[[  4  54   0   6]
 [  6 -12  16  -6]
 [  1  15  20   0]]


In [8]:
# einsum version: every index is kept in the output, so nothing is summed
# and matching entries are simply multiplied.
hadamard_einsum = np.einsum("ij, ij -> ij", A, B)
print(hadamard_einsum)

[[  4  54   0   6]
 [  6 -12  16  -6]
 [  1  15  20   0]]


### Let's use einsum for the matrix transpose

In [9]:
A = np.array(
    [
        [2, 6, 5, 2],
        [2, -2, 2, 3],
        [1, 5, 4, 0],
    ]
)
# Transpose of A via numpy: rows become columns.
print(np.transpose(A))

[[ 2  2  1]
 [ 6 -2  5]
 [ 5  2  4]
 [ 2  3  0]]


In [10]:
# einsum version: swapping the index order in the output transposes the matrix.
transposed_einsum = np.einsum("ij -> ji", A)
print(transposed_einsum)

[[ 2  2  1]
 [ 6 -2  5]
 [ 5  2  4]
 [ 2  3  0]]


### Let's sum across specific dimensions

In [11]:
A = np.array(
    [
        [2, 6, 5, 2],
        [2, -2, 2, 3],
        [1, 5, 4, 0],
    ]
)

# Plain numpy: collapse axis 0 (the rows), giving one total per column.
print(A.sum(axis=0))

# einsum: index i is absent from the output, so it is summed away.
print(np.einsum("ij -> j", A))

[ 5  9 11  5]
[ 5  9 11  5]


In [12]:
# Plain numpy: collapse axis 1 (the columns), giving one total per row.
print(A.sum(axis=1))

# einsum: index j is absent from the output, so it is summed away.
print(np.einsum("ij -> i", A))

[15  5 10]
[15  5 10]


### Let's now apply all of these principles to 3D arrays

In [13]:
# Create two 3D arrays: each one stacks two matrices along a leading axis.
A = np.array(
    [
        [
            [2, 6, 5, 2],
            [2, -2, 2, 3],
            [1, 5, 4, 0],
        ],
        [
            [1, 3, 1, 22],
            [0, 2, 2, 0],
            [1, 5, 4, 1],
        ],
    ]
)
B = np.array(
    [
        [
            [2, 9, 0, 3, 0],
            [3, 6, 8, -2, 2],
            [1, 3, 5, 0, 1],
            [3, 0, 2, 0, 5],
        ],
        [
            [1, 0, 0, 3, 0],
            [3, 0, 4, -2, 2],
            [1, 0, 2, 0, 0],
            [3, 0, 1, 1, 0],
        ],
    ]
)

In [14]:
# Inspect the shapes. Index letters used for the axes:
#   b -> batch size
#   i -> rows
#   j -> columns
print(A.shape, B.shape, sep="\n")

(2, 3, 4)
(2, 4, 5)


In [15]:
# Batched matrix multiplication: @ multiplies the matrices pairwise along
# the leading (batch) axis.
print(A @ B)


[[[33 69 77 -6 27]
  [ 9 12  0 10 13]
  [21 51 60 -7 14]]

 [[77  0 36 19  6]
  [ 8  0 12 -4  4]
  [23  0 29 -6 10]]]


In [16]:
# einsum version: b is kept in every operand (the batch axis), while j is
# summed over within each batch element.
batched_einsum = np.einsum("bij, bjk -> bik", A, B)
print(batched_einsum)

[[[33 69 77 -6 27]
  [ 9 12  0 10 13]
  [21 51 60 -7 14]]

 [[77  0 36 19  6]
  [ 8  0 12 -4  4]
  [23  0 29 -6 10]]]


### Let's sum over all elements

In [17]:
# Create a 3D array: two 3x4 matrices stacked along the leading axis.
A = np.array(
    [
        [
            [2, 6, 5, 2],
            [2, -2, 2, 3],
            [1, 5, 4, 0],
        ],
        [
            [1, 3, 1, 22],
            [0, 2, 2, 0],
            [1, 5, 4, 1],
        ],
    ]
)

In [18]:
# Plain numpy: with no axis given, every element is added into one scalar.
print(A.sum())

72


In [19]:
# einsum: an empty output side means all three indices are summed away.
total_einsum = np.einsum("bij ->", A)
print(total_einsum)

72


### Attention calculation: K and Q
To simulate the product of the queries (Q) and keys (K) used in attention, we use np.einsum.

In [20]:
# Q has shape (batch_size, s_q, model_size)
# K has shape (batch_size, s_k, model_size)

# s_q = sequence length of Q
# s_k = sequence length of K

In [21]:
# Draw Q and K from a seeded generator so that Restart & Run All produces
# the same standard-normal data every time (the seed value is arbitrary).
rng = np.random.default_rng(0)
Q = rng.standard_normal((32, 64, 512))  # b, q, m shape
K = rng.standard_normal((32, 128, 512))  # b, k, m shape

In [26]:
# Transpose K within each batch element: the last two axes (k, m) are
# swapped to (m, k) while the batch axis b stays in place.
K_transpose = np.einsum(
    "bkm -> bmk",
    K,
)

In [23]:
# Q times the explicitly transposed K: per batch element this is
# (q, m) @ (m, k), with m summed over.
scores_via_transpose = np.einsum("bqm, bmk -> bqk", Q, K_transpose)
print(scores_via_transpose.shape)

(32, 64, 128)


In [24]:
# Same contraction using K directly: the subscripts sum over the shared m
# axis, so no separate transpose step is needed.
scores_direct = np.einsum("bqm, bkm -> bqk", Q, K)
print(scores_direct.shape)

(32, 64, 128)


### Efficient transformer: Attention based on specific and previous chunks.

In [27]:
# Simulate the data for an efficient transformer that splits its inputs
# into smaller chunks before computing attention.
# A seeded generator keeps the data identical across Restart & Run All
# (the seed value is arbitrary).
rng = np.random.default_rng(1)
A = rng.standard_normal((2, 4, 8))
B = rng.standard_normal((2, 4, 4))

In [31]:
# Split each array's last axis into two axes, simulating how a chunked
# transformer breaks its inputs into smaller chunks.
A = A.reshape(2, 4, 4, 2)  # axes: b, c, i, j
B = B.reshape(2, 4, 4, 1)  # axes: b, c, i, k

In [39]:
# Swap B's last two axes, (i, k) -> (k, i); the b and c axes are untouched.
B_transpose = np.einsum(
    "bcik -> bcki",
    B,
)

In [36]:
# Contract over the shared i axis in two ways: with B as-is (the subscripts
# express the transpose) and with the pre-transposed copy of B.
chunks_direct = np.einsum("bcik, bcij -> bckj", B, A)
chunks_via_transpose = np.einsum("bcki, bcij -> bckj", B_transpose, A)
print(chunks_direct.shape)
print(chunks_via_transpose.shape)

(2, 4, 1, 2)
(2, 4, 1, 2)


In [38]:
# The same computation without einsum: swap B's last two axes by hand,
# then batch-multiply with A.
(np.swapaxes(B, 2, 3) @ A).shape

(2, 4, 1, 2)