# Attention sample code

In [1]:
from numpy import array
from numpy import random
from numpy import dot
from scipy.special import softmax

In [2]:
# Toy encoder outputs: one 3-dimensional vector per word.
# word_3 is the element-wise sum of word_1 and word_2.
word_1, word_2, word_3, word_4 = (
    array(components)
    for components in ([1, 0, 0], [0, 1, 0], [1, 1, 0], [0, 0, 1])
)


In [9]:
# Build the input matrix: one row per word, in word_1..word_4 order.
words = array([word_1, word_2, word_3, word_4])
print("Words vector: \n")
print(words)

# Draw the three 3x3 projection matrices (query, key, value) with integer
# entries in {0, 1, 2}. The seed is fixed and the matrices are drawn in the
# order W_Q, W_K, W_V so the printed values are reproducible.
random.seed(42)
W_Q, W_K, W_V = (random.randint(3, size=(3, 3)) for _ in range(3))

print("\nW_ vectors - \n")
for projection in (W_Q, W_K, W_V):
    print(projection)


Words vector: 

[[1 0 0]
 [0 1 0]
 [1 1 0]
 [0 0 1]]

W_ vectors - 

[[2 0 2]
 [2 0 0]
 [2 1 2]]
[[2 2 2]
 [0 2 1]
 [0 1 1]]
[[1 1 0]
 [0 1 1]
 [0 0 0]]


In [10]:
# Project every word embedding into query, key and value space by
# multiplying the stacked embeddings with the matching weight matrix.
Q, K, V = (dot(words, projection) for projection in (W_Q, W_K, W_V))

In [12]:
# Show each projected matrix under its own heading.
for heading, projected in (
    ("Q is --\n", Q),
    ("\nK is --\n", K),
    ("\nV is --\n", V),
):
    print(heading)
    print(projected)

Q is --

[[2 0 2]
 [2 0 0]
 [4 0 2]
 [2 1 2]]

K is --

[[2 2 2]
 [0 2 1]
 [2 4 3]
 [0 1 1]]

V is --

[[1 1 0]
 [0 1 1]
 [1 2 1]
 [0 0 0]]


In [17]:
# Raw compatibility scores: entry (i, j) is the dot product of query i
# with key j.
scores = dot(Q, K.T)
print(scores)

# Scale by the square root of the key dimension, then apply softmax along
# each row so that every query's weights over the keys sum to 1.
d_k = K.shape[1]
weights = softmax(scores / d_k ** 0.5, axis=1)
print(weights)

[[ 8  2 10  2]
 [ 4  0  4  0]
 [12  2 14  2]
 [10  4 14  3]]
[[2.36089863e-01 7.38987555e-03 7.49130386e-01 7.38987555e-03]
 [4.54826323e-01 4.51736775e-02 4.54826323e-01 4.51736775e-02]
 [2.39275049e-01 7.43870015e-04 7.59237211e-01 7.43870015e-04]
 [8.99501754e-02 2.81554063e-03 9.05653685e-01 1.58059922e-03]]


In [18]:
# computing the attention by a weighted sum of the value vectors:
# row i of `attention` is the sum of the rows of V, each scaled by the
# corresponding entry in row i of `weights` (one output row per word)
attention = weights @ V

print(attention)

[[0.98522025 1.74174051 0.75652026]
 [0.90965265 1.40965265 0.5       ]
 [0.99851226 1.75849334 0.75998108]
 [0.99560386 1.90407309 0.90846923]]


# KQV Concepts

In [1]:
import numpy as np
import pandas as pd

# Ratings matrix: one row per person (P1..P7), one column per movie.
X = np.array([
    [1, 1, 1, 0, 0],
    [3, 3, 3, 0, 0],
    [4, 4, 4, 0, 0],
    [5, 5, 5, 0, 0],
    [0, 2, 0, 4, 4],
    [0, 0, 0, 5, 5],
    [0, 1, 0, 2, 2],
])

# Label rows and columns so the matrix is readable when displayed.
person_labels = [f"P{number}" for number in range(1, len(X) + 1)]
movie_titles = ["Star Wars", "The Matrix", "Iron man", "U got mail", "Titanic"]
pd.DataFrame(X, index=person_labels, columns=movie_titles)

Unnamed: 0,Star Wars,The Matrix,Iron man,U got mail,Titanic
P1,1,1,1,0,0
P2,3,3,3,0,0
P3,4,4,4,0,0
P4,5,5,5,0,0
P5,0,2,0,4,4
P6,0,0,0,5,5
P7,0,1,0,2,2


In [2]:
from numpy import linalg as la

# Print arrays with two decimals, no scientific notation, and equal
# precision for every element.
np.set_printoptions(
    precision=2,
    suppress=True,
    floatmode='maxprec_equal',
)

# Reduced ("thin") SVD of the ratings matrix: X = U @ diag(s) @ Vt.
U, s, Vt = la.svd(X, full_matrices=False)

# Display the three factors as a plain tuple.
U, s, Vt

(array([[-0.14, -0.02, -0.01,  0.56, -0.38],
        [-0.41, -0.07, -0.03,  0.21,  0.76],
        [-0.55, -0.09, -0.04, -0.72, -0.18],
        [-0.69, -0.12, -0.05,  0.34, -0.23],
        [-0.15,  0.59,  0.65,  0.00,  0.20],
        [-0.07,  0.73, -0.68,  0.00,  0.00],
        [-0.08,  0.30,  0.33,  0.00, -0.40]]),
 array([12.48,  9.51,  1.35,  0.00,  0.00]),
 array([[-0.56, -0.59, -0.56, -0.09, -0.09],
        [-0.13,  0.03, -0.13,  0.70,  0.70],
        [-0.41,  0.80, -0.41, -0.09, -0.09],
        [-0.71,  0.00,  0.71,  0.00,  0.00],
        [ 0.00, -0.00,  0.00, -0.71,  0.71]]))

In [5]:
# Keep only the k leading singular values and their singular vectors.
k = 2
U_k = U[:, :k]
s_k = s[:k]
Vt_k = Vt[:k, :]

# Transpose so that each row of V_k corresponds to one column of X.
V_k = Vt_k.transpose()
U_k, s_k, V_k

(array([[-0.14, -0.02],
        [-0.41, -0.07],
        [-0.55, -0.09],
        [-0.69, -0.12],
        [-0.15,  0.59],
        [-0.07,  0.73],
        [-0.08,  0.30]]),
 array([12.48,  9.51]),
 array([[-0.56, -0.13],
        [-0.59,  0.03],
        [-0.56, -0.13],
        [-0.09,  0.70],
        [-0.09,  0.70]]))

In [6]:
# Two new rating vectors that share no non-zero position.
P8 = np.array([5, 0, 0, 0, 0])
P9 = np.array([0, 4, 5, 0, 0])

# Cosine similarity: dot product divided by the product of the L2 norms.
norm_product = la.norm(P8) * la.norm(P9)
cos_sim = np.dot(P8, P9) / norm_product
cos_sim

0.0