# Self-Attention calculation with NumPy

In [1]:
import numpy as np
from scipy.special import softmax

## Input

In [2]:
# Three toy input vectors, each with four float32 components.
input_1, input_2, input_3 = (
    np.array(values, dtype=np.float32)
    for values in ([1, 0, 1, 0], [0, 2, 0, 2], [1, 1, 1, 1])
)

## Weight

In [3]:
# Projection weights: each matrix has one row per input component (4) and
# one column per projected component (3).

# Key projection.
wk = np.array(
    [
        [0, 0, 1],
        [1, 1, 0],
        [0, 1, 0],
        [1, 1, 0],
    ],
    dtype=np.float32,
)

# Query projection.
wq = np.array(
    [
        [1, 0, 1],
        [1, 0, 0],
        [0, 0, 1],
        [0, 1, 1],
    ],
    dtype=np.float32,
)

# Value projection.
wv = np.array(
    [
        [0, 2, 0],
        [0, 3, 0],
        [1, 0, 3],
        [1, 1, 0],
    ],
    dtype=np.float32,
)

## Calculate Key Representations

### Calculate for each input individually

In [4]:
# Key vector of each input on its own: a 1-D input times the key weights.
print(np.dot(input_1, wk))
print(np.dot(input_2, wk))
print(np.dot(input_3, wk))

[0. 1. 1.]
[4. 4. 0.]
[2. 3. 1.]


### Calculate for all inputs at once (stacked as a matrix)

In [5]:
# Stack the three input vectors as the rows of a single 3x4 matrix.
inputs = np.stack((input_1, input_2, input_3), axis=0)
print(inputs)

[[1. 0. 1. 0.]
 [0. 2. 0. 2.]
 [1. 1. 1. 1.]]


In [6]:
# One matrix product yields all key vectors at once; each row matches the
# per-input key vector computed individually earlier.
inputs @ wk

array([[0., 1., 1.],
       [4., 4., 0.],
       [2., 3., 1.]], dtype=float32)

## Calculate QKV representations

In [7]:
# Project every input row into query, key and value space with one matrix
# product each.
query_representations = inputs.dot(wq)
key_representations = inputs.dot(wk)
value_representations = inputs.dot(wv)

# Scaling factor sqrt(d_k) for scaled dot-product attention. d_k is the width
# of a key/query vector (last axis), NOT the number of input rows (axis 0).
# Both happen to be 3 in this example, so the value is unchanged, but using
# axis 0 would give the wrong scale as soon as the number of inputs differs
# from the projection width.
# Kept as a 1-element float32 array so the later division stays in float32.
query_representations_dim = np.array([float(key_representations.shape[-1]) ** 0.5], dtype='float32')

In [8]:
# Show the query, key and value matrices in that order, each followed by a
# blank line, then the scaling factor.
for _representation in (query_representations, key_representations, value_representations):
    print(_representation, "\n")
print(query_representations_dim)

[[1. 0. 2.]
 [2. 2. 2.]
 [2. 1. 3.]] 

[[0. 1. 1.]
 [4. 4. 0.]
 [2. 3. 1.]] 

[[1. 2. 3.]
 [2. 8. 0.]
 [2. 6. 3.]] 

[1.7320508]


## Calculate attention scores and output

![](https://picb.zhimg.com/80/v2-752c1c91e1b4dbca1b64f59a7e026b9b_720w.jpg)

In [9]:
# Scaled dot-product attention: query-key dot products divided by the scaling
# factor, softmax over each row (axis=1), then used to weight the value rows.
softmax(
    (query_representations @ key_representations.T) / query_representations_dim,
    axis=1,
) @ value_representations

array([[1.8638741 , 6.3193707 , 1.7041886 ],
       [1.9991105 , 7.8141265 , 0.27347228],
       [1.9925548 , 7.479635  , 0.73587704]], dtype=float32)