In [3]:
import numpy as np
import torch.nn as nn
import torch.nn.functional as fun

In [4]:
# Draw a 2x3 matrix with np.random.rand and echo it; the transpose cell
# further down in this notebook reads this same `x`.
x = np.random.rand(2, 3)
x

array([[0.88451896, 0.23431214, 0.0164485 ],
       [0.21165959, 0.9029372 , 0.03248161]])

scalar vector multiplication

In [5]:
# `scalar` is a one-element array, not a 0-d scalar: the 'i' index is absent
# from the output, so it is summed away and each entry of `vector` ends up
# multiplied by that single value.
scalar = np.array([4])
vector = np.array([1, 3, 4])
scaled_vector = np.einsum('i,j->j', scalar, vector)
print(scaled_vector)

[ 4 12 16]


vector vector multiplications 

In [6]:

# Element-wise product of two equal-length vectors: the shared index 'i'
# also appears in the output, so nothing is summed.
a = np.array([1, 2, 3])
b = np.array([2, 4, 6])
np.einsum("i,i->i", b, a)

array([ 2,  8, 18])

outer product

In [7]:
# Outer product: distinct indices 'i' and 'j' both survive into the output,
# giving a (len(a), len(b)) table of pairwise products.
a = np.array([1, 2, 3])
b = np.array([4, 5, 6, 7])
outer = np.einsum('i,j->ij', a, b)
outer

array([[ 4,  5,  6,  7],
       [ 8, 10, 12, 14],
       [12, 15, 18, 21]])

scalar dot product

In [9]:

# `a` has a single element while each row of `b` has three; the length-1 'i'
# axis of `a` is broadcast against the rows of `b`, and 'i' is then summed
# away, leaving one value per row of `b`.
a = np.array([1])
b = [
  [1, 2, 3],
  [1, 2, 3],
]
np.einsum("i,ji->j", a, b)

array([6, 6])

hadamard product

In [10]:
# Hadamard (element-wise) product: both operands and the output carry the
# same index pair 'ij', so no axis is contracted.
mat1 = [
  [1, 2, 3],
  [4, 5, 6],
]
mat2 = [
  [1, 2, 3],
  [4, 5, 6],
]
np.einsum("ij,ij->ij", mat1, mat2)

array([[ 1,  4,  9],
       [16, 25, 36]])

batch matrix multiplication

In [11]:
# Batched matrix product: 'i' indexes the 3 batch entries and is kept, while
# 'k' (size 5) is contracted, so each 2x5 slice of `a` multiplies the
# matching 5x3 slice of `b`.
a = np.random.rand(3, 2, 5)
b = np.random.rand(3, 5, 3)
# Show both operands before the product.
for operand in (a, b):
  print(operand)
np.einsum('ijk,ikl->ijl', a, b)

[[[0.02363698 0.78474126 0.48448156 0.77948616 0.00503479]
  [0.59257715 0.65172718 0.61540574 0.37461867 0.99148532]]

 [[0.29557621 0.84687723 0.76557815 0.21480959 0.89426274]
  [0.12291257 0.11604237 0.86010112 0.08415184 0.39132709]]

 [[0.04317219 0.26670241 0.68399287 0.00426782 0.93406356]
  [0.34218962 0.81114794 0.595427   0.86873046 0.57108231]]]
[[[0.09217265 0.67572691 0.51839897]
  [0.59081522 0.45806263 0.01400993]
  [0.10005867 0.07219301 0.75584302]
  [0.8739995  0.27299913 0.46897194]
  [0.57244481 0.39026294 0.15495932]]

 [[0.84857597 0.0632298  0.31300607]
  [0.00136672 0.80180091 0.11581563]
  [0.57091111 0.4652273  0.29699919]
  [0.05265029 0.57620774 0.50946241]
  [0.96129469 0.38122161 0.25114544]]

 [[0.79595128 0.24772567 0.04308973]
  [0.98434684 0.8654122  0.963551  ]
  [0.56514583 0.72207578 0.47817573]
  [0.29345239 0.60052015 0.47883423]
  [0.83848875 0.6794922  0.89262334]]]


array([[[1.198445  , 0.62517291, 0.75577688],
        [1.39623359, 1.23259074, 1.1107977 ]],

       [[1.56001319, 1.51857124, 0.75200228],
        [0.97611181, 0.69862842, 0.44851345]],

       [[1.46790055, 1.37264889, 1.42172096],
        [2.14109739, 2.12642601, 2.00678529]]])

tensor reduction

In [12]:
# Contract the indices shared by both operands, p (size 2) and q (size 17);
# the remaining indices r, s, t, w, m make up the output, whose shape is shown.
a = np.random.rand(2, 17, 5, 7)
b = np.random.rand(11, 2, 4, 17, 6)
reduced = np.einsum('pqrs,tpwqm->rstwm', a, b)
reduced.shape

(5, 7, 11, 4, 6)

transpose

In [13]:
# Listing the output indices in reverse order ("ji") yields the transpose of x.
x_transposed = np.einsum("ij -> ji", x)
print(x_transposed)

[[0.88451896 0.21165959]
 [0.23431214 0.9029372 ]
 [0.0164485  0.03248161]]


bilinear transformation

In [14]:
# Bilinear form: out[i, j] = sum over k, l of a[i, k] * b[j, k, l] * c[i, l].
# 'i' pairs the rows of `a` and `c`; 'j' selects one 3x7 slice of `b`.
a = np.random.rand(2, 3)
b = np.random.rand(5, 3, 7)
c = np.random.rand(2, 7)
bilinear = np.einsum('ik,jkl,il->ij', a, b, c)
bilinear

array([[0.35742739, 0.28114535, 0.35212185, 0.35913054, 0.38305359],
       [3.32357751, 3.30512635, 3.1321536 , 2.92282341, 2.93239034]])

attention

In [15]:
def random_tensors(shape, num=1, requires_grad=False):
  """Draw one or more arrays of samples from ``np.random.randn``.

  Args:
    shape: Sequence of ints, unpacked as the dimensions passed to ``randn``.
    num: Number of arrays to draw, one ``randn`` call per array.
    requires_grad: Accepted for call-site compatibility; this NumPy
      implementation never reads it.

  Returns:
    The array itself when ``num == 1``; otherwise a list holding the drawn
    arrays in draw order.
  """
  drawn = []
  for _ in range(num):
    drawn.append(np.random.randn(*shape))
  if num == 1:
    return drawn[0]
  return drawn


# Module-level parameters read by attention(): three length-7 vectors
# followed by four 7x7 matrices, drawn in this order.
bM, br, w = random_tensors([7], num=3)
WY, Wh, Wr, Wt = random_tensors([7, 7], num=4)

# Dump the sampled vectors, then the sampled matrices.
print(bM, br, w)
print(WY, Wh, Wr, Wt)

def attention(Y, ht, rt1):
  """Compute one attention step over the positions of ``Y``.

  Reads the module-level parameters ``WY``, ``Wh``, ``Wr``, ``Wt``, ``w``,
  ``bM`` and ``br``.

  Args:
    Y: Array attended over; this notebook calls it with shape (3, 5, 7).
    ht: First 2-D input; this notebook calls it with shape (3, 7).
    rt1: Second 2-D input, same shape as ``ht`` in this notebook.

  Returns:
    A tuple ``(rt, at)``: ``at`` holds the weights normalised along axis 1,
    and ``rt`` is the ``at``-weighted sum of ``Y`` plus
    ``tanh(rt1 @ Wt + br)``.
  """
  # Project both 2-D inputs and sum them; the result is added to every
  # position of Y's projection through the inserted axis 1.
  projected = np.einsum('ik,kl->il', ht, Wh) + np.einsum('ik,kl->il', rt1, Wr)
  Mt = np.tanh(np.matmul(Y, WY) + np.expand_dims(projected, 1) + bM)

  # Softmax of the per-position scores along axis 1.
  exp_scores = np.exp(np.matmul(Mt, w))
  at = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

  # Weighted sum of Y along its position axis. squeeze() is called without an
  # axis, so it removes every length-1 axis of the product, not only the one
  # inserted by the None index.
  weighted = np.matmul(at[..., None, :], Y).squeeze()
  rt = weighted + np.tanh(np.matmul(rt1, Wt) + br)
  return rt, at

# Inputs for the demo: Y has shape (3, 5, 7); ht and rt1 each have shape (3, 7).
Y = np.array(random_tensors([3, 5, 7]))
ht, rt1 = random_tensors([3, 7], num=2)

# Show the two 2-D inputs before running the attention step.
print(ht)
print(rt1)

# Only the attention weights are echoed as the cell result; rt is kept in the
# namespace but not displayed.
rt, at = attention(Y, ht, rt1)
at

[ 0.54525604 -0.27784639 -0.11273398  0.82310793  1.19382036 -0.71378252
  1.24514436] [ 0.35559636  0.24041619 -0.14980234 -0.08475141 -0.65788139 -0.96367643
  1.72564576] [-0.12715336 -0.25769324  1.1194503   0.30696943 -0.08127405 -0.39876806
  0.93069977]
[[ 0.75648276 -0.63839277 -1.55395     0.52498994 -0.74532338  0.12855529
  -1.52860707]
 [ 1.09736599  0.18261152  0.10471982  0.78906012  0.41359494  0.85339721
   2.28340045]
 [ 0.81048805 -0.68469533  0.52293137  1.8374563   0.65609048 -1.32393416
  -0.7652057 ]
 [ 0.28456047  0.19607011  0.7987256   0.76135369  1.19902144 -0.20751731
  -0.62130552]
 [-1.31615452  0.03666693 -0.95449482  1.06569776 -0.89244692 -3.48232827
  -0.13772567]
 [ 0.67809262 -0.61307767 -1.96916992 -0.50282375  0.46376154  0.96089962
   0.57224004]
 [-0.25238106  2.05442012  0.73814155 -1.33113198 -1.44145177  0.33394468
   0.65387864]] [[ 1.45546185e+00  8.29269310e-01 -1.59547700e-01  5.67207978e-01
  -9.36396394e-01 -1.26608770e+00  6.02600918e-01

array([[0.01997156, 0.01109191, 0.90755958, 0.02108139, 0.04029556],
       [0.0827044 , 0.59950065, 0.15848615, 0.12086911, 0.0384397 ],
       [0.04828252, 0.23702554, 0.31273627, 0.22862465, 0.17333102]])

tree qn

In [16]:
# Module-level parameters read by transition(): a (5, 3) bias and a (5, 3, 3)
# weight stack. requires_grad=True is passed through, but random_tensors as
# defined in this notebook does not use that argument.
b = random_tensors([5, 3], requires_grad=True)
W = random_tensors([5, 3, 3], requires_grad=True)

# Print the bias first, then the weights.
for parameter in (b, W):
  print(parameter)

def transition(zl):
  """Apply a residual tanh update to ``zl`` for every slice of ``W``.

  Reads the module-level parameters ``W`` and ``b``.

  Args:
    zl: 2-D array indexed (b, k); this notebook calls it with shape (2, 3).

  Returns:
    Array indexed (b, a, i): ``zl`` broadcast over the ``a`` axis plus
    ``tanh(einsum("bk,aki->bai", zl, W) + b)``.
  """
  # Contract zl's second axis (k) with the middle axis of each W[a].
  mixed = np.einsum("bk,aki->bai", zl, W)
  update = np.tanh(mixed + b)
  # The inserted length-1 axis lets zl broadcast across the a axis of `update`.
  return zl[:, None, :] + update

# Sample a (2, 3) input for transition().
zl = random_tensors([2, 3])
# NOTE(review): the result of transition(zl) is discarded here; only the final
# expression of the cell (zl itself) is echoed. Confirm whether the transition
# output was meant to be displayed instead.
transition(zl)
zl

[[ 0.50821123  0.90014936 -0.60385227]
 [ 0.69215488 -1.78768829  0.02466198]
 [-1.50874976 -1.85839351  0.11481778]
 [ 0.23102821 -0.8458811  -0.74272413]
 [ 0.1285724  -0.7369467  -2.34553577]]
[[[-0.37412187 -0.06134974  1.00581008]
  [ 1.16790923  0.51516494 -0.95070361]
  [ 0.62769131 -0.01614966 -0.66740877]]

 [[ 0.84755028  0.7800709  -0.29999518]
  [ 0.38099461  0.53642316  0.16811752]
  [-0.72684879 -0.25713111  0.6501279 ]]

 [[-0.02188734 -1.68761097 -0.65145989]
  [ 1.5826556  -1.40498171  2.03272291]
  [ 0.47953743 -0.69015786 -0.94683652]]

 [[ 0.15653062  0.10790695 -1.11619871]
  [ 0.40417991 -0.75444407 -0.16326943]
  [-0.84275165  0.36765215  0.16296428]]

 [[ 0.05230584 -1.11286927 -1.4455519 ]
  [-0.48383917 -1.53105119 -0.5308803 ]
  [-0.61200337 -1.00404899  0.02235195]]]


array([[ 2.85093752,  1.24563175,  0.40124155],
       [-1.52450013,  0.358082  , -0.16257073]])