# Import modules

In [14]:
import numpy as np
import random
import os
import torch
from torch.optim import RMSprop
from torch import nn
from IPython.core.debugger import set_trace

# Define word2vec with PyTorch `nn.Embedding`

In [15]:
class Embedding_embedding(nn.Module):
    """Toy word2vec-style network built on ``nn.Embedding``.

    A lookup table with 5 entries of 3 values each feeds a bias-free
    3 -> 2 linear projection.
    """

    def __init__(self):
        super().__init__()
        # The attribute names are read from outside the class, so they stay as-is.
        self.linear_1 = nn.Embedding(num_embeddings=5, embedding_dim=3)
        self.linear_2 = nn.Linear(in_features=3, out_features=2, bias=False)

    def forward(self, x):
        """Look up the vector of every index in ``x`` and project it to 2 values."""
        return self.linear_2(self.linear_1(x))


# Model instance and its plain SGD optimizer (learning rate 1).
embd_emb = Embedding_embedding()
optim_emb = torch.optim.SGD(embd_emb.parameters(), lr=1)

# Define word2vec with PyTorch `nn.Linear`

In [16]:
class Embedding_linear(nn.Module):
    """Toy word2vec-style network whose lookup table is stored in an ``nn.Linear``.

    ``linear_1`` holds the embedding table and ``linear_2`` is a bias-free
    3 -> 2 projection. Instead of multiplying a one-hot vector by
    ``linear_1``, ``forward`` selects the matching rows of the table directly.
    """

    def __init__(self):
        super().__init__()
        self.linear_1 = nn.Linear(5,3,bias=False)
        self.linear_2 = nn.Linear(3,2,bias=False)

    def _lookup_table(self):
        """Return ``linear_1``'s weight as a (tokens, features) table.

        ``nn.Linear`` stores its weight as (out_features, in_features), so the
        native layout has to be transposed before rows can be selected by
        token index. A weight that was replaced by an already row-per-token
        table (e.g. one copied from an ``nn.Embedding``) is returned unchanged.
        """
        layer = self.linear_1
        weight = layer.weight
        if tuple(weight.shape) == (layer.out_features, layer.in_features):
            return weight.t()
        return weight

    def forward(self, x):
        """Select the table rows for the indices in ``x`` and project them.

        The Parameter itself is indexed (not its ``.data``), so the lookup is
        recorded by autograd and ``linear_1.weight`` receives a gradient.
        """
        x = self._lookup_table()[x]
        x = self.linear_2(x)
        return x

embd_lin = Embedding_linear()
optim_lin = torch.optim.SGD(embd_lin.parameters(),lr=1)

In [17]:
# Show the freshly initialised first-layer weights of both models, one after the other.
print(embd_emb.linear_1.weight, embd_lin.linear_1.weight, sep="\n")

Parameter containing:
tensor([[ 1.0444, -0.4959, -0.9114],
        [ 3.3869,  0.5671, -0.6503],
        [ 0.5815, -0.5658,  0.3803],
        [-0.9690,  0.8647, -0.5627],
        [ 1.7004,  0.6742,  1.4118]], requires_grad=True)
Parameter containing:
tensor([[-0.2777,  0.1487,  0.4403, -0.0120,  0.0886],
        [-0.1846, -0.2153, -0.2824, -0.4455,  0.4130],
        [-0.4275, -0.2174, -0.4104,  0.0337,  0.1484]], requires_grad=True)


# Initialize both networks with the same parameters

In [18]:
# Give embd_lin its own copies of embd_emb's weights.
# Wrapping the existing weight directly in a new Parameter would share its
# storage, so a step on one model would silently change the other; cloning
# keeps the two models independent while starting from identical values.
with torch.no_grad():
    embd_lin.linear_1.weight = torch.nn.Parameter(embd_emb.linear_1.weight.detach().clone())
    embd_lin.linear_2.weight = torch.nn.Parameter(embd_emb.linear_2.weight.detach().clone())

# optim_lin was created before the Parameters above were swapped in and still
# references the old tensors; rebuild it (same learning rate) so that its
# step() updates the weights the model actually uses.
optim_lin = torch.optim.SGD(embd_lin.parameters(), lr=optim_lin.param_groups[0]["lr"])

In [19]:
# Show the first registered parameter of each model to confirm they now hold the same values.
print(next(embd_emb.parameters()), next(embd_lin.parameters()), sep="\n")

Parameter containing:
tensor([[ 1.0444, -0.4959, -0.9114],
        [ 3.3869,  0.5671, -0.6503],
        [ 0.5815, -0.5658,  0.3803],
        [-0.9690,  0.8647, -0.5627],
        [ 1.7004,  0.6742,  1.4118]], requires_grad=True)
Parameter containing:
tensor([[ 1.0444, -0.4959, -0.9114],
        [ 3.3869,  0.5671, -0.6503],
        [ 0.5815, -0.5658,  0.3803],
        [-0.9690,  0.8647, -0.5627],
        [ 1.7004,  0.6742,  1.4118]], requires_grad=True)


# Flow data

### for embd_emb model

In [20]:
# Forward pass of the nn.Embedding model on six token indices.
x_emb = torch.LongTensor([0, 1, 2, 2, 4, 1])
ans_emb = embd_emb(x_emb)

# Target: every output should be 1; the (1, 2) target broadcasts over the six samples.
y_emb = np.ones([1, 2])
residual_emb = ans_emb - torch.Tensor(y_emb)

# NumPy snapshot of the raw residuals, reused by the manual gradient computation.
loss_tmp_emb = residual_emb.detach().numpy().copy()

# Sum-of-squares loss.
loss_emb = residual_emb.pow(2).sum()
print(loss_emb)

# Clear old gradients, then back-propagate.
optim_emb.zero_grad()
loss_emb.backward()



tensor(14.9193, grad_fn=<SumBackward0>)


### for embd_lin model

In [21]:
# Forward pass of the nn.Linear-based model on the same six token indices.
x_lin = torch.LongTensor([0, 1, 2, 2, 4, 1])
ans_lin = embd_lin(x_lin)

# Target: every output should be 1; the (1, 2) target broadcasts over the six samples.
y_lin = np.ones([1, 2])
residual_lin = ans_lin - torch.Tensor(y_lin)

# NumPy snapshot of the raw residuals.
loss_tmp_lin = residual_lin.detach().numpy().copy()

# Sum-of-squares loss.
loss_lin = residual_lin.pow(2).sum()
print(loss_lin)

# Clear old gradients, then back-propagate.
optim_lin.zero_grad()
loss_lin.backward()

tensor(14.9193, grad_fn=<SumBackward0>)


# Calculating Gradient

### Manually

In [22]:

# Manual SGD step for the embedding table of embd_emb, computed with NumPy.
# Snapshots of both weight matrices (public .weight instead of the private
# _parameters dict).
w1 = embd_emb.linear_1.weight.detach().numpy().copy()
w2 = embd_emb.linear_2.weight.detach().numpy().copy()
lr = optim_emb.param_groups[0]["lr"]  # use the optimizer's learning rate instead of assuming 1

# loss = sum(residual**2) with residual = embedding @ w2.T - target, so the
# gradient w.r.t. the embedding used by each sample is 2 * residual @ w2.
# grd therefore has one row per *sample*, not per token.
grd = 2*np.dot(loss_tmp_emb,w2)

s = w1.copy()
rng = list(x_emb.numpy())
# Row `pos` of grd belongs to the sample at position `pos`, and it updates the
# table row of that sample's token; repeated tokens accumulate their updates.
for pos, tok in enumerate(rng):
    s[tok] = s[tok] - lr * grd[pos]
print(f'Updated weight after manual backpropagation \n{s}')

Updated weight after manual backpropagation 
[[ 0.7516109   0.603504   -1.8834918 ]
 [ 1.1599792   2.374674   -0.40530527]
 [ 1.2570589   2.9620557  -4.4448185 ]
 [-0.96902066  0.86467224 -0.56268245]
 [ 2.0292573   2.4206758  -0.9710889 ]]


### With autograd

In [23]:
# Apply one optimizer step to each model, using the gradients from the backward passes.
for optimizer in (optim_emb, optim_lin):
    optimizer.step()

# Show the first-layer weights of both models after the step.
print(f'Updated weights of linear_1 of the first model \n {embd_emb.linear_1._parameters["weight"].detach()}')

print(f'Updated weights of linear_1 of the second model \n {embd_lin.linear_1._parameters["weight"].detach()}')

Updated weights of linear_1 of the first model 
 tensor([[ 0.7516,  0.6035, -1.8835],
        [ 1.1600,  2.3747, -0.4053],
        [ 1.2571,  2.9621, -4.4448],
        [-0.9690,  0.8647, -0.5627],
        [ 2.0293,  2.4207, -0.9711]])
Updated weights of linear_1 of the second model 
 tensor([[ 0.7516,  0.6035, -1.8835],
        [ 1.1600,  2.3747, -0.4053],
        [ 1.2571,  2.9621, -4.4448],
        [-0.9690,  0.8647, -0.5627],
        [ 2.0293,  2.4207, -0.9711]])
