In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim



criterion = nn.MSELoss()

W1 = torch.tensor([[2.0,1.0],[0.0,-1.0]], requires_grad=True)
W2 = torch.tensor([[-1.0,2.0],[1.0,2.0]], requires_grad=True)
X  = torch.tensor([1.0,2.0])
T  = torch.tensor([0.0,0.0])


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(2,2,bias=False)
        self.fc2 = nn.Linear(2,2,bias=False)
        self.ReLU = nn.ReLU()
    def forward(self, x):
        S2 = self.fc1(x)
        #print("S2 is:", S2)
        Y2 = self.ReLU(S2)
        y = self.ReLU(self.fc2(Y2))
        return y

net = Net()
# nn.linear computes X*A^T. Thus, we provided transposed value
net.fc1.weight = torch.nn.Parameter(torch.transpose(W1,0,1)) 
net.fc2.weight = torch.nn.Parameter(torch.transpose(W2,0,1))


print(net)
print("Initial parameter:", list(net.parameters()))

Y3 = net(X)

print("Y3 is:", Y3)

optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.5)
loss = criterion(Y3, T)
loss.backward()

print("W1.grad:", net.fc1.weight.grad)
print("W2.grad:", net.fc2.weight.grad)



Net(
  (fc1): Linear(in_features=2, out_features=2, bias=False)
  (fc2): Linear(in_features=2, out_features=2, bias=False)
  (ReLU): ReLU()
)
Initial parameter: [Parameter containing:
tensor([[ 2.,  0.],
        [ 1., -1.]], requires_grad=True), Parameter containing:
tensor([[-1.,  1.],
        [ 2.,  2.]], requires_grad=True)]
Y3 is: tensor([0., 4.], grad_fn=<ReluBackward0>)
W1.grad: tensor([[ 8., 16.],
        [ 0.,  0.]])
W2.grad: tensor([[0., 0.],
        [8., 0.]])


In [7]:
optimizer.step()

print("new parameter:", list(net.parameters()))
Y3 = net(X)
print("Y3 after 1st training is:", Y3)

loss = criterion(Y3, T)
loss.backward()

optimizer.step()

Y3 = net(X)
print("Y3 after 2nd training is:", Y3)

loss = criterion(Y3, T)
loss.backward()

optimizer.step()

Y3 = net(X)
print("Y3 after 3rd training is:", Y3)

new parameter: [Parameter containing:
tensor([[ 1.9200, -0.1600],
        [ 1.0000, -1.0000]], requires_grad=True), Parameter containing:
tensor([[-1.0000,  1.0000],
        [ 1.9200,  2.0000]], requires_grad=True)]
Y3 after 1st training is: tensor([0.0000, 3.0720], grad_fn=<ReluBackward0>)
Y3 after 2nd training is: tensor([0.0000, 1.2345], grad_fn=<ReluBackward0>)
Y3 after 3rd training is: tensor([0., 0.], grad_fn=<ReluBackward0>)
