In [5]:
import torch 

In [6]:
a = torch.tensor([5.],requires_grad = True)
b = torch.tensor([6.],requires_grad = True) 

In [7]:
y = a**3-b**2   # 3a**2+2b 
y 

tensor([89.], grad_fn=<SubBackward0>)

In [8]:
#dy/da  = 3a**2  ==> 3*25 ==> 75 
# dy/db = -2a   ==> -2*6 ==> -12 

In [9]:
a.grad,b.grad

(None, None)

In [10]:
y.backward()

In [11]:
a.grad

tensor([75.])

In [13]:
b.grad 

tensor([-12.])

In [14]:
W = torch.randn(10,1,requires_grad = True)
b = torch.randn(1,requires_grad = True)

In [16]:
W 

tensor([[-0.0032],
        [-1.0623],
        [ 1.4467],
        [ 0.3075],
        [-0.1497],
        [ 1.6410],
        [ 0.0495],
        [-1.5697],
        [-0.9127],
        [-0.8190]], requires_grad=True)

In [17]:
b 

tensor([-0.5410], requires_grad=True)

In [18]:
x = torch.rand(1,10)
x  # one sample with many feaures (10)

tensor([[0.8916, 0.9958, 0.0325, 0.8517, 0.9184, 0.2756, 0.1850, 0.8500, 0.0706,
         0.6013]])

In [19]:
output = torch.matmul(x,W)+b

In [20]:
output 

tensor([[-2.8598]], grad_fn=<AddBackward0>)

In [21]:
loss = 1-output 

In [22]:
loss

tensor([[3.8598]], grad_fn=<RsubBackward1>)

In [23]:
loss.backward()

In [24]:
W.grad 

tensor([[-0.8916],
        [-0.9958],
        [-0.0325],
        [-0.8517],
        [-0.9184],
        [-0.2756],
        [-0.1850],
        [-0.8500],
        [-0.0706],
        [-0.6013]])

In [25]:
b.grad

tensor([-1.])

In [26]:
# here i dont want to keep the gradient but just the update 
learning_rate = 0.01
with torch.no_grad():
    W = W-learning_rate*W.grad.data

In [27]:
W # new updated weights 

tensor([[ 0.0058],
        [-1.0523],
        [ 1.4471],
        [ 0.3160],
        [-0.1405],
        [ 1.6437],
        [ 0.0514],
        [-1.5612],
        [-0.9120],
        [-0.8130]])

In [28]:
b

tensor([-0.5410], requires_grad=True)

In [30]:
with torch.no_grad():
    b = b-learning_rate*b.grad.data

In [31]:
b

tensor([-0.5310])

In [75]:
BATCH_SIZE = 2
DIM_IN = 10 
HIDDEN_SIZE = 5
DIM_OUT = 3  

class Model(torch.nn.Module):
    def __init__(self, dim_in,dim_out,dim_hidden):
        super(Model, self).__init__() 
        self.layer1 = torch.nn.Linear(dim_in, dim_hidden)
        self.relu = torch.nn.ReLU()
        self.layer2 = torch.nn.Linear(dim_hidden, dim_out)
    
    def forward(self, x):
        x = self.layer1(x)
        x = self.relu(x)
        x = self.layer2(x)
        return x

In [76]:
model_input = torch.randn(size = (BATCH_SIZE,DIM_IN))
model_input

tensor([[ 1.5598, -0.0166,  1.2922, -0.0209,  0.6523,  1.2460, -0.2688,  1.0066,
          0.9079, -1.3793],
        [ 0.6410,  0.1106, -1.1726,  0.1430,  1.0778,  0.0864, -0.6228,  0.3312,
          0.4596, -1.6956]])

In [77]:
model = Model(DIM_IN,DIM_OUT,HIDDEN_SIZE)  

In [78]:
prediction = model(model_input)

In [79]:
model.state_dict()

OrderedDict([('layer1.weight',
              tensor([[-0.2084, -0.1310,  0.2878,  0.1474,  0.0647, -0.0296,  0.1016, -0.0133,
                       -0.0402, -0.1438],
                      [ 0.2922,  0.2071, -0.2646,  0.1377, -0.0151,  0.2158,  0.0112,  0.0149,
                        0.2160, -0.0894],
                      [ 0.1052,  0.0814,  0.2695, -0.2531, -0.2298,  0.2054,  0.2583,  0.2278,
                       -0.2680,  0.2735],
                      [-0.2584,  0.2315, -0.0289, -0.1030,  0.1270, -0.1713,  0.1713, -0.1416,
                        0.1172,  0.1681],
                      [ 0.3119,  0.1895,  0.3134, -0.1926,  0.3028,  0.2430, -0.0471, -0.2896,
                        0.2088,  0.2478]])),
             ('layer1.bias', tensor([0.1538, 0.2298, 0.2887, 0.0395, 0.2933])),
             ('layer2.weight',
              tensor([[-0.2452, -0.2386,  0.1864, -0.1999,  0.4027],
                      [ 0.1814, -0.3270, -0.2416,  0.0841,  0.3197],
                      [-0.0327, 

In [80]:
prediction

tensor([[ 0.5676, -0.1037,  0.7177],
        [ 0.0666, -0.4615,  0.4180]], grad_fn=<AddmmBackward0>)

In [84]:
actual_target = torch.tensor(
    [[-0.4817,  0.7800, -0.1737],
    [ 1.1430, -0.3604,  0.4867]],
    requires_grad=False)

In [85]:
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)

In [86]:
loss = (actual_target - prediction).pow(2).sum()
print(loss)

tensor(3.8502, grad_fn=<SumBackward0>)


In [87]:
loss.backward()

In [88]:
print(model.layer2.weight)
print(model.layer2.weight.grad)

Parameter containing:
tensor([[-0.2452, -0.2386,  0.1864, -0.1999,  0.4027],
        [ 0.1814, -0.3270, -0.2416,  0.0841,  0.3197],
        [-0.0327, -0.0166,  0.2494,  0.2550,  0.1666]], requires_grad=True)
tensor([[ 0.6846, -0.2510,  0.9453,  0.0000,  2.4705],
        [-0.5765, -1.8465, -0.7961,  0.0000, -2.2332],
        [ 0.5815,  1.5141,  0.8030,  0.0000,  2.2268]])


In [89]:
optimizer.step()

In [90]:
print(model.layer2.weight)
print(model.layer2.weight.grad)

Parameter containing:
tensor([[-0.2459, -0.2383,  0.1854, -0.1999,  0.4002],
        [ 0.1820, -0.3251, -0.2408,  0.0841,  0.3219],
        [-0.0333, -0.0181,  0.2486,  0.2550,  0.1643]], requires_grad=True)
tensor([[ 0.6846, -0.2510,  0.9453,  0.0000,  2.4705],
        [-0.5765, -1.8465, -0.7961,  0.0000, -2.2332],
        [ 0.5815,  1.5141,  0.8030,  0.0000,  2.2268]])


In [91]:
optimizer.zero_grad()

In [92]:
print(model.layer2.weight.grad)

tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]])


In [93]:
x = torch.rand(5, requires_grad=True)
y = x.detach()

print(x)
print(y)

tensor([0.5795, 0.5769, 0.1023, 0.1539, 0.0381], requires_grad=True)
tensor([0.5795, 0.5769, 0.1023, 0.1539, 0.0381])
