In [1]:
import torch
from torch import cuda as cd

In [2]:
# Ask PyTorch whether CUDA is usable from this kernel; the result is shown as the cell output.
torch.cuda.is_available()

True

In [4]:
# Two float32 vectors, placed on the GPU when one is available and on the CPU otherwise.
# torch.tensor(..., device=...) replaces the legacy typed constructor
# torch.cuda.FloatTensor, which fails outright on a machine without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
v1 = torch.tensor([1.0, 1.0], dtype=torch.float32, device=device)
v2 = torch.tensor([2.0, 2.0], dtype=torch.float32, device=device)

In [9]:
# Half-precision leaf tensor that tracks gradients; it is created on the CPU.
v1 = torch.tensor([1.0, 1.0], requires_grad=True, dtype=torch.float16)

# Pick the target device at runtime so the cell also runs on a machine without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# With CUDA present the move is a copy recorded by autograd (the displayed tensor
# carries grad_fn=<ToCopyBackward0>). On the CPU fallback Tensor.to() has nothing
# to convert, so no copy node is expected there.
cv = v1.to(device)
cv

tensor([1., 1.], device='cuda:0', dtype=torch.float16,
       grad_fn=<ToCopyBackward0>)

In [13]:
# v2 is a plain constant; v1 is the only input that asks autograd to track it.
v2 = torch.tensor([2.0, 2.0])
v1 = torch.tensor([1.0, 1.0], requires_grad=True)

# Element-wise sum of the two vectors.
v_sum = torch.add(v1, v2)

# Double every element, then reduce to a scalar so it can be backpropagated later.
v_res = torch.sum(torch.mul(v_sum, 2))
v_res

tensor(12., grad_fn=<SumBackward0>)

In [16]:
# Leaf status: the two tensors built from data are leaves, the computed ones are not.
print(v1.is_leaf, v2.is_leaf)        # True True
print(v_sum.is_leaf, v_res.is_leaf)  # False False

# requires_grad, one tensor per line: only v2 does not track gradients,
# and everything computed from v1 inherits the flag (True, False, True, True).
for tensor in (v1, v2, v_sum, v_res):
    print(tensor.requires_grad)

True True
False False
True
False
True
True


In [17]:
# Before any backward pass v1 has no stored gradient, so this prints None.
print(v1.grad)
# Backpropagate from the scalar v_res; this populates .grad on v1.
v_res.backward()
# v_res = sum(2 * (v1 + v2)), so the gradient with respect to v1 is 2 per element.
v1.grad

None


tensor([2., 2.])

In [20]:
# v2 was created without requires_grad, so the backward pass stored nothing for it (prints None).
print(v2.grad)

None


In [2]:
import torch.nn as nn
# Fully connected layer mapping 2 input features to 5 output features.
l = nn.Linear(2, 5)

# Build the input with torch.tensor(), the documented way to create a tensor from
# data, instead of the legacy torch.FloatTensor constructor. The dtype is pinned to
# float32 to match what FloatTensor produced.
v = torch.tensor([1.0, 2.0], dtype=torch.float32)

# Apply the layer to the single sample; the result has 5 elements.
l(v)

tensor([-0.6698,  0.3323, -0.1244, -1.7146, -1.0995], grad_fn=<AddBackward0>)

In [5]:
# Collect the layer's parameters into a list (weight matrix first, then bias) and display it.
weights = list(l.parameters())
weights

[Parameter containing:
 tensor([[ 0.2527, -0.2877],
         [-0.4889,  0.1065],
         [ 0.6992, -0.1670],
         [-0.3522, -0.4921],
         [-0.1100, -0.4584]], requires_grad=True),
 Parameter containing:
 tensor([-0.3471,  0.6083, -0.4896, -0.3781, -0.0727], requires_grad=True)]

In [6]:
# Show the layer's parameters as an ordered name -> tensor mapping ('weight', 'bias').
l.state_dict()

OrderedDict([('weight',
              tensor([[ 0.2527, -0.2877],
                      [-0.4889,  0.1065],
                      [ 0.6992, -0.1670],
                      [-0.3522, -0.4921],
                      [-0.1100, -0.4584]])),
             ('bias', tensor([-0.3471,  0.6083, -0.4896, -0.3781, -0.0727]))])

In [8]:
# Small feed-forward stack: 2 inputs -> 5 -> 20 -> 10 outputs, finished with a softmax
# over dim 1 so each row of the output sums to 1.
# NOTE(review): Dropout is applied to the final layer's outputs right before Softmax.
# In training mode that presumably zeroes some logits, which would explain the repeated
# identical probabilities in the recorded output. Confirm this placement is intended.
s = nn.Sequential(
    nn.Linear(2, 5),
    nn.ReLU(),
    nn.Linear(5, 20),
    nn.ReLU(),
    nn.Linear(20, 10),
    nn.Dropout(p=0.3),
    nn.Softmax(dim=1),
)

# Feed a batch holding one 2-feature sample. torch.tensor() replaces the legacy
# torch.FloatTensor constructor; dtype is pinned to float32 to match it.
s(torch.tensor([[1.0, 2.0]], dtype=torch.float32))

tensor([[0.1110, 0.0803, 0.1124, 0.0810, 0.0609, 0.0864, 0.1110, 0.1351, 0.1110,
         0.1110]], grad_fn=<SoftmaxBackward0>)

In [9]:
class OurModule(nn.Module):
    """Small classifier: three linear layers with ReLU in between, then dropout and softmax.

    Args:
        num_inputs: number of input features per sample.
        num_classes: number of output classes (size of the softmax output).
        dropout_prob: probability passed to the Dropout layer (default 0.3).
    """

    def __init__(self, num_inputs, num_classes, dropout_prob=0.3):
        super().__init__()
        layers = [
            nn.Linear(num_inputs, 5),
            nn.ReLU(),
            nn.Linear(5, 20),
            nn.ReLU(),
            nn.Linear(20, num_classes),
            nn.Dropout(p=dropout_prob),
            nn.Softmax(dim=1),
        ]
        # Stored as `pipe`, so parameters are named 'pipe.<index>.<name>' in state_dict().
        self.pipe = nn.Sequential(*layers)

    def forward(self, x):
        """Run `x` through the whole pipeline; softmax is taken over dim 1."""
        return self.pipe(x)


In [23]:
# Build a 4-input, 10-class module and take a snapshot of its state_dict.
om = OurModule(4, 10)
explore = om.state_dict()
# The state_dict entry reports requires_grad=False and has no gradient (prints False, None).
print(explore['pipe.0.weight'].requires_grad)
print(explore['pipe.0.weight'].grad)
om.zero_grad()
# Still None after zero_grad().
print(explore['pipe.0.weight'].grad)
# Flipping requires_grad on the snapshot entry does not carry over to a fresh
# state_dict(): the print below shows False again.
explore['pipe.0.weight'].requires_grad = True
explore1 = om.state_dict()
print(explore1['pipe.0.weight'].requires_grad)
# Bare expression in the middle of the cell: evaluated, but not displayed.
explore1['pipe.0.weight'].grad
# NOTE(review): the recorded traceback mentions `params`, which does not appear in this
# cell, so the saved output looks stale relative to the code. Re-run to confirm.
om.parameters()

False
None
None
False


AttributeError: 'OurModule' object has no attribute 'params'

In [15]:
# Build a 2-input, 3-class model and push one sample through it.
net = OurModule(num_inputs=2, num_classes=3)

# torch.tensor() is the documented way to create a tensor from data; it replaces the
# legacy torch.FloatTensor constructor. dtype is pinned to float32 to match it.
v = torch.tensor([[2.0, 3.0]], dtype=torch.float32)
out = net(v)

print(net)        # layer-by-layer structure of the module
print(out)        # one row of 3 class probabilities
print(out.sum())  # softmax output sums to 1

OurModule(
  (pipe): Sequential(
    (0): Linear(in_features=2, out_features=5, bias=True)
    (1): ReLU()
    (2): Linear(in_features=5, out_features=20, bias=True)
    (3): ReLU()
    (4): Linear(in_features=20, out_features=3, bias=True)
    (5): Dropout(p=0.3, inplace=False)
    (6): Softmax(dim=1)
  )
)
tensor([[0.3223, 0.3256, 0.3521]], grad_fn=<SoftmaxBackward0>)
tensor(1., grad_fn=<SumBackward0>)
