In [1]:
import torch
from torch import cuda as cd

In [2]:
# Ask PyTorch whether a CUDA device is usable from this process; the boolean
# result is shown as the cell output.
torch.cuda.is_available()

True

In [4]:
# Create two float32 vectors directly on the compute device.
# `torch.tensor(..., device=...)` replaces the legacy `torch.cuda.FloatTensor`
# constructor, and the device is chosen at runtime so the cell also runs on a
# machine without a GPU instead of raising.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
v1 = torch.tensor([1.0, 1.0], dtype=torch.float32, device=device)
v2 = torch.tensor([2.0, 2.0], dtype=torch.float32, device=device)

In [9]:
# Build a half-precision tensor that tracks gradients on the CPU, then move it
# to the compute device. The target is chosen at runtime (GPU when available,
# CPU otherwise) so the cell does not fail on a machine without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
v1 = torch.tensor([1.0, 1.0], requires_grad=True, dtype=torch.float16)
cv = v1.to(device)
cv

tensor([1., 1.], device='cuda:0', dtype=torch.float16,
       grad_fn=<ToCopyBackward0>)

In [13]:
# Two input vectors: only v1 asks autograd to track operations on it.
v1 = torch.tensor([1.0, 1.0], requires_grad=True)
v2 = torch.tensor([2.0, 2.0])

# Element-wise sum of the inputs.
v_sum = torch.add(v1, v2)

# Double every element and reduce to a scalar: 2 * (3 + 3) = 12.
v_res = torch.sum(v_sum * 2)
v_res

tensor(12., grad_fn=<SumBackward0>)

In [16]:
# v1 and v2 were built directly from data, so they are leaves of the graph;
# v_sum and v_res are results of operations, so they are not.
print(v1.is_leaf, v2.is_leaf)        # prints: True True
print(v_sum.is_leaf, v_res.is_leaf)  # prints: False False

# requires_grad for v1, v2, v_sum, v_res, one value per line.
# Prints: True, False, True, True (only v2 does not require a gradient).
print(
    v1.requires_grad,
    v2.requires_grad,
    v_sum.requires_grad,
    v_res.requires_grad,
    sep="\n",
)

True True
False False
True
False
True
True


In [17]:
# Before any backward pass no gradient has been accumulated: prints None.
print(v1.grad)
# Back-propagate from the scalar v_res through the graph built above.
v_res.backward()
# v1.grad now holds d(v_res)/d(v1); the cell output shows tensor([2., 2.]).
v1.grad

None


tensor([2., 2.])

In [20]:
# v2 was created without requires_grad, so backward() stored no gradient
# for it: prints None.
print(v2.grad)

None


In [2]:
import torch.nn as nn

# Fully connected layer mapping 2 input features to 5 outputs.
l = nn.Linear(2, 5)

# A single float32 input vector with two features.
v = torch.tensor([1, 2], dtype=torch.float32)

# Apply the layer; the result is a 5-element tensor attached to the graph.
l(v)

tensor([-0.6698,  0.3323, -0.1244, -1.7146, -1.0995], grad_fn=<AddBackward0>)

In [5]:
# Materialise the layer's parameters (weight matrix, then bias) into a list.
weights = list(l.parameters())
weights

[Parameter containing:
 tensor([[ 0.2527, -0.2877],
         [-0.4889,  0.1065],
         [ 0.6992, -0.1670],
         [-0.3522, -0.4921],
         [-0.1100, -0.4584]], requires_grad=True),
 Parameter containing:
 tensor([-0.3471,  0.6083, -0.4896, -0.3781, -0.0727], requires_grad=True)]

In [6]:
# Show the layer's parameters as an ordered name -> tensor mapping
# ('weight' and 'bias' in the output below).
l.state_dict()

OrderedDict([('weight',
              tensor([[ 0.2527, -0.2877],
                      [-0.4889,  0.1065],
                      [ 0.6992, -0.1670],
                      [-0.3522, -0.4921],
                      [-0.1100, -0.4584]])),
             ('bias', tensor([-0.3471,  0.6083, -0.4896, -0.3781, -0.0727]))])

In [8]:
# Small feed-forward pipeline: 2 -> 5 -> 20 -> 10 features with ReLU between
# the linear layers, then dropout and a softmax over dim 1.
# NOTE(review): dropout sits directly in front of the softmax, so while the
# module is in training mode some logits are zeroed before normalisation;
# presumably intentional for this demo, so confirm before reusing.
s = nn.Sequential(
    nn.Linear(2, 5),
    nn.ReLU(),
    nn.Linear(5, 20),
    nn.ReLU(),
    nn.Linear(20, 10),
    nn.Dropout(p=0.3),
    nn.Softmax(dim=1),
)

# Feed a batch containing one two-feature sample; the output has shape (1, 10).
s(torch.tensor([[1, 2]], dtype=torch.float32))

tensor([[0.1110, 0.0803, 0.1124, 0.0810, 0.0609, 0.0864, 0.1110, 0.1351, 0.1110,
         0.1110]], grad_fn=<SoftmaxBackward0>)

In [9]:
class OurModule(nn.Module):
    """Small feed-forward classifier wrapped in a single ``nn.Sequential``.

    The pipeline is Linear(num_inputs, 5) -> ReLU -> Linear(5, 20) -> ReLU ->
    Linear(20, num_classes) -> Dropout -> Softmax(dim=1).

    Args:
        num_inputs: Number of features in each input sample.
        num_classes: Number of output classes (size of dim 1 of the output).
        dropout_prob: Drop probability passed to ``nn.Dropout``.
    """

    def __init__(self, num_inputs, num_classes, dropout_prob=0.3):
        super().__init__()
        # Order matters: the positions become the 'pipe.<index>' prefixes
        # of the parameter names in state_dict().
        layers = [
            nn.Linear(num_inputs, 5),
            nn.ReLU(),
            nn.Linear(5, 20),
            nn.ReLU(),
            nn.Linear(20, num_classes),
            nn.Dropout(p=dropout_prob),
            nn.Softmax(dim=1),
        ]
        self.pipe = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the pipeline and return the softmax output.

        The softmax normalises over dim 1, so ``x`` needs a leading batch
        dimension.
        """
        probabilities = self.pipe(x)
        return probabilities


In [10]:
# Instantiate the module for 4 input features and 10 classes, then list its
# parameters by name.
om = OurModule(num_inputs=4, num_classes=10)
om.state_dict()

OrderedDict([('pipe.0.weight',
              tensor([[-0.2880, -0.1210, -0.1055, -0.3797],
                      [-0.0099,  0.0426, -0.4343,  0.0116],
                      [-0.4811, -0.0695, -0.4620, -0.0915],
                      [ 0.2280,  0.1384, -0.0990,  0.2991],
                      [-0.4768, -0.3075,  0.4993,  0.2594]])),
             ('pipe.0.bias',
              tensor([ 0.2132,  0.4058, -0.4434,  0.2489,  0.4450])),
             ('pipe.2.weight',
              tensor([[-0.2204,  0.4376, -0.2879, -0.3519, -0.2248],
                      [ 0.4456,  0.4019,  0.3911, -0.0035, -0.1829],
                      [-0.0843,  0.2125, -0.1950,  0.1722, -0.3968],
                      [-0.3543, -0.2289, -0.3621,  0.2675,  0.2198],
                      [-0.2762, -0.3255,  0.3971, -0.2064,  0.4366],
                      [-0.0321,  0.0683,  0.1667,  0.1833, -0.0089],
                      [ 0.2045, -0.1398,  0.3495,  0.0634, -0.1265],
                      [ 0.1199, -0.0534, -0.2717, -0.