In [1]:
import torch
import numpy as np

# Tensors

In [2]:
def print_tensor(x):
    """Print a three-part summary of tensor ``x``.

    Emits, in order: the type string returned by ``x.type()``, the size
    (``x.shape``), and the tensor's own printed representation.
    """
    type_line = "Type: {}".format(x.type())
    print(type_line)
    size_line = "Size: {}".format(x.shape)
    print(size_line)
    # The values start on their own line, after the "Values: " label.
    values_block = "Values: \n{}".format(x)
    print(values_block)


* Construct a 3x4 matrix, **uninitialized**:

In [3]:
# torch.empty only allocates storage, so the printed values are arbitrary.
x = torch.empty((3, 4))
print_tensor(x)

Type: torch.FloatTensor
Size: torch.Size([3, 4])
Values: 
tensor([[5.3150e-37, 0.0000e+00, 3.3631e-44, 0.0000e+00],
        [       nan, 0.0000e+00, 1.1578e+27, 1.1362e+30],
        [7.1547e+22, 4.5828e+30, 1.2121e+04, 7.1846e+22]])


Or:

In [5]:
# Constructor form: the two ints are interpreted as the size (the output below
# reports Size [3, 4]), not as data; the contents are again uninitialized.
x = torch.Tensor(3, 4)
print_tensor(x)

Type: torch.FloatTensor
Size: torch.Size([3, 4])
Values: 
tensor([[1.1675e-37, 0.0000e+00, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00]])


* Construct a 3x4 matrix filled with zeros and of dtype long (**the default dtype is float**):

In [6]:
# Request an integer (long) tensor explicitly instead of the float default.
x = torch.zeros((3, 4), dtype=torch.long)
print_tensor(x)


Type: torch.LongTensor
Size: torch.Size([3, 4])
Values: 
tensor([[0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0]])


* Construct a 3x4 matrix filled with 1:

In [7]:
# All-ones matrix with the default (float) dtype.
x = torch.ones((3, 4))
print_tensor(x)

Type: torch.FloatTensor
Size: torch.Size([3, 4])
Values: 
tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])


* Construct a matrix with the same size as an existing one; the dtype can be overridden:

In [8]:
# Random tensor with the size of x, but stored as float64 (torch.double) instead of x's dtype.
y = torch.randn_like(x, dtype=torch.float64)
print_tensor(y)

Type: torch.DoubleTensor
Size: torch.Size([3, 4])
Values: 
tensor([[-1.0737,  0.3816,  1.2341,  0.6274],
        [-2.1032, -0.4746,  0.9298, -1.7381],
        [-1.6873,  0.7534, -0.1205,  0.5039]], dtype=torch.float64)


* Construct a randomly initialized matrix:

In [9]:
# Random 3x4 matrix with the default (float) dtype.
x = torch.randn((3, 4))
print_tensor(x)

Type: torch.FloatTensor
Size: torch.Size([3, 4])
Values: 
tensor([[-0.1983, -1.2715,  0.3841, -0.5512],
        [-1.7458, -0.4385, -1.0104,  1.0109],
        [ 1.2072, -1.3872,  2.3415, -0.7650]])


* Construct a tensor from data:

In [10]:
# Build directly from a Python list; the integer literals are stored as float64.
x = torch.tensor([1, 2, 3], dtype=torch.float64)
print_tensor(x)

Type: torch.DoubleTensor
Size: torch.Size([3])
Values: 
tensor([1., 2., 3.], dtype=torch.float64)


# Operations

* Addition:

In [11]:
# Two random operands of identical size; x is drawn first, then y.
x, y = torch.randn((3, 4)), torch.randn((3, 4))
# Functional form of element-wise addition; returns a new tensor.
z = torch.add(x, y)
print_tensor(z)

Type: torch.FloatTensor
Size: torch.Size([3, 4])
Values: 
tensor([[-0.3894,  0.6480, -0.4915,  2.0711],
        [-0.1317, -1.4626,  2.0889,  0.5542],
        [-2.6885,  1.3449, -0.6045,  3.2349]])


Or:

In [12]:
# Operator form of the same addition; prints the same values as torch.add(x, y) above.
print_tensor(x+y)

Type: torch.FloatTensor
Size: torch.Size([3, 4])
Values: 
tensor([[-0.3894,  0.6480, -0.4915,  2.0711],
        [-0.1317, -1.4626,  2.0889,  0.5542],
        [-2.6885,  1.3449, -0.6045,  3.2349]])


Or:

In [13]:
# Pre-allocate the destination, then have torch.add write the sum into it.
res = torch.empty((3, 4))
torch.add(x, y, out=res)
print_tensor(res)

Type: torch.FloatTensor
Size: torch.Size([3, 4])
Values: 
tensor([[-0.3894,  0.6480, -0.4915,  2.0711],
        [-0.1317, -1.4626,  2.0889,  0.5542],
        [-2.6885,  1.3449, -0.6045,  3.2349]])


Or:

In [14]:
# In-place form: add_ stores the sum in y itself (note the trailing underscore),
# so re-running this cell keeps adding x to y.
y.add_(x)
print_tensor(y)

Type: torch.FloatTensor
Size: torch.Size([3, 4])
Values: 
tensor([[-0.3894,  0.6480, -0.4915,  2.0711],
        [-0.1317, -1.4626,  2.0889,  0.5542],
        [-2.6885,  1.3449, -0.6045,  3.2349]])


**Note**: Any operation that mutates a tensor in-place is post-fixed with an `_`. For example, `x.copy_(y)` and `x.t_()` will change `x`.

* Reshape

In [15]:
x = torch.randn((3, 4))
print_tensor(x)

# Same 12 elements viewed as 4x3.
y = x.view(4, 3)
# A -1 entry lets view() work out that dimension from the others (here: 2).
z = x.view(6, -1)

print_tensor(y)
print_tensor(z)

Type: torch.FloatTensor
Size: torch.Size([3, 4])
Values: 
tensor([[ 1.2844, -1.3509, -0.1249,  0.0427],
        [-0.2797, -0.1632,  0.7218, -0.3860],
        [ 1.6910, -0.9057,  2.1930,  1.1108]])
Type: torch.FloatTensor
Size: torch.Size([4, 3])
Values: 
tensor([[ 1.2844, -1.3509, -0.1249],
        [ 0.0427, -0.2797, -0.1632],
        [ 0.7218, -0.3860,  1.6910],
        [-0.9057,  2.1930,  1.1108]])
Type: torch.FloatTensor
Size: torch.Size([6, 2])
Values: 
tensor([[ 1.2844, -1.3509],
        [-0.1249,  0.0427],
        [-0.2797, -0.1632],
        [ 0.7218, -0.3860],
        [ 1.6910, -0.9057],
        [ 2.1930,  1.1108]])


# Numpy Bridge

* Convert a Torch Tensor to a Numpy Array

In [16]:
x = torch.randn((3, 4))
print_tensor(x)
# .numpy() exposes the tensor as a NumPy array; the next cell shows both change together.
y = x.numpy()
print(y)

Type: torch.FloatTensor
Size: torch.Size([3, 4])
Values: 
tensor([[ 0.1591, -1.0089,  0.2497,  0.6450],
        [-1.7070, -0.3038, -0.5330,  1.0889],
        [ 0.2026, -0.2747,  1.1482, -0.4997]])
[[ 0.15912676 -1.00890338  0.24970655  0.64500386]
 [-1.70702636 -0.30384415 -0.53302473  1.08886623]
 [ 0.20256519 -0.27467865  1.14816725 -0.49972969]]


Note that `y` shares its underlying memory with `x`: if the values of either one change, the other changes as well.

In [17]:
# Step 1: mutate the tensor in place; the NumPy array y shows the new values too.
x.add_(1)
print_tensor(x)
print(y)
# Step 2: mutate the array in place (out=y writes back into y); the tensor x follows.
np.add(y, 1, out=y)
print(y)
print_tensor(x)

Type: torch.FloatTensor
Size: torch.Size([3, 4])
Values: 
tensor([[ 1.1591, -0.0089,  1.2497,  1.6450],
        [-0.7070,  0.6962,  0.4670,  2.0889],
        [ 1.2026,  0.7253,  2.1482,  0.5003]])
[[ 1.15912676 -0.00890338  1.24970651  1.6450038 ]
 [-0.70702636  0.69615585  0.46697527  2.08886623]
 [ 1.20256519  0.72532135  2.14816713  0.50027031]]
[[ 2.15912676  0.99109662  2.24970651  2.6450038 ]
 [ 0.29297364  1.69615579  1.46697521  3.08886623]
 [ 2.20256519  1.72532129  3.14816713  1.50027037]]
Type: torch.FloatTensor
Size: torch.Size([3, 4])
Values: 
tensor([[2.1591, 0.9911, 2.2497, 2.6450],
        [0.2930, 1.6962, 1.4670, 3.0889],
        [2.2026, 1.7253, 3.1482, 1.5003]])


* Convert a Numpy Array to a Torch Tensor

In [18]:
# The NumPy array is float64, so the tensor built from it is a DoubleTensor
# (see the printed type below) rather than the default FloatTensor.
x = np.random.randn(3, 4)
print(x)
y = torch.from_numpy(x)
print_tensor(y)

[[-0.32459293  1.14189097  0.3436654   1.18843369]
 [-1.38305871 -0.35803183 -0.56122752 -1.51309858]
 [-0.18600229 -1.10190765 -0.04749221 -0.25770508]]
Type: torch.DoubleTensor
Size: torch.Size([3, 4])
Values: 
tensor([[-0.3246,  1.1419,  0.3437,  1.1884],
        [-1.3831, -0.3580, -0.5612, -1.5131],
        [-0.1860, -1.1019, -0.0475, -0.2577]], dtype=torch.float64)


Similarly, `y` shares memory with `x`: if one of them changes, the other changes as well.

In [19]:
# Step 1: mutate the tensor in place; the source array x shows the new values too.
y.add_(1)
print_tensor(y)
print(x)
# Step 2: mutate the array in place (out=x writes back into x); the tensor y follows.
np.add(x, 1, out=x)
print(x)
print_tensor(y)

Type: torch.DoubleTensor
Size: torch.Size([3, 4])
Values: 
tensor([[ 0.6754,  2.1419,  1.3437,  2.1884],
        [-0.3831,  0.6420,  0.4388, -0.5131],
        [ 0.8140, -0.1019,  0.9525,  0.7423]], dtype=torch.float64)
[[ 0.67540707  2.14189097  1.3436654   2.18843369]
 [-0.38305871  0.64196817  0.43877248 -0.51309858]
 [ 0.81399771 -0.10190765  0.95250779  0.74229492]]
[[ 1.67540707  3.14189097  2.3436654   3.18843369]
 [ 0.61694129  1.64196817  1.43877248  0.48690142]
 [ 1.81399771  0.89809235  1.95250779  1.74229492]]
Type: torch.DoubleTensor
Size: torch.Size([3, 4])
Values: 
tensor([[1.6754, 3.1419, 2.3437, 3.1884],
        [0.6169, 1.6420, 1.4388, 0.4869],
        [1.8140, 0.8981, 1.9525, 1.7423]], dtype=torch.float64)


# CUDA

In [4]:
# Query CUDA support once and reuse the answer to guard the GPU step below.
cuda_ok = torch.cuda.is_available()
print(cuda_ok)

# CPU placement always works.
x = torch.Tensor(3, 4).to("cpu")
print("Type: {}".format(x.type()))

# Moving to "cuda" raises on machines without a usable GPU, so only attempt it
# when CUDA was reported as available; otherwise x stays the CPU tensor above.
if cuda_ok:
    x = torch.Tensor(3, 4).to("cuda")
    print("Type: {}".format(x.type()))
else:
    print("CUDA is not available; skipping the GPU example.")

True
Type: torch.FloatTensor
Type: torch.cuda.FloatTensor


# Automatic Differentiation

In [29]:
# Leaf tensor whose gradient we want; requires_grad=True makes autograd track it.
x = torch.ones((3, 4), requires_grad=True)
print_tensor(x)

# Every operation on x is recorded (see grad_fn in the printed output).
y = x + 2
print_tensor(y)

z = y * y * 3
print_tensor(z)

# Scalar summary of z.
out = z.mean()
print_tensor(out)

Type: torch.FloatTensor
Size: torch.Size([3, 4])
Values: 
tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]], requires_grad=True)
Type: torch.FloatTensor
Size: torch.Size([3, 4])
Values: 
tensor([[3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.]], grad_fn=<AddBackward0>)
Type: torch.FloatTensor
Size: torch.Size([3, 4])
Values: 
tensor([[27., 27., 27., 27.],
        [27., 27., 27., 27.],
        [27., 27., 27., 27.]], grad_fn=<MulBackward0>)
Type: torch.FloatTensor
Size: torch.Size([])
Values: 
27.0


What we computed above is (all operations are element-wise):
$$\mathbf{z} = (\mathbf{x}+2)*(\mathbf{x}+2)*3$$
As a result:
$$\frac{\partial{\mathbf{z}}}{\partial{\mathbf{x}}} = 6(\mathbf{x}+2)$$

In [31]:
# Evaluate the hand-derived gradient 6*(x+2) directly, for comparison with autograd.
print_tensor(6*(x+2))

Type: torch.FloatTensor
Size: torch.Size([3, 4])
Values: 
tensor([[18., 18., 18., 18.],
        [18., 18., 18., 18.],
        [18., 18., 18., 18.]], grad_fn=<MulBackward0>)


Alternatively, we can let PyTorch compute the same gradient with `backward()`:

In [30]:
# z is not a scalar, so backward() needs the upstream gradient (one weight per
# element of z). Passing a tensor of ones yields the plain element-wise
# derivative dz/dx. The previous `z.backward(x)` only gave the right answer
# because x happened to be all ones; any other x would have scaled the result.
z.backward(torch.ones_like(z))
print_tensor(x.grad)

Type: torch.FloatTensor
Size: torch.Size([3, 4])
Values: 
tensor([[18., 18., 18., 18.],
        [18., 18., 18., 18.],
        [18., 18., 18., 18.]])


# Neural Networks

In [33]:
import torch.nn as nn
import torch.nn.functional as F

## Define the network

In [43]:
class Net(nn.Module):
    """Two conv + ReLU + max-pool stages followed by three fully connected layers.

    ``fc1`` takes 16*5*5 = 400 features, which is what a single-channel
    32x32 input is reduced to after the two conv/pool stages.
    The network outputs 10 values per sample.
    """

    def __init__(self):
        super(Net, self).__init__()
        # Convolutional part: 1 -> 6 -> 16 channels, 5x5 kernels.
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # Fully connected part: 400 -> 120 -> 84 -> 10.
        self.fc1 = nn.Linear(16*5*5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run ``x`` through the network and return the raw 10-way output."""
        # Pooling window given as a tuple ...
        stage1 = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # ... or as a single int for a square window.
        stage2 = F.max_pool2d(F.relu(self.conv2(stage1)),  2)
        # Flatten everything except the batch dimension.
        flat = stage2.view(-1, self.num_flat_features(stage2))
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

    def num_flat_features(self, x):
        """Return the product of all dimensions of ``x`` except the first."""
        num_features = 1
        for dim in x.size()[1:]:
            num_features *= dim
        return num_features
    
net = Net()
print(net)

# Learnable tensors in registration order: a weight and a bias per layer.
params = [p for p in net.parameters()]
print(len(params))
print(params[0].shape)  # conv1's .weight

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)
10
torch.Size([6, 1, 5, 5])


## Loss function

In [53]:
# One random single-channel 32x32 sample.
# NOTE: `input` shadows the Python builtin; the name is kept because the
# optimizer cell further down reads it.
input = torch.randn((1, 1, 32, 32))
out = net(input)

# Random regression target, reshaped to (1, 10) to match the network output.
labels = torch.randn(10).view(1, -1)

print_tensor(out)
print_tensor(labels)

# Mean squared error between prediction and target.
criterion = nn.MSELoss()
loss = criterion(out, labels)
print_tensor(loss)

Type: torch.FloatTensor
Size: torch.Size([1, 10])
Values: 
tensor([[ 0.0899,  0.0004, -0.0706,  0.0298, -0.0630, -0.0761,  0.0394,  0.0406,
          0.0168, -0.0437]], grad_fn=<AddmmBackward>)
Type: torch.FloatTensor
Size: torch.Size([1, 10])
Values: 
tensor([[-0.8335, -1.2581, -0.7336, -2.0533,  1.3898, -0.4556,  1.7538, -0.5374,
          1.4627, -0.9481]])
Type: torch.FloatTensor
Size: torch.Size([])
Values: 
1.5652341842651367


## Backprop

In [54]:
# Clear the gradient buffers of all parameters before the backward pass,
# so the values printed below come from this loss only.
net.zero_grad()  # zeroes the gradient buffers of all parameters

# Backpropagate the loss through the network.
loss.backward()

print('conv1.bias.grad after backward')
print(net.conv1.bias.grad)

conv1.bias.grad after backward
tensor([-0.0245, -0.0147, -0.0031,  0.0313, -0.0037,  0.0147])


## Optimizer

In [56]:
import torch.optim as optim

# Create the optimizer: SGD over all of the network's parameters, learning rate 0.01.
optimizer = optim.SGD(net.parameters(), lr=0.01)

# One training-loop iteration (in real training this block runs once per batch):
optimizer.zero_grad()   # zero the gradient buffers
output = net(input)     # forward pass
loss = criterion(output, labels)
loss.backward()         # compute gradients of the loss
optimizer.step()    # apply the parameter update

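P.S. Callable objects in Python: a class that defines `__call__` lets its instances be called like functions. This is why `net(input)` above works; `nn.Module.__call__` runs `forward` for us.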

In [48]:
class Test():
    """Toy class whose instances can be called like functions."""

    def __init__(self):
        # Value that each call rescales.
        self.x = 3

    def linear(self, w):
        """Multiply the stored value by ``w`` in place."""
        self.x *= w

    def __call__(self, w):
        """Make ``instance(w)`` equivalent to ``instance.linear(w)``."""
        self.linear(w)


# Calling the instance goes through __call__, so x becomes 3 * 4.
t = Test()
t(4)
print(t.x)

12
