In [1]:
import numpy as np
import torch

# torch.empty(*size): allocates the tensor without initializing its memory,
# so the printed values are arbitrary
one_elem = torch.empty(1)  # 1-D tensor holding a single element
print("empty(1):", one_elem)
vec = torch.empty(3)  # vector
print("empty(3):", vec)
mat = torch.empty((2, 3))  # matrix
print("empty(2,3):", mat)
cube = torch.empty((2, 2, 3))  # tensor, 3 dimensions
# cube = torch.empty((2, 2, 2, 3))  # tensor, 4 dimensions
print("empty(2, 2, 3):", cube)

# torch.rand(*size): uniform random numbers in [0, 1)
uniform = torch.rand((5, 3))
print("rand(5, 3):", uniform)

# torch.zeros(*size): filled with 0
# torch.ones(*size): filled with 1
x = torch.zeros((5, 3))  # bound to `x` because the next cells inspect it
print("zeros(5, 3):", x)



empty(1): tensor([1.6928e+22])
empty(3): tensor([-4.5750e-37,  3.3179e-41, -4.5797e-37])
empty(2,3): tensor([[-4.5830e-37,  3.3179e-41, -4.5827e-37],
        [ 3.3179e-41,  1.1652e-32,  2.5353e+30]])
empty(2, 2, 3): tensor([[[-1.9865e-07,  4.5489e-41, -3.9918e-37],
         [ 3.3179e-41,  4.4842e-44,  0.0000e+00]],

        [[ 1.1210e-43,  0.0000e+00,  1.7912e+03],
         [ 3.3172e-41,  1.4013e-45,  0.0000e+00]]])
rand(5, 3): tensor([[0.3141, 0.0712, 0.8346],
        [0.6552, 0.7213, 0.2897],
        [0.3214, 0.7029, 0.6014],
        [0.2480, 0.4307, 0.8913],
        [0.3695, 0.9863, 0.0286]])
zeros(5, 3): tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])


In [2]:
# Tensor.size() and Tensor.shape are two spellings of the same torch.Size
dims_from_method = x.size()  # x.size(0) returns the length of dimension 0
dims_from_attr = x.shape  # x.shape[0] returns the same value
print("size", dims_from_method)
print("shape", dims_from_attr)

size torch.Size([5, 3])
shape torch.Size([5, 3])


In [3]:
# dtype of the tensor created in the previous cell
print(x.dtype)

# pass dtype= to choose the element type explicitly (float32 is the default)
x = torch.zeros((5, 3), dtype=torch.float16)

# show the new tensor, then confirm its dtype
print(x, x.dtype, sep="\n")

torch.float32
tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]], dtype=torch.float16)
torch.float16


In [4]:
# Build a tensor directly from a Python list; mixing a float and an int
# yields a floating-point tensor
values = [5.5, 3]
x = torch.tensor(values)
print(x, x.dtype)

tensor([5.5000, 3.0000]) torch.float32


In [5]:
# requires_grad argument
# Setting it tells PyTorch that gradients must be calculated for this tensor
# later, during the optimization steps,
# i.e. the tensor is a variable of your model that you want to optimize.
values = [5.5, 3]
x = torch.tensor(values, requires_grad=True)
print(x)


tensor([5.5000, 3.0000], requires_grad=True)


In [6]:
# Operations
x = torch.ones((2, 2))
y = torch.rand((2, 2))

# elementwise addition, functional spelling
z = torch.add(x, y)
# operator spelling: x + y

# In-place addition: every method with a trailing underscore is an in-place
# operation, i.e. it modifies the tensor it is called on
# y.add_(x)

print(x, y, z, sep="\n")

tensor([[1., 1.],
        [1., 1.]])
tensor([[0.1559, 0.1493],
        [0.4920, 0.5613]])
tensor([[1.1559, 1.1493],
        [1.4920, 1.5613]])


In [7]:
# Every elementwise operation is shown twice: functional form first,
# then the equivalent operator form.

# subtraction
z = torch.sub(x, y)
z = x - y

# multiplication
z = torch.mul(x, y)
z = x * y

# division
z = torch.div(x, y)
z = x / y

In [8]:
# Slicing
x = torch.rand((5, 3))
print(x)

first_column = x[:, 0]  # all rows, column 0
second_row = x[1, :]  # row 1, all columns
single_entry = x[1, 1]  # element at 1, 1 (still a tensor)
print("x[:, 0]", first_column)
print("x[1, :]", second_row)
print("x[1, 1]", single_entry)

# .item() extracts the plain Python value; only valid for a tensor with exactly one element
print("x[1,1].item()", single_entry.item())

tensor([[0.1139, 0.6005, 0.6915],
        [0.8331, 0.1322, 0.2562],
        [0.3927, 0.2311, 0.2957],
        [0.8159, 0.1810, 0.4467],
        [0.3680, 0.7948, 0.5824]])
x[:, 0] tensor([0.1139, 0.8331, 0.3927, 0.8159, 0.3680])
x[1, :] tensor([0.8331, 0.1322, 0.2562])
x[1, 1] tensor(0.1322)
x[1,1].item() 0.1321641206741333


In [9]:
# Reshape with Tensor.view()
x = torch.randn((4, 4))
y = x.view(16)  # flatten all 16 elements into one dimension
# A size of -1 is inferred from the other dimensions:
# PyTorch automatically determines the necessary size (here 16 / 8 = 2)
z = x.view(-1, 8)
print(*(t.size() for t in (x, y, z)))

torch.Size([4, 4]) torch.Size([16]) torch.Size([2, 8])


In [10]:
a = torch.ones((5,))
print(a)

# torch -> numpy: Tensor.numpy() returns a numpy.ndarray
b = a.numpy()
print(b, type(b), sep="\n")

tensor([1., 1., 1., 1., 1.])
[1. 1. 1. 1. 1.]
<class 'numpy.ndarray'>


In [11]:
# Careful: if the tensor lives on the CPU (not the GPU), the tensor and the
# numpy array share the same memory location, so changing one
# also changes the other
a += 1  # in-place update, same effect as a.add_(1)
print(a, b, sep="\n")

tensor([2., 2., 2., 2., 2.])
[2. 2. 2. 2. 2.]


In [12]:
# numpy -> torch: torch.from_numpy(arr) reuses the array's memory, while
# torch.tensor(arr) makes an independent copy.
# (numpy is imported as `np` with the other imports at the top of the notebook.)
a = np.ones(5)
b = torch.from_numpy(a)  # shares memory with `a`
c = torch.tensor(a)  # copy of `a`
print(a)
print(b)
print(c)

# Again, be careful when modifying: the in-place update of `a` is visible
# through `b`, whereas the copy `c` keeps its old values
a += 1
print(a)
print(b)
print(c)


[1. 1. 1. 1. 1.]
tensor([1., 1., 1., 1., 1.], dtype=torch.float64)
tensor([1., 1., 1., 1., 1.], dtype=torch.float64)
[2. 2. 2. 2. 2.]
tensor([2., 2., 2., 2., 2.], dtype=torch.float64)
tensor([1., 1., 1., 1., 1.], dtype=torch.float64)


In [13]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU
gpu_available = torch.cuda.is_available()
device = torch.device('cuda' if gpu_available else 'cpu')
# An existing tensor can be moved with .to():
#x = x.to("cpu")
#x = x.to("cuda")

# ...or a tensor can be created directly on the chosen device
x = torch.rand((2, 2), device=device)


In [1]:
import torch

# requires_grad=True -> autograd tracks all operations on the tensor.
x = torch.randn((3,), requires_grad=True)
y = torch.add(x, 2)  # same as x + 2

# y was created as the result of an operation, so it has a grad_fn attribute
# grad_fn: references the Function that created the Tensor
print(x)  # created by the user -> grad_fn is None
print(y, y.grad_fn, sep="\n")

tensor([ 0.3894, -0.2876, -0.2883], requires_grad=True)
tensor([2.3894, 1.7124, 1.7117], grad_fn=<AddBackward0>)
<AddBackward0 object at 0x7c7f9565cdc0>


In [2]:
# Keep building the graph on top of y: square it, scale by 3 ...
y_squared = y * y
z = y_squared * 3
print(z)
# ... then reduce to a single value with the mean
z = torch.mean(z)
print(z)

tensor([17.1278,  8.7971,  8.7897], grad_fn=<MulBackward0>)
tensor(11.5715, grad_fn=<MeanBackward0>)


In [3]:
# Compute the gradients with backpropagation.
# Once the computation is finished, calling .backward() has all the gradients computed automatically.
# The gradient for this tensor is accumulated into its .grad attribute.
# It is the partial derivative of the function w.r.t. the tensor.

grad_before = x.grad  # read before backward() has run
print(grad_before)
z.backward()
grad_after = x.grad  # dz/dx
print(grad_after)

# !!! Careful !!! backward() accumulates the gradient for this tensor into the .grad attribute.
# !!! We need to be careful during optimization !!! optimizer.zero_grad()

None
tensor([4.7788, 3.4248, 3.4234])


Stop a tensor from tracking history:

For example, during the training loop when we want to update our weights, or after training during evaluation. These operations should not be part of the gradient computation. To prevent this, we can use:
x.requires_grad_(False)
x.detach()
wrap in with torch.no_grad():

In [4]:
# Tensor.requires_grad_(...) changes the requires_grad flag of an existing tensor in place.
a = torch.randn((2, 2))
b = torch.sum(a * a)
# not tracked yet: the flag is off and the result has no grad_fn
print(a.requires_grad, b.grad_fn, sep="\n")

a.requires_grad_(True)
b = torch.sum(a * a)
# tracked now: the same computation records a grad_fn
print(a.requires_grad, b.grad_fn, sep="\n")


False
None
True
<SumBackward0 object at 0x7c7f9565d7b0>


In [5]:
# Tensor.detach(): returns a new Tensor with the same content that takes no part in gradient computation
a = torch.randn((2, 2), requires_grad=True)
b = a.detach()
for tensor in (a, b):
    print(tensor.requires_grad)

True
False


In [6]:
# Operations executed inside a 'with torch.no_grad():' block are not tracked
a = torch.randn((2, 2), requires_grad=True)
print(a.requires_grad)
with torch.no_grad():
    b = a.pow(2)  # same as a ** 2
    print(b.requires_grad)


True
False
