<a href="https://colab.research.google.com/github/teias-courses/dl99/blob/gh-pages/resources/pytorch_basics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import torch

# Tensor attributes

In [2]:
x = torch.tensor([[4., 5., 6.]])

print('x:', x)
print('x.ndim:', x.ndim)
print('x.shape:', x.shape)
print('x.size():', x.size())
print('x.dtype:', x.dtype)
print('x.device:', x.device)

x: tensor([[4., 5., 6.]])
x.ndim: 2
x.shape: torch.Size([1, 3])
x.size(): torch.Size([1, 3])
x.dtype: torch.float32
x.device: cpu


In [3]:
y = torch.tensor([[4, 5, 6.]], dtype=torch.long, device=torch.device('cuda:0'))
print('y:', y)
print('y.dtype:', y.dtype)
print('y.device:', y.device)

y: tensor([[4, 5, 6]], device='cuda:0')
y.dtype: torch.int64
y.device: cuda:0


In [4]:
x_long = x.to(torch.long)   # to() returns a copy if conversion needed
print('x_long.dtype:', x_long.dtype)

x_long = x.long()           # alias
print('x_long.dtype:', x_long.dtype)

x_long.dtype: torch.int64
x_long.dtype: torch.int64


In [5]:
x_gpu = x.to(torch.device('cuda:0'))  # to() returns a copy if conversion needed
print('x_gpu.device:', x_gpu.device)

x_gpu = x.cuda()                      # alias
print('x_gpu.device:', x_gpu.device)

x_gpu.device: cuda:0
x_gpu.device: cuda:0


# Skipping tensor creation and indexing :)

# Conversion to numpy and pure python

## python lists, numbers

In [7]:
x = torch.tensor([[4., 5., 6.]])    # accepts python list

x.tolist()    # returns python list

[[4.0, 5.0, 6.0]]

In [8]:
y = torch.tensor([[7.]])

print('y:', y)
print('y.tolist():', y.tolist())
print('y.item():', y.item())    # if y has a single value
                                # returns that as python number

y: tensor([[7.]])
y.tolist(): [[7.0]]
y.item(): 7.0


## numpy arrays


In [9]:
a_np = np.array([[1, 2],
                 [3, 4]])
a_pt = torch.from_numpy(a_np) # shares memory with numpy
print(a_pt)

tensor([[1, 2],
        [3, 4]])


In [10]:
a_pt[0, 0] = 0
a_np

array([[0, 2],
       [3, 4]])

In [11]:
b_pt = torch.tensor([1., 2., 3.])
b_np = b_pt.numpy()   # shares memory with pytorch
b_np

array([1., 2., 3.], dtype=float32)

In [12]:
b_np[1] = -5
b_pt

tensor([ 1., -5.,  3.])

In [13]:
# to avoid sharing memory with numpy use clone()
b_pt = torch.tensor([1., 2., 3.])
b_np = b_pt.clone().numpy()   # not sharing memory with b_pt
b_np

array([1., 2., 3.], dtype=float32)

In [14]:
b_np[1] = -5
b_pt

tensor([1., 2., 3.])

# Tensor methods

In [16]:
x = torch.tensor([1., 2., 3., 4.])

x_clipped = x.clip(min=2, max=3)
x

tensor([1., 2., 3., 4.])

In [17]:
x.clip_(min=2, max=3)   # methods ending with _ perform inplace
x

tensor([2., 2., 3., 3.])

In [18]:
x.clip_(min=2, max=3) is x    # inplace methods also
                              # return the tensor itself

True

# Calculating gradients

In [19]:
X = torch.tensor([1., 2])
W = torch.tensor([3., 4], requires_grad=True)
b = torch.tensor([5.], requires_grad=True)
W

tensor([3., 4.], requires_grad=True)

In [24]:
a = X * W
print('a:', a)
Y = a + b
print('Y:', Y)

a: tensor([3., 8.], grad_fn=<MulBackward0>)
Y: tensor([ 8., 13.], grad_fn=<AddBackward0>)


In [25]:
loss = Y.sum()
print('loss:', loss)

loss.backward()   # loss should be a single value
                  # to call backward without input

loss: tensor(21., grad_fn=<SumBackward0>)


In [26]:
print('W.grad:', W.grad)
print('b.grad:', b.grad)

W.grad: tensor([2., 4.])
b.grad: tensor([6.])


# Applying gradients

In [27]:
from torch.optim import SGD

optimizer = torch.optim.SGD([W, b], lr=0.01)
optimizer.step()

In [28]:
print('updated W:', W)
print('updated b:', b)

updated W: tensor([2.9800, 3.9600], requires_grad=True)
updated b: tensor([4.9400], requires_grad=True)
