In [1]:
import torch

In [2]:
# Ints and floats are mixed on purpose: every element ends up with the same floating-point dtype.
t2 = torch.tensor(
    [
        [1, 2., 3, 5.],
        [1, 3, 4., 5],
    ]
)
t2

tensor([[1., 2., 3., 5.],
        [1., 3., 4., 5.]])

In [3]:
# dtype is the single element type shared by every entry of the tensor
t2.dtype

torch.float32

In [4]:
# shape gives the size of each dimension: t2 has 2 rows of 4 values
t2.shape

torch.Size([2, 4])

In [None]:
# Two integer vectors of equal length
x = torch.tensor([1, 2, 3])
y = torch.tensor([4, 5, 6])
torch.add(x, y)  # element-wise sum, the function form of x + y

tensor([5, 7, 9])

In [None]:
torch.mul(x, y)  # element-wise product of two vectors of the same shape (function form of x * y)

tensor([ 4, 10, 18])

Tensors have a fixed, regular shape: every row must have the same length, unlike a list of lists, whose inner lists can have different sizes. PyTorch can compute the derivative of a tensor with respect to every tensor that has `requires_grad` set to `True`. Call `.backward()` on the result to compute those derivatives; each one is then available on the corresponding input tensor's `.grad` attribute.

In [None]:
# Inputs flagged for gradient tracking
x = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0], requires_grad=True)
w = torch.tensor([0.2, 0.3, 0.4, 0.5, 0.6], requires_grad=True)
b = torch.tensor([0.1, 0.2, 0.1, 0.2, 0.3], requires_grad=True)

# Reduce the element-wise result to a single scalar
y = (x * w + b).sum()

In [None]:
y.backward()  # backward() without arguments only works on a scalar, not on a vector
for label, tracked in (("dy/dx: ", x), ("dy/dw: ", w), ("dy/db: ", b)):
    print(label, tracked.grad)

dy/dx:  tensor([0.2000, 0.3000, 0.4000, 0.5000, 0.6000])
dy/dw:  tensor([1., 2., 3., 4., 5.])
dy/db:  tensor([1., 1., 1., 1., 1.])


In [None]:
import numpy as np

In [None]:
x = np.array([1, 2])

# from_numpy reuses the array's memory (no copy), so x and x1 see each other's writes
x1 = torch.from_numpy(x)

# torch.tensor copies the data, so x2 is independent of x
x2 = torch.tensor(x)

# To go the other way, call .numpy() on a tensor to get a NumPy array


Torch is written to work well with GPUs, unlike NumPy on its own.

You rarely have to loop over a tensor: there should nearly always be a tensor operation that vectorizes the work, and it will take advantage of a GPU if one is available.

In [None]:
# general flow of a ml algo
# Sample data so the cell actually runs: torch.tensor() with no data argument raises a TypeError.
# Each input row has 3 features and each target row has 2 values, which matches
# the 2 x 3 weight matrix and the 2-element bias created below.
inputs = torch.tensor([[73., 67., 43.],
                       [91., 88., 64.],
                       [87., 134., 58.],
                       [102., 43., 37.],
                       [69., 96., 70.]])
targets = torch.tensor([[56., 70.],
                        [81., 101.],
                        [119., 133.],
                        [22., 37.],
                        [103., 119.]])

w = torch.randn(2, 3, requires_grad=True) # creates a 2 x 3 matrix with values randomly sampled from a standard normal distribution
b = torch.randn(2, requires_grad=True) # creates a 2 element vector

def model(x):
  """Return `x` multiplied by the transpose of the notebook-level `w`, plus the notebook-level `b`."""
  # torch.matmul(x, w.t()) is the function form of x @ w.t(); .t() returns the transpose of a tensor
  projected = torch.matmul(x, w.t())
  return projected + b

def mse(t1, t2):
  """Return the mean of the squared element-wise differences between `t1` and `t2`."""
  residual = t1 - t2
  squared = residual * residual
  # numel() is the number of elements in the tensor, so this divides the total by the count
  return squared.sum() / residual.numel()

iterations = 1000
learning_rate = 1e-5
for i in range(iterations):
  pred = model(inputs)
  # The target tensor is named `targets`; the previous `target` was an undefined name (NameError).
  loss = mse(pred, targets)
  loss.backward()

  # The parameter updates themselves must not be tracked by autograd
  with torch.no_grad():
    w -= learning_rate * w.grad
    b -= learning_rate * b.grad

    # have to zero the gradients before recomputing them b/c pytorch accumulates gradients by default
    w.grad.zero_()
    b.grad.zero_()

In [None]:
import torchvision
from utilities import train_model

model = torchvision.models.resnet18(pretrained=True)

# Freeze all pretrained weights in one call; this sets requires_grad = False on every parameter
model.requires_grad_(False)

# Swap the last layer for a new classifier with 10 output classes
model.fc = torch.nn.Linear(in_features=model.fc.in_features, out_features=10)

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-2, momentum=0.9)

trained_model = train_model(model, criterion, optimizer, num_epochs=20)

In [None]:
# Autograd demonstration
a = torch.tensor([2.0, 3.0], requires_grad=True)
b = torch.tensor([6.0, 4.0], requires_grad=True)

Q = 3*a**3 - b**2

# Q is a vector, so backward() is given a gradient tensor with the same shape as Q
external_grad = torch.tensor([1. , 1.])
Q.backward(gradient=external_grad)

# Analytically dQ/da = 9a^2 and dQ/db = -2b.
# `==` between tensors is element-wise, and asserting on a multi-element result raises
# "Boolean value of Tensor with more than one value is ambiguous", so compare with
# torch.allclose, which returns a single bool.
with torch.no_grad():
    assert torch.allclose(9*a**2, a.grad)
    assert torch.allclose(-2*b, b.grad)