In [1]:
import torch
import numpy as np

In [2]:
# Create a tensor directly from a nested Python list (one inner list per row).
data = [
    [1, 2],
    [3, 4],
]
x_data = torch.tensor(data)

In [3]:
# Alternative construction path: nested list -> NumPy array -> tensor.
np_array = np.array(data)
x_np = torch.from_numpy(np_array)  # wraps the existing NumPy array as a tensor

In [4]:
# Derive two new tensors from x_data, then report both.
# ones_like retains the properties of x_data; rand_like is asked to override its datatype.
x_ones = torch.ones_like(x_data)
x_rand = torch.rand_like(x_data, dtype=torch.float32)

print(f"Ones Tensor: \n {x_ones} \n")
print(f"Random Tensor: \n {x_rand} \n")

Ones Tensor: 
 tensor([[1, 1],
        [1, 1]]) 

Random Tensor: 
 tensor([[0.3401, 0.8869],
        [0.5209, 0.9114]]) 



In [5]:
# One shape tuple drives all three factory calls: 2 rows, 3 columns.
shape = (2, 3)

# Values drawn at random on every run.
rand_tensor = torch.rand(shape)
print(f"Random Tensor: \n {rand_tensor} \n")

# Filled with ones.
ones_tensor = torch.ones(shape)
print(f"Ones Tensor: \n {ones_tensor} \n")

# Filled with zeros.
zeros_tensor = torch.zeros(shape)
print(f"Zeros Tensor: \n {zeros_tensor}")

Random Tensor: 
 tensor([[0.8526, 0.3893, 0.1176],
        [0.8442, 0.4453, 0.6873]]) 

Ones Tensor: 
 tensor([[1., 1., 1.],
        [1., 1., 1.]]) 

Zeros Tensor: 
 tensor([[0., 0., 0.],
        [0., 0., 0.]])


In [6]:
# A random 3x4 tensor whose attributes are reported below.
tensor = torch.rand((3, 4))

# One print call, one attribute per line: shape, element dtype, and storage device.
print(
    f"Shape of tensor: {tensor.shape}",
    f"Datatype of tensor: {tensor.dtype}",
    f"Device tensor is stored on: {tensor.device}",
    sep="\n",
)

Shape of tensor: torch.Size([3, 4])
Datatype of tensor: torch.float32
Device tensor is stored on: cpu


In [7]:
# Start from a 4x4 tensor of ones; the following cells index into it and operate on it.
tensor = torch.ones((4, 4))

In [8]:
tensor[:,1] = 0 # every row, column index 1 (the second column) is set to zero; this modifies tensor itself

In [9]:
# Bare last expression: the notebook displays the tensor, showing column index 1 zeroed.
tensor

tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]])

In [10]:
# torch.cat joins tensors along an existing dimension; dim=0 (the default) adds them as extra rows.
t1 = torch.cat([tensor] * 4, dim=0)

In [11]:
# torch.stack joins tensors along a new dimension (torch.cat joins them along an existing one)

In [12]:
# Element-wise product, computed with the method form and with the operator form.
product_via_method = tensor.mul(tensor)
product_via_operator = tensor * tensor

print(f"tensor.mul(tensor) \n {product_via_method} \n")
print(f"tensor * tensor \n {product_via_operator}")

tensor.mul(tensor) 
 tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]]) 

tensor * tensor 
 tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]])


In [13]:
# Matrix product of tensor with its own transpose, via the method form and the @ operator.
matmul_via_method = tensor.matmul(tensor.T)
matmul_via_operator = tensor @ tensor.T

print(f"tensor.matmul(tensor.T) \n {matmul_via_method} \n")
print(f"tensor @ tensor.T \n {matmul_via_operator}")

tensor.matmul(tensor.T) 
 tensor([[3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.]]) 

tensor @ tensor.T 
 tensor([[3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.]])


In [14]:
# Operations with a trailing underscore work in place: they alter the original tensor.
# That saves memory, but is problematic when computing derivatives because the
# history is lost immediately.
# NOTE: this cell is not idempotent; every re-run adds another 5 to tensor.
print(f"{tensor} \n")  # state before the update
tensor.add_(5)
print(tensor)  # state after the update

tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]]) 

tensor([[6., 5., 6., 6.],
        [6., 5., 6., 6.],
        [6., 5., 6., 6.],
        [6., 5., 6., 6.]])


In [15]:
# A CPU tensor and a NumPy array can share the same memory locations,
# so changing one will change the other.
t = torch.ones(5)
n = t.numpy()  # NumPy counterpart of t

print(f"t: {t}")
print(f"n: {n}")

t: tensor([1., 1., 1., 1., 1.])
n: [1. 1. 1. 1. 1.]


Autograd: torch's automatic differentiation engine

Forward Propagation: the NN makes its best guess about the correct output. It runs the input data through each of its functions to make the guess.

Backward Propagation: the NN adjusts its parameters in proportion to the error in its guess. It traverses backwards from the output, collecting the derivatives of the error with respect to the parameters of the functions (gradients), and optimizes the parameters using gradient descent.

In [17]:
import torch
from torchvision.models import resnet18, ResNet18_Weights
# Build a ResNet-18 with the default pretrained weights; on first use the weight
# file is downloaded into the local torch hub cache.
model = resnet18(weights=ResNet18_Weights.DEFAULT)
# Random stand-in input of shape (1, 3, 64, 64) - presumably (batch, channels, height, width); confirm.
# NOTE(review): this rebinds `data`, which earlier in the notebook named a nested Python list.
data = torch.rand(1, 3, 64, 64)
# Random stand-in target of shape (1, 1000) - presumably one value per model output; confirm.
labels = torch.rand(1, 1000)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /Users/jakehenderson/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:01<00:00, 35.9MB/s]


In [20]:
# Bare expression: display the random input batch (the printed repr is long).
data

tensor([[[[0.6338, 0.3222, 0.4994,  ..., 0.0068, 0.3835, 0.3664],
          [0.8856, 0.9412, 0.6117,  ..., 0.9094, 0.7990, 0.9009],
          [0.6244, 0.2770, 0.2828,  ..., 0.0430, 0.5056, 0.0922],
          ...,
          [0.0664, 0.2799, 0.5516,  ..., 0.4178, 0.6850, 0.9595],
          [0.0928, 0.4030, 0.0028,  ..., 0.2417, 0.9634, 0.6349],
          [0.0833, 0.6556, 0.2444,  ..., 0.7144, 0.5743, 0.1435]],

         [[0.6445, 0.1850, 0.3755,  ..., 0.4932, 0.2732, 0.8346],
          [0.3987, 0.0124, 0.6896,  ..., 0.8163, 0.0734, 0.6570],
          [0.7669, 0.9313, 0.4659,  ..., 0.3415, 0.3844, 0.5303],
          ...,
          [0.0033, 0.4948, 0.0843,  ..., 0.3640, 0.1054, 0.5516],
          [0.3595, 0.9294, 0.9729,  ..., 0.4548, 0.3843, 0.2846],
          [0.5892, 0.7741, 0.2664,  ..., 0.9200, 0.8738, 0.4840]],

         [[0.1030, 0.2278, 0.2510,  ..., 0.4777, 0.9924, 0.7315],
          [0.4070, 0.5261, 0.0410,  ..., 0.7971, 0.2929, 0.1289],
          [0.5347, 0.8237, 0.5207,  ..., 0

In [21]:
prediction = model(data) # forward pass: run the input batch through the network

We use the model’s prediction and the corresponding label to calculate the error (loss). The next step is to backpropagate this error through the network. Backward propagation is kicked off when we call .backward() on the error tensor. Autograd then calculates and stores the gradients for each model parameter in the parameter’s .grad attribute.

In [22]:
# Reduce the element-wise difference between prediction and labels to one scalar error value.
error = prediction - labels
loss = error.sum()

# Backpropagation: autograd stores a gradient in each model parameter's .grad attribute.
loss.backward()

In [23]:
# Stochastic gradient descent optimizer over every model parameter,
# with a learning rate of 0.01 and a momentum of 0.9.
optim = torch.optim.SGD(
    params=model.parameters(),
    lr=0.01,
    momentum=0.9,
)

In [None]:
# initiate gradient descent: the optimizer adjusts each parameter by its gradient, stored in the parameter's .grad attribute