In [2]:
import torch

# Tensors

- tensors are n dimensional arrays of numbers
- matrices are 2 dimensional arrays of numbers
- vectors are 1 dimensional arrays
- scalars are just numbers


### Scalar
- often denoted by a

In [18]:
scalar = torch.tensor(1) # 0-dimensional tensor wrapping a single number (no square brackets)
scalar

tensor(1)

In [19]:
scalar.ndim

0

In [22]:
scalar.item()

1

### Vector
- denoted by y

In [23]:
vector = torch.tensor([1,2]) # 1-dimensional tensor: one level of square brackets
vector

tensor([1, 2])

In [24]:
vector.ndim

1

In [25]:
vector.shape

torch.Size([2])

### Matrix
- denoted by Q, W

In [26]:
matrix = torch.tensor([[1,2], [3,4]]) # 2-dimensional tensor: a list of equal-length rows
matrix

tensor([[1, 2],
        [3, 4]])

In [27]:
matrix.ndim

2

In [28]:
matrix.shape

torch.Size([2, 2])

### Tensor
- denoted by X
- dimensions go outer to inner
- shape [1, 3, 3] means one 3x3 matrix: 1 outer block containing 3 rows of 3 values each

In [35]:
tensor = torch.tensor([[[1,2,3],[4,5,6],[7,8,9]]]) # 3-dimensional tensor: one 3x3 matrix wrapped in an outer dimension of size 1
tensor

tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]])

In [36]:
tensor.ndim

3

In [37]:
tensor.shape

torch.Size([1, 3, 3])

In [47]:
random_tensor = torch.rand(size=(3, 3, 3))
random_tensor, random_tensor.dtype

(tensor([[[0.1651, 0.1454, 0.9371],
          [0.3580, 0.7883, 0.5624],
          [0.7663, 0.4376, 0.1037]],
 
         [[0.8538, 0.9672, 0.3670],
          [0.4303, 0.4508, 0.9327],
          [0.2353, 0.8423, 0.9300]],
 
         [[0.1503, 0.1130, 0.8964],
          [0.9745, 0.8227, 0.4032],
          [0.8689, 0.0210, 0.3597]]]),
 torch.float32)

In [46]:
torch.zeros(size=(3,3,3))

tensor([[[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]],

        [[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]],

        [[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]]])

In [51]:
torch.ones(size=(3,3,3))

tensor([[[1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.]],

        [[1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.]],

        [[1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.]]])

In [53]:
torch.arange(start=0, end=10, step=1)

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [54]:
# zeros_like builds a NEW tensor filled with zeros that has the same shape and dtype as its argument; the argument's values are not copied.

# NOTE: despite its name, one_to_ten holds 0..9, because `end` is exclusive in arange.
one_to_ten = torch.arange(start=0, end=10, step=1)

torch.zeros_like(one_to_ten)

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [55]:
torch.ones_like(one_to_ten)

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

# Tensor Datatypes
- tensors can use many different types of integers and floats
    - 8, 16, 32, and 64 bit ints and floats
- default dtype for floating-point data is 32 bit (`torch.float32`); Python integers default to 64 bit (`torch.int64`)
- decision of which type of number to use is a tradeoff between precision and compute
    - fewer bits --> less accuracy, less compute, faster runtime
    - more bits --> more accuracy, more compute, slower runtime
- You can also set device to cpu or cuda
    - cuda means you need a nvidia gpu with cuda drivers
    - best practice is to design code to be device agnostic 
- Many bugs and issues with PyTorch are related to mismatches in:
    - shape
    - data type
    - device

In [62]:
# Every optional keyword is spelled out with its default value, to show what
# torch.tensor does when nothing is specified.
float_32_tensor = torch.tensor(
    [3.0, 6.0, 9.0],
    dtype=None,           # None -> dtype is inferred from the data (Python floats give float32)
    device=None,          # None -> the current default device
    requires_grad=False,  # do not track gradients for this tensor
)

# The three attributes most often involved in mismatch errors.
float_32_tensor.shape, float_32_tensor.dtype, float_32_tensor.device

(torch.Size([3]), torch.float32, device(type='cpu'))

# Tensor Operations
- in deep learning everything is represented as tensors
- tensor operations are the building blocks of neural networks
- models perform series of tensor operations to find patterns in data
- addition, subtraction, scalar multiplication and division, matrix multiplication
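- "it's all matrix multiplication"
    - remember that for matrix multiplication the inner dimensions must match (an (m, n) matrix times an (n, p) matrix gives an (m, p) result); otherwise the operation is invalid
    - each element of the result is the sum of the products of a row of the first matrix with a column of the second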
    
- we can also transpose tensors, or swap any dimensions
    - use .transpose, or .T
    
- pytorch also plays nicely with numpy and we can convert to and from pytorch tensors and numpy arrays easily

In [64]:
tensor = torch.tensor([1, 2, 3])

In [65]:
tensor + 10

tensor([11, 12, 13])

In [66]:
tensor * 10

tensor([10, 20, 30])

In [67]:
tensor - 10

tensor([-9, -8, -7])

In [68]:
tensor / 10

tensor([0.1000, 0.2000, 0.3000])

In [72]:
tensor = torch.tensor([1, 2, 3])

In [73]:
tensor * tensor

tensor([1, 4, 9])

In [74]:
# matmul on two 1-D tensors is the dot product: 1*1 + 2*2 + 3*3
tensor.matmul(tensor)

tensor(14)

In [77]:
# Two 3x2 float32 matrices for the matrix-multiplication examples.
A = torch.tensor(
    [[1, 2],
     [3, 4],
     [5, 6]],
    dtype=torch.float32,
)

B = torch.tensor(
    [[7, 10],
     [8, 11],
     [9, 12]],
    dtype=torch.float32,
)

In [82]:
# Matrix multiplication fails if the inner dimensions don't match:
# A is 3x2 and B is 3x2, so the inner dimensions (2 and 3) disagree.
# The error itself is what this cell demonstrates, so catch it and print the
# message instead of leaving a raising cell that would abort "Run All".
try:
    A.matmul(B)
except RuntimeError as err:
    print(f"RuntimeError: {err}")

RuntimeError: mat1 and mat2 shapes cannot be multiplied (3x2 and 3x2)

In [81]:
A.mm(B.T) # mm is the 2-D-only matrix product (no broadcasting); for two matrices it gives the same result as matmul

tensor([[ 27.,  30.,  33.],
        [ 61.,  68.,  75.],
        [ 95., 106., 117.]])

### torch.nn.Linear()

- feed forward layer or fully connected layer
- matrix multiplication between input x and weights matrix A

$$
y = x\cdot{A^T} + b
$$

- y is the output, hoping to find patterns in input x
- x is the input layer
- A is the weights matrix, starts as random numbers, and gets adjusted to better represent patterns in x
- b is bias to offset weights and inputs


- in_features – size of the last dimension of the input x
- out_features – size of the last dimension of the output y

In [95]:
# nn.Linear initialises its weights and bias from the global random generator,
# so seed it first to make the printed output reproducible.
torch.manual_seed(42)

tensor_A = torch.tensor(
    [[1, 2, 3],
     [3, 4, 5],
     [5, 6, 7]],
    dtype=torch.float32,
)

# Fully connected layer: applies y = x @ W.T + b using matrix multiplication.
linear = torch.nn.Linear(
    in_features=3,   # must equal the size of the input's last dimension
    out_features=3,  # size of the output's last dimension
)
x = tensor_A
output = linear(x)
print(f"Input shape: {x.shape}\n")
print(f"Output:\n{output}\n\nOutput shape: {output.shape}")

Input shape: torch.Size([3, 3])

Output:
tensor([[0.5705, 1.1287, 2.0320],
        [2.1412, 2.1694, 3.1659],
        [3.7119, 3.2102, 4.2997]], grad_fn=<AddmmBackward0>)

Output shape: torch.Size([3, 3])


### Aggregation

In [98]:
x = tensor_A

print(f"Minimum: {x.min()}")
print(f"Maximum: {x.max()}")
# mean() only supports floating-point (or complex) dtypes; an integer tensor must be cast before calling it.
print(f"Mean: {x.type(torch.float32).mean()}") # tensor_A is already float32, so this cast is a no-op here
print(f"Sum: {x.sum()}")

Minimum: 1.0
Maximum: 7.0
Mean: 4.0
Sum: 36.0


In [106]:
tensor_A[0].argmax()

tensor(2)

In [107]:
tensor_A[0].argmin()

tensor(0)

In [108]:
tensor_A

tensor([[1., 2., 3.],
        [3., 4., 5.],
        [5., 6., 7.]])

In [109]:
tensor_A_float_16 = tensor_A.type(torch.float16)

tensor_A_float_16

tensor([[1., 2., 3.],
        [3., 4., 5.],
        [5., 6., 7.]], dtype=torch.float16)

### Manipulating Shape and Dimensions
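- reshape: returns a tensor with the requested shape (a view of the original when possible, otherwise a copy)
- view: creates a new view of a tensor with a different shape (always shares data with the original)
- stack: joins several tensors of the same shape along a new dimension
- squeeze: removes dimensions of size 1
- unsqueeze: inserts a dimension of size 1 at the given position
- permute: returns a view with the dimensions rearranged in the given order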

### Indexing
- outer dimension --> inner dimension
- can use :, negative indexing

In [118]:
x = torch.arange(1, 10).reshape(1, 3, 3)
x, x.shape

(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]),
 torch.Size([1, 3, 3]))

In [124]:
x[0]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [125]:
x[0][0]

tensor([1, 2, 3])

In [128]:
x[0][0][0]

tensor(1)

In [129]:
x[0][-1]

tensor([7, 8, 9])

In [135]:
x[0][:]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

# Reproducibility – Random Seeding
- we can set a random seed for tensors using torch.manual_seed
- the seed must be re-set immediately before each random call we want to reproduce; otherwise the generator keeps advancing and the next tensor is different
- this is useful if we want to reproduce the same starting point / random tensors

In [146]:
# Re-seed the global generator before each draw so both draws start from the
# same generator state.
torch.manual_seed(42)
A = torch.rand(size=(3, 4))

torch.manual_seed(42)
B = torch.rand(size=(3, 4))

In [150]:
A, B, A == B

(tensor([[0.8823, 0.9150, 0.3829, 0.9593],
         [0.3904, 0.6009, 0.2566, 0.7936],
         [0.9408, 0.1332, 0.9346, 0.5936]]),
 tensor([[0.8823, 0.9150, 0.3829, 0.9593],
         [0.3904, 0.6009, 0.2566, 0.7936],
         [0.9408, 0.1332, 0.9346, 0.5936]]),
 tensor([[True, True, True, True],
         [True, True, True, True],
         [True, True, True, True]]))

In [149]:
# No re-seeding here: each call continues the generator's sequence, so C and D differ.
C = torch.rand(3, 4)
D = torch.rand(3, 4)


C, D, C == D

(tensor([[0.5779, 0.9040, 0.5547, 0.3423],
         [0.6343, 0.3644, 0.7104, 0.9464],
         [0.7890, 0.2814, 0.7886, 0.5895]]),
 tensor([[0.7539, 0.1952, 0.0050, 0.3068],
         [0.1165, 0.9103, 0.6440, 0.7071],
         [0.6581, 0.4913, 0.8913, 0.1447]]),
 tensor([[False, False, False, False],
         [False, False, False, False],
         [False, False, False, False]]))

# Using GPUs
- run `!nvidia-smi` to check whether you have access to an NVIDIA GPU
- best practice is to write code that works on either CPU or GPU, depending on where it is running
- can use one or many GPUs if available

In [152]:
torch.cuda.is_available()

False

In [153]:
# Device-agnostic setup: use the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cpu'

In [154]:
torch.cuda.device_count()

0