In [1]:
import torch 

torch.__version__

'2.4.0'

In [2]:
import numpy as np

np.__version__

'2.0.1'

### Creating tensors

In [3]:
scalar = torch.tensor(7)
scalar

tensor(7)

In [4]:
scalar.ndim

0

In [5]:
scalar.item()

7

In [6]:
vector = torch.tensor([7])
vector

tensor([7])

In [7]:
vector.ndim

1

In [8]:
vector.item()

7

In [9]:
vector.shape

torch.Size([1])

In [10]:
matrix = torch.tensor([[7]])
matrix

tensor([[7]])

In [11]:
matrix.ndim

2

In [12]:
matrix.shape

torch.Size([1, 1])

In [13]:
matrix.item()

7

In [14]:
vector = torch.tensor([7, 8])
vector

tensor([7, 8])

In [15]:
vector.ndim

1

In [16]:
vector.shape

torch.Size([2])

In [17]:
# Only works for scalars and one element tensor
# vector.item()

In [18]:
vector = torch.tensor([7, 7])
vector

tensor([7, 7])

In [19]:
vector.ndim

1

In [20]:
vector.shape

torch.Size([2])

In [21]:
matrix = torch.tensor([[1, 2], 
                       [3, 4]])
matrix

tensor([[1, 2],
        [3, 4]])

In [22]:
matrix.ndim

2

In [23]:
matrix.shape

torch.Size([2, 2])

In [24]:
tensor = torch.tensor([[[1, 2, 3], 
                       [4, 5, 6], 
                       [7, 8, 9]]])
tensor

tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]])

In [25]:
tensor.ndim

3

In [26]:
tensor.shape

torch.Size([1, 3, 3])

In [27]:
np_vector = np.array([7, 7])
pt_vector = torch.tensor(np_vector)

print(np_vector, np_vector.dtype)
print(pt_vector, pt_vector.dtype)

[7 7] int64
tensor([7, 7]) torch.int64


In [28]:
np_vector[0] = 8

print(np_vector, np_vector.dtype)
print(pt_vector, pt_vector.dtype)

[8 7] int64
tensor([7, 7]) torch.int64


In [29]:
np_vector = np.array([7, 7])
pt_vector = torch.as_tensor(np_vector)

print(np_vector, np_vector.dtype)
print(pt_vector, pt_vector.dtype)

[7 7] int64
tensor([7, 7]) torch.int64


In [30]:
np_vector[0] = 8

print(np_vector, np_vector.dtype)
print(pt_vector, pt_vector.dtype)

[8 7] int64
tensor([8, 7]) torch.int64


### Random tensors

In [31]:
random_tensor = torch.rand(3, 4)
random_tensor, random_tensor.dtype

(tensor([[0.6136, 0.4396, 0.3336, 0.5655],
         [0.7028, 0.7243, 0.8637, 0.4852],
         [0.2603, 0.5805, 0.0118, 0.7381]]),
 torch.float32)

In [32]:
random_image_tensor = torch.rand(224, 224, 3)
random_image_tensor.shape

torch.Size([224, 224, 3])

### Zeros and ones

In [33]:
zeros = torch.zeros(3, 4)
zeros, zeros.dtype

(tensor([[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]),
 torch.float32)

In [34]:
ones = torch.ones(3, 4)
ones, ones.dtype

(tensor([[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]),
 torch.float32)

### Creating a range and tensors like

In [35]:
zero_to_ten_deprecated = torch.range(0, 10)
zero_to_ten_deprecated, zero_to_ten_deprecated.dtype

  zero_to_ten_deprecated = torch.range(0, 10)


(tensor([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.]),
 torch.float32)

In [36]:
list(range(0, 10))

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [37]:
zero_to_ten = torch.arange(start=0, end=10, step=1)
zero_to_ten, zero_to_ten.dtype

(tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), torch.int64)

In [38]:
ten_zeros = torch.zeros_like(zero_to_ten)
ten_zeros, ten_zeros.dtype

(tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), torch.int64)

In [39]:
ten_ones = torch.ones_like(zero_to_ten)
ten_ones, ten_ones.dtype

(tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), torch.int64)

In [40]:
ten_rand_nums = torch.rand_like(zero_to_ten.type(torch.float32))
ten_rand_nums, ten_rand_nums.dtype

(tensor([0.9669, 0.4676, 0.4196, 0.2880, 0.1174, 0.9107, 0.3700, 0.0232, 0.4558,
         0.4991]),
 torch.float32)

In [41]:
zero_to_ten_float = torch.arange(start=0, end=10, step=1, dtype=torch.float32)
zero_to_ten_float, zero_to_ten_float.dtype

(tensor([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.float32)

In [42]:
ten_zeros = torch.zeros_like(zero_to_ten_float)
ten_zeros, ten_zeros.dtype

(tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]), torch.float32)

In [43]:
ten_ones = torch.ones_like(zero_to_ten_float)
ten_ones, ten_ones.dtype

(tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]), torch.float32)

### Tensor datatypes

In [44]:
float_32_tensor = torch.tensor([3.0, 6.0, 9.0], 
                               dtype=None,  # defaults to None, which is torch.float32 or whatever datatype is passed 
                               device=None,  # defaults to None, which uses the cpu
                               requires_grad=False)  # if true, operations performed on the tensor are recorded
float_32_tensor.ndim, float_32_tensor.shape, float_32_tensor.dtype, float_32_tensor.device

(1, torch.Size([3]), torch.float32, device(type='cpu'))

In [45]:
float_16_tensor = torch.tensor([3.0, 6.0, 9.0], dtype=torch.float16)
float_16_tensor

tensor([3., 6., 9.], dtype=torch.float16)

### Getting information from tensors

In [46]:
some_tensor = torch.rand(3, 4)

print(some_tensor)
print(f"Shape of tensor: {some_tensor.shape}")
print(f"Datatype of tensor: {some_tensor.dtype}")
print(f"Device tensor is stored on: {some_tensor.device}")

tensor([[0.1048, 0.8328, 0.9024, 0.7007],
        [0.2550, 0.0313, 0.9600, 0.4988],
        [0.2400, 0.3003, 0.9072, 0.5137]])
Shape of tensor: torch.Size([3, 4])
Datatype of tensor: torch.float32
Device tensor is stored on: cpu


### Manipulating tensors (tensor operations)

In [47]:
tensor = torch.tensor([1, 2, 3])
tensor + 10

tensor([11, 12, 13])

In [48]:
tensor * 10

tensor([10, 20, 30])

In [49]:
tensor

tensor([1, 2, 3])

In [50]:
tensor = tensor - 10
tensor

tensor([-9, -8, -7])

In [51]:
tensor = tensor + 10
tensor

tensor([1, 2, 3])

In [52]:
tensor * tensor

tensor([1, 4, 9])

In [53]:
torch.add(tensor, 10)

tensor([11, 12, 13])

In [54]:
torch.sub(tensor, 10)

tensor([-9, -8, -7])

In [55]:
torch.mul(tensor, 10)

tensor([10, 20, 30])

In [56]:
torch.div(tensor, 10)

tensor([0.1000, 0.2000, 0.3000])

In [57]:
torch.multiply(tensor, 10)

tensor([10, 20, 30])

### Matrix multiplication

In [58]:
tensor = torch.tensor([1, 2, 3], dtype=torch.int32)
tensor

tensor([1, 2, 3], dtype=torch.int32)

In [59]:
tensor * tensor

tensor([1, 4, 9], dtype=torch.int32)

In [60]:
torch.matmul(tensor, tensor)

tensor(14, dtype=torch.int32)

In [61]:
tensor @ tensor

tensor(14, dtype=torch.int32)

In [62]:
%%timeit

value = 0
for i in range(len(tensor)):
    value += (tensor[i] * tensor[i])
value

10.6 μs ± 16.9 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [63]:
%%timeit

tensor @ tensor

732 ns ± 1.5 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)


In [64]:
tensor = tensor.to("mps")
tensor

tensor([1, 2, 3], device='mps:0', dtype=torch.int32)

In [65]:
%%timeit

tensor @ tensor

17 μs ± 24.9 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [66]:
big_tensor = torch.rand(10_000, 10_000)
big_tensor.shape, big_tensor.dtype

(torch.Size([10000, 10000]), torch.float32)

In [67]:
%%timeit

big_tensor @ big_tensor

2.12 s ± 8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [68]:
big_tensor = big_tensor.to("mps")
big_tensor.device

device(type='mps', index=0)

In [69]:
%%timeit 

big_tensor @ big_tensor

113 μs ± 29.7 μs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [91]:
tensor_a = torch.tensor([[1., 2.], 
                         [3., 4.], 
                         [5., 6.]])
tensor_b = torch.tensor([[7., 10.], 
                         [8., 11.], 
                         [9., 12.]])
tensor_a.shape, tensor_b.shape

(torch.Size([3, 2]), torch.Size([3, 2]))

In [131]:
try:
    tensor_a @ tensor_b
except RuntimeError as e:
    print(f"Failed: {e}")

Failed: mat1 and mat2 shapes cannot be multiplied (3x2 and 3x2)


In [93]:
print(tensor_a)
print(torch.transpose(tensor_b, 1, 0))

tensor([[1., 2.],
        [3., 4.],
        [5., 6.]])
tensor([[ 7.,  8.,  9.],
        [10., 11., 12.]])


In [94]:
print(tensor_a)
print(tensor_b.T)

tensor([[1., 2.],
        [3., 4.],
        [5., 6.]])
tensor([[ 7.,  8.,  9.],
        [10., 11., 12.]])


In [95]:
tensor_a @ tensor_b.T

tensor([[ 27.,  30.,  33.],
        [ 61.,  68.,  75.],
        [ 95., 106., 117.]])

In [96]:
torch.matmul(tensor_a, tensor_b.T)

tensor([[ 27.,  30.,  33.],
        [ 61.,  68.,  75.],
        [ 95., 106., 117.]])

In [97]:
torch.mm(tensor_a, tensor_b.T)

tensor([[ 27.,  30.,  33.],
        [ 61.,  68.,  75.],
        [ 95., 106., 117.]])

In [105]:
torch.manual_seed(42)

linear = torch.nn.Linear(in_features=2, out_features=6)

x = tensor_a
output = linear(x)

print(f"Input shape: {x.shape}\n")
print(f"Output:\n{output}\n\nOutput shape: {output.shape}")

Input shape: torch.Size([3, 2])

Output:
tensor([[2.2368, 1.2292, 0.4714, 0.3864, 0.1309, 0.9838],
        [4.4919, 2.1970, 0.4469, 0.5285, 0.3401, 2.4777],
        [6.7469, 3.1648, 0.4224, 0.6705, 0.5493, 3.9716]],
       grad_fn=<AddmmBackward0>)

Output shape: torch.Size([3, 6])


### Finding min, max, mean, sum, etc (aggregation)

In [107]:
x = torch.arange(start=1, end=100, step=10)
x

tensor([ 1, 11, 21, 31, 41, 51, 61, 71, 81, 91])

In [110]:
print(f"Min: {x.min()}")
print(f"Max: {x.max()}")
print(f"Mean: {x.type(torch.float32).mean()}")
print(f"Sum: {x.sum()}")

Min: 1
Max: 91
Mean: 46.0
Sum: 460


In [112]:
print(f"Min: {torch.min(x)}")
print(f"Max: {torch.max(x)}")
print(f"Mean: {torch.mean(x.type(torch.float))}")
print(f"Sum: {torch.sum(x)}")

Min: 1
Max: 91
Mean: 46.0
Sum: 460


In [113]:
tensor = torch.arange(10, 100, 10)

print(f"Tensor: {tensor}")
print(f"Index where max value occurs: {torch.argmax(tensor)}")
print(f"Index where min value occurs: {torch.argmin(tensor)}")

Tensor: tensor([10, 20, 30, 40, 50, 60, 70, 80, 90])
Index where max value occurs: 8
Index where min value occurs: 0


### Changing datatypes

In [116]:
tensor = torch.arange(10., 100., 10.0)
tensor

tensor([10., 20., 30., 40., 50., 60., 70., 80., 90.])

In [117]:
tensor_float16 = tensor.type(torch.float16)
tensor_float16

tensor([10., 20., 30., 40., 50., 60., 70., 80., 90.], dtype=torch.float16)

In [118]:
tensor_int8 = tensor.type(torch.int8)
tensor_int8

tensor([10, 20, 30, 40, 50, 60, 70, 80, 90], dtype=torch.int8)

### Reshaping, stacking, squeezing, unsqueezing

In [120]:
x = torch.arange(1., 8.)
x, x.shape

(tensor([1., 2., 3., 4., 5., 6., 7.]), torch.Size([7]))

In [122]:
x_reshaped_1 = x.reshape(1, 7)
x_reshaped_1, x_reshaped_1.shape

(tensor([[1., 2., 3., 4., 5., 6., 7.]]), torch.Size([1, 7]))

In [123]:
x_reshaped_2 = x.reshape(7, 1)
x_reshaped_2, x_reshaped_2.shape

(tensor([[1.],
         [2.],
         [3.],
         [4.],
         [5.],
         [6.],
         [7.]]),
 torch.Size([7, 1]))

In [127]:
z = x.view(1, 7)
z, z.shape

(tensor([[1., 2., 3., 4., 5., 6., 7.]]), torch.Size([1, 7]))

In [128]:
z[:, 0] = 5
z, x

(tensor([[5., 2., 3., 4., 5., 6., 7.]]), tensor([5., 2., 3., 4., 5., 6., 7.]))

In [129]:
x_reshaped_1

tensor([[5., 2., 3., 4., 5., 6., 7.]])

In [130]:
x.is_contiguous()

True

In [135]:
x = torch.arange(1., 13.).reshape(3, 4)
x, x.shape

(tensor([[ 1.,  2.,  3.,  4.],
         [ 5.,  6.,  7.,  8.],
         [ 9., 10., 11., 12.]]),
 torch.Size([3, 4]))

In [134]:
x.is_contiguous()

True

In [137]:
x_t = x.transpose(0, 1)
x_t, x_t.shape

(tensor([[ 1.,  5.,  9.],
         [ 2.,  6., 10.],
         [ 3.,  7., 11.],
         [ 4.,  8., 12.]]),
 torch.Size([4, 3]))

In [138]:
x_t.is_contiguous()

False

In [139]:
try:
    x_reshape = x_t.reshape(6, 2)
    print(x_reshape)
except RuntimeError as e:
    print(f"Failed: {e}")

tensor([[ 1.,  5.],
        [ 9.,  2.],
        [ 6., 10.],
        [ 3.,  7.],
        [11.,  4.],
        [ 8., 12.]])


In [140]:
try:
    x_view = x_t.view(6, 2)
    print(x_view)
except RuntimeError as e:
    print(f"Failed: {e}")

Failed: view size is not compatible with input tensor's size and stride (at least one dimension spans across two contiguous subspaces). Use .reshape(...) instead.


In [148]:
x = torch.arange(1., 8.)
x

tensor([1., 2., 3., 4., 5., 6., 7.])

In [152]:
x_stacked = torch.stack([x, x, x, x, x], dim=1)
x_stacked

tensor([[1., 1., 1., 1., 1.],
        [2., 2., 2., 2., 2.],
        [3., 3., 3., 3., 3.],
        [4., 4., 4., 4., 4.],
        [5., 5., 5., 5., 5.],
        [6., 6., 6., 6., 6.],
        [7., 7., 7., 7., 7.]])

In [161]:
x = torch.tensor([[1]])
print(f"Previous tensor: {x}")
print(f"Previous shape: {x.shape}")

x_squeezed = x.squeeze()
print(f"\nNew Tensor: {x_squeezed}")
print(f"\nNew shape: {x_squeezed.shape}")

Previous tensor: tensor([[1]])
Previous shape: torch.Size([1, 1])

New Tensor: 1

New shape: torch.Size([])


In [162]:
x = torch.tensor([[1], [2], [3]])
print(f"Previous tensor: {x}")
print(f"Previous shape: {x.shape}")

x_squeezed = x.squeeze()
print(f"\nNew Tensor: {x_squeezed}")
print(f"\nNew shape: {x_squeezed.shape}")

Previous tensor: tensor([[1],
        [2],
        [3]])
Previous shape: torch.Size([3, 1])

New Tensor: tensor([1, 2, 3])

New shape: torch.Size([3])


In [163]:
x = torch.tensor([[[1], [2], [3]]])
print(f"Previous tensor: {x}")
print(f"Previous shape: {x.shape}")

x_squeezed = x.squeeze()
print(f"\nNew Tensor: {x_squeezed}")
print(f"\nNew shape: {x_squeezed.shape}")

Previous tensor: tensor([[[1],
         [2],
         [3]]])
Previous shape: torch.Size([1, 3, 1])

New Tensor: tensor([1, 2, 3])

New shape: torch.Size([3])


In [165]:
print(f"Previous tensor: {x_squeezed}")
print(f"Previous shape: {x_squeezed.shape}")

x_unsqueezed = x_squeezed.unsqueeze(dim=0)
x_unsqueezed = x_unsqueezed.unsqueeze(dim=2)
print(f"\nNew tensor: {x_unsqueezed}")
print(f"New shape: {x_unsqueezed.shape}")

Previous tensor: tensor([1, 2, 3])
Previous shape: torch.Size([3])

New tensor: tensor([[[1],
         [2],
         [3]]])
New shape: torch.Size([1, 3, 1])


In [167]:
tensor = torch.tensor([[[1, 2, 3], 
                        [4, 5, 6]]])
tensor, tensor.shape

(tensor([[[1, 2, 3],
          [4, 5, 6]]]),
 torch.Size([1, 2, 3]))

In [168]:
tensor.is_contiguous()

True

In [170]:
reshaped_tensor = tensor.reshape(3, 1, 2)
reshaped_tensor, reshaped_tensor.shape

(tensor([[[1, 2]],
 
         [[3, 4]],
 
         [[5, 6]]]),
 torch.Size([3, 1, 2]))

In [171]:
reshaped_tensor.is_contiguous()

True

In [174]:
reshaped_tensor.stride()

(2, 2, 1)

In [172]:
permuted_tensor = tensor.permute(2, 0, 1)
permuted_tensor, permuted_tensor.shape

(tensor([[[1, 4]],
 
         [[2, 5]],
 
         [[3, 6]]]),
 torch.Size([3, 1, 2]))

In [173]:
permuted_tensor.is_contiguous()

False

In [175]:
permuted_tensor.stride()

(1, 6, 3)

The concept of the conguity has to due to the row-major order. 

#### Understanding Row-Major Order

In row-major order (used by PyTorch and NumPy), the elements of a multi-dimensional array are stored in memory such that the last dimension changes the fastest, followed by the second-last dimension, and so on. This means that for a tensor with shape (D0, D1, D2):

* Elements along D2 (the last dimension) are stored next to each other in memory.
* Moving along D1 involves skipping all elements in D2.
* Moving along D0 involves skipping all elements in both D1 and D2.

For a tensor to be contiguous in memory, it needs to follow this pattern. The stride for the last dimension must be 1, and for the preceding dimensions, it should be the product of all subsequent dimensions.

#### Impact of permute on Contiguity

When you apply the permute operation, you are changing the order of dimensions without changing the underlying data layout. This affects how the data is accessed according to the strides but does not re-align the data in memory to follow the row-major order for the new shape.

Example Recap

Original tensor (before permute):

* Shape: (1, 2, 3)
* Strides: (6, 3, 1)

This means:
* To move between elements in the last dimension (3), move 1 step in memory.
* To move between elements in the middle dimension (2), move 3 steps.
* To move between elements in the first dimension (1), move 6 steps.

After permute(2, 0, 1):

* New shape: (3, 1, 2)
* New strides: (1, 6, 3)

The new strides indicate:
* To move between elements in the new last dimension (2), move 3 steps in memory.
* To move between elements in the new middle dimension (1), move 6 steps.
* To move between elements in the new first dimension (3), move 1 step.

#### Why It Becomes Non-Contiguous

The new strides after permute do not match the typical row-major layout (where the last dimension should have a stride of 1). Instead:
* The last dimension (2 after permute) has a stride of 3, which breaks the row-major order assumption.

Because of this mismatch between the expected stride pattern for a contiguous layout and the actual strides, the tensor is marked as non-contiguous.

#### Summary

* Row-major order is central to why the tensor becomes non-contiguous after permute.
* Contiguity requires that the last dimension has a stride of 1, and preceding dimensions have strides that reflect the product of all subsequent dimensions.
* permute disrupts this pattern without rearranging the underlying data, leading to a non-contiguous tensor.

### Indexing (selecting data from tensors)

In [181]:
tensor = torch.arange(1., 10.).reshape(1, 3, 3)
tensor, tensor.shape

(tensor([[[1., 2., 3.],
          [4., 5., 6.],
          [7., 8., 9.]]]),
 torch.Size([1, 3, 3]))

In [182]:
print(f"First bracket: {tensor[0]}")
print(f"Second bracket: {tensor[0][0]}")
print(f"Third bracket: {tensor[0][0][0]}")

First bracket: tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])
Second bracket: tensor([1., 2., 3.])
Third bracket: 1.0


In [183]:
# Get all values of 0th dimension and the 0 index of 1st dimension
tensor[:, 0]

tensor([[1., 2., 3.]])

In [184]:
# Get all values of 0th & 1st dimensions but only index 1 of 2nd dimension
tensor[:, :, 1]

tensor([[2., 5., 8.]])

In [185]:
# Get all values of the 0 dimension but only the 1 index value of the 1st and 2nd dimension
tensor[:, 1, 1]

tensor([5.])

In [186]:
# Get index 0 of 0th and 1st dimension and all values of 2nd dimension 
tensor[0, 0, :]

tensor([1., 2., 3.])

### PyTorch tensors & NumPy arrays

In [188]:
array = np.arange(1., 8.)
tensor = torch.from_numpy(array)
array, tensor

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [190]:
array = array + 1
array, tensor

(array([2., 3., 4., 5., 6., 7., 8.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [191]:
array = np.arange(1, 8)
tensor = torch.from_numpy(array)
array, tensor

(array([1, 2, 3, 4, 5, 6, 7]), tensor([1, 2, 3, 4, 5, 6, 7]))

In [192]:
array.dtype, tensor.dtype

(dtype('int64'), torch.int64)

In [193]:
tensor = torch.ones(7)
np_array = tensor.numpy()
tensor, np_array

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

### Reproducibility (taking random out of random)

In [197]:
random_tensor_a = torch.rand(3, 4)
random_tensor_b = torch.rand(3, 4)

print(random_tensor_a)
print(random_tensor_b)
print(random_tensor_a == random_tensor_b)

tensor([[0.9147, 0.2036, 0.2018, 0.2018],
        [0.9497, 0.6666, 0.9811, 0.0874],
        [0.0041, 0.1088, 0.1637, 0.7025]])
tensor([[0.6790, 0.9155, 0.2418, 0.1591],
        [0.7653, 0.2979, 0.8035, 0.3813],
        [0.7860, 0.1115, 0.2477, 0.6524]])
tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])


In [201]:
RANDOM_SEED = 42
torch.manual_seed(seed=RANDOM_SEED)
random_tensor_a = torch.rand(3, 4)

torch.manual_seed(seed=RANDOM_SEED)
random_tensor_b = torch.rand(3, 4)

print(random_tensor_a)
print(random_tensor_b)
print(random_tensor_a == random_tensor_b)

tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])


In [223]:
RANDOM_SEED = 42
torch.manual_seed(seed=RANDOM_SEED)
random_tensor_a = torch.rand(3, 4)

random_tensor_b = torch.rand(3, 4)

print(random_tensor_a)
print(random_tensor_b)
print(random_tensor_a == random_tensor_b)

tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[0.8694, 0.5677, 0.7411, 0.4294],
        [0.8854, 0.5739, 0.2666, 0.6274],
        [0.2696, 0.4414, 0.2969, 0.8317]])
tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])


In [233]:
torch.manual_seed(seed=RANDOM_SEED)
random_tensor_c = torch.rand(3, 4)
random_tensor_d = torch.rand(3, 4)

print(random_tensor_c)
print(random_tensor_d)

tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[0.8694, 0.5677, 0.7411, 0.4294],
        [0.8854, 0.5739, 0.2666, 0.6274],
        [0.2696, 0.4414, 0.2969, 0.8317]])


In [238]:
RANDOM_SEED = 42
torch.manual_seed(seed=RANDOM_SEED)
random_tensor_a = torch.rand(3, 4)
random_tensor_b = torch.rand(3, 4)
random_tensor_c = torch.rand(3, 4)
random_tensor_d = torch.rand(3, 4)

print(random_tensor_a)
print(random_tensor_b)
print(random_tensor_c)
print(random_tensor_d)

tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[0.8694, 0.5677, 0.7411, 0.4294],
        [0.8854, 0.5739, 0.2666, 0.6274],
        [0.2696, 0.4414, 0.2969, 0.8317]])
tensor([[0.1053, 0.2695, 0.3588, 0.1994],
        [0.5472, 0.0062, 0.9516, 0.0753],
        [0.8860, 0.5832, 0.3376, 0.8090]])
tensor([[0.5779, 0.9040, 0.5547, 0.3423],
        [0.6343, 0.3644, 0.7104, 0.9464],
        [0.7890, 0.2814, 0.7886, 0.5895]])


### Running tensors on GPU

In [239]:
torch.cuda.is_available()

False

In [240]:
torch.backends.mps.is_available()

True

In [242]:
torch.mps.device_count()

1

In [252]:
if torch.cuda.is_available():
    device = "cuda"  # use NVIDIA GPU if available
elif torch.backends.mps.is_available():
    device = "mps"  # use Apple Silicon GPU if available
else:
    device = "cpu"  # defaults to CPU if no GPU is available

device

'mps'

In [253]:
tensor = torch.arange(1, 4)
tensor_on_gpu = tensor.to(device)

print(tensor)
print(tensor_on_gpu)

tensor([1, 2, 3])
tensor([1, 2, 3], device='mps:0')


In [255]:
try:
    tensor_on_cpu = tensor_on_gpu.numpy()
except Exception as e:
    print(f"Failed: {e}")

Failed: can't convert mps:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.


In [256]:
tensor_on_cpu = tensor_on_gpu.cpu().numpy()
tensor_on_cpu

array([1, 2, 3])

### Exercises

In [258]:
# Create random tensor with shape (7, 7).
tensor = torch.rand(7, 7)
tensor

tensor([[0.7539, 0.1952, 0.0050, 0.3068, 0.1165, 0.9103, 0.6440],
        [0.7071, 0.6581, 0.4913, 0.8913, 0.1447, 0.5315, 0.1587],
        [0.6542, 0.3278, 0.6532, 0.3958, 0.9147, 0.2036, 0.2018],
        [0.2018, 0.9497, 0.6666, 0.9811, 0.0874, 0.0041, 0.1088],
        [0.1637, 0.7025, 0.6790, 0.9155, 0.2418, 0.1591, 0.7653],
        [0.2979, 0.8035, 0.3813, 0.7860, 0.1115, 0.2477, 0.6524],
        [0.6057, 0.3725, 0.7980, 0.8399, 0.1374, 0.2331, 0.9578]])

In [262]:
# Perform a matrix multiplication on the tensor from 2 with another random tensor with shape (1, 7)
tensor_2 = torch.rand(1, 7)

tensor_3 = tensor @ tensor_2.T
tensor_3

tensor([[0.6545],
        [1.0373],
        [1.1938],
        [0.9486],
        [1.0736],
        [0.8781],
        [1.1626]])

In [264]:
# Set the random seed to 0 and do 2 & 3 over again.
# The output should be:

# (tensor([[1.8542],
#          [1.9611],
#          [2.2884],
#          [3.0481],
#          [1.7067],
#          [2.5290],
#          [1.7989]]), torch.Size([7, 1]))

RANDOM_SEED = 0 
torch.manual_seed(seed=RANDOM_SEED)
tensor_a = torch.rand(7, 7)
tensor_b = torch.rand(1, 7)
tensor_c = tensor_a @ tensor_b.T
tensor_c

tensor([[1.8542],
        [1.9611],
        [2.2884],
        [3.0481],
        [1.7067],
        [2.5290],
        [1.7989]])

In [267]:
# Create two random tensors of shape (2, 3) and send them both to the GPU (you'll need access to a GPU for this). Set torch.manual_seed(1234) when creating the tensors (this doesn't have to be the GPU random seed). The output should be something like:
# Device: cuda
# (tensor([[0.0290, 0.4019, 0.2598],
#          [0.3666, 0.0583, 0.7006]], device='cuda:0'),
#  tensor([[0.0518, 0.4681, 0.6738],
#          [0.3315, 0.7837, 0.5631]], device='cuda:0'))

RANDOM_SEED = 1234
torch.manual_seed(seed=RANDOM_SEED)

tensor_a = torch.rand(2, 3).to(device)
tensor_b = torch.rand(2, 3).to(device)

print(tensor_a)
print(tensor_b)

tensor([[0.0290, 0.4019, 0.2598],
        [0.3666, 0.0583, 0.7006]], device='mps:0')
tensor([[0.0518, 0.4681, 0.6738],
        [0.3315, 0.7837, 0.5631]], device='mps:0')


In [269]:
#  Perform a matrix multiplication on the tensors you created in 6
tensor_c = tensor_a @ tensor_b.T
tensor_c

tensor([[0.3647, 0.4709],
        [0.5184, 0.5617]], device='mps:0')

In [270]:
# Find the maximum and minimum values of the output of 7
print(torch.max(tensor_c))
print(torch.min(tensor_c))

tensor(0.5617, device='mps:0')
tensor(0.3647, device='mps:0')


In [272]:
# Find the maximum and minimum index values of the output of 7
print(torch.argmax(tensor_c))
print(torch.argmin(tensor_c))

tensor(3, device='mps:0')
tensor(0, device='mps:0')


In [273]:
#  Make a random tensor with shape (1, 1, 1, 10) and then create a new tensor with all the 1 dimensions removed to be left with a tensor of shape (10). 
# Set the seed to 7 when you create it and print out the first tensor and it's shape as well as the second tensor and it's shape.
# The output should look like:

# tensor([[[[0.5349, 0.1988, 0.6592, 0.6569, 0.2328, 0.4251, 0.2071, 0.6297,
#            0.3653, 0.8513]]]]) torch.Size([1, 1, 1, 10])
# 
# tensor([0.5349, 0.1988, 0.6592, 0.6569, 0.2328, 0.4251, 0.2071, 0.6297, 0.3653,
#         0.8513]) torch.Size([10])

RANDOM_SEED = 7
torch.manual_seed(seed=RANDOM_SEED)
tensor_a = torch.rand(1, 1, 1, 10)
tensor_b = tensor_a.squeeze()

print(tensor_a)
print(tensor_a.shape)
print(tensor_b)
print(tensor_b.shape)

tensor([[[[0.5349, 0.1988, 0.6592, 0.6569, 0.2328, 0.4251, 0.2071, 0.6297,
           0.3653, 0.8513]]]])
torch.Size([1, 1, 1, 10])
tensor([0.5349, 0.1988, 0.6592, 0.6569, 0.2328, 0.4251, 0.2071, 0.6297, 0.3653,
        0.8513])
torch.Size([10])


### Fashion MNIST model

In [274]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

In [278]:
# Downloading train and test dataset from open vision datasets
train_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor(),
)

test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor(),
)

In [281]:
batch_size = 64

train_dataloader = DataLoader(train_data, batch_size=batch_size)
test_dataloader = DataLoader(test_data, batch_size=batch_size)

for X, y in train_dataloader:
    print(f"Shape of X [N, C, H, W]: {X.shape} {X.dtype}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break

Shape of X [N, C, H, W]: torch.Size([64, 1, 28, 28]) torch.float32
Shape of y: torch.Size([64]) torch.int64


In [289]:
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)

print(f"Using {device} device")


class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits


model = NeuralNetwork().to(device)
model

Using mps device


NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)

In [290]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

In [291]:
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

In [292]:
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [293]:
epochs = 5
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 2.306602  [   64/60000]
loss: 2.290009  [ 6464/60000]
loss: 2.269770  [12864/60000]
loss: 2.260979  [19264/60000]
loss: 2.251373  [25664/60000]
loss: 2.214774  [32064/60000]
loss: 2.232035  [38464/60000]
loss: 2.188872  [44864/60000]
loss: 2.188063  [51264/60000]
loss: 2.153989  [57664/60000]
Test Error: 
 Accuracy: 40.6%, Avg loss: 2.150521 

Epoch 2
-------------------------------
loss: 2.161589  [   64/60000]
loss: 2.152027  [ 6464/60000]
loss: 2.087591  [12864/60000]
loss: 2.107097  [19264/60000]
loss: 2.057354  [25664/60000]
loss: 1.984516  [32064/60000]
loss: 2.027601  [38464/60000]
loss: 1.933577  [44864/60000]
loss: 1.946321  [51264/60000]
loss: 1.870152  [57664/60000]
Test Error: 
 Accuracy: 59.5%, Avg loss: 1.870605 

Epoch 3
-------------------------------
loss: 1.898082  [   64/60000]
loss: 1.873238  [ 6464/60000]
loss: 1.747605  [12864/60000]
loss: 1.802230  [19264/60000]
loss: 1.680043  [25664/60000]
loss: 1.627109  [32064/600

In [294]:
torch.save(model.state_dict(), "model.pth")
print("Saved PyTorch Model State to model.pth")

Saved PyTorch Model State to model.pth


In [295]:
model = NeuralNetwork().to(device)
model.load_state_dict(torch.load("model.pth"))

  model.load_state_dict(torch.load("model.pth"))


<All keys matched successfully>

In [296]:
classes = [
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]

model.eval()
x, y = test_data[0][0], test_data[0][1]
with torch.no_grad():
    x = x.to(device)
    pred = model(x)
    predicted, actual = classes[pred[0].argmax(0)], classes[y]
    print(f'Predicted: "{predicted}", Actual: "{actual}"')

Predicted: "Ankle boot", Actual: "Ankle boot"
