In [1]:
import torch

In [2]:
# A 3x3 integer matrix built directly from nested Python lists.
first_tensor = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

# Bare last expression -> the notebook renders the tensor.
first_tensor

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [3]:
# 3x3 matrix filled with ones (shape passed as a single tuple).
ones_tensor = torch.ones((3, 3))
ones_tensor

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])

In [4]:
# 3x3 matrix of samples drawn uniformly from [0, 1).
rand_tensor = torch.rand((3, 3))
rand_tensor

tensor([[0.2164, 0.7192, 0.9491],
        [0.7351, 0.6637, 0.1356],
        [0.5429, 0.7954, 0.5008]])

In [6]:
# 3x3 matrix of random integers: low (2) is inclusive, high (8) is exclusive.
randint_tensor = torch.randint(low=2, high=8, size=(3, 3))
randint_tensor

tensor([[7, 6, 6],
        [4, 7, 4],
        [7, 5, 6]])

In [9]:
# The keyword arguments are spelled out with their default values on purpose,
# to show which knobs torch.tensor exposes.
float_32_tensor = torch.tensor(
    [3.0, 6.0, 9.0],
    dtype=None,           # None -> inferred from the data (float32 for Python floats)
    device=None,          # None -> the current default device (CPU here)
    requires_grad=False,  # do not record operations on this tensor for autograd
)

# Shape, dtype and device are the three attributes worth checking first.
float_32_tensor.shape, float_32_tensor.dtype, float_32_tensor.device

(torch.Size([3]), torch.float32, device(type='cpu'))

In [13]:
# Rebind rand_tensor to a fresh 3x4 sample, then report it with its shape and device.
rand_tensor = torch.rand((3, 4))
print(
    rand_tensor,
    f"Shape of tensor: {rand_tensor.shape}",
    f"Device of tensor: {rand_tensor.device}",
    sep="\n",
)

tensor([[0.7922, 0.9383, 0.1015, 0.7579],
        [0.7414, 0.2219, 0.7448, 0.9070],
        [0.5045, 0.0948, 0.4908, 0.6073]])
Shape of tensor: torch.Size([3, 4])
Device of tensor: cpu


# Operations

In [15]:
# 1-D integer tensor used throughout this section; adding a scalar applies it to every element.
tensor = torch.tensor((1, 2, 3))
tensor + 10

tensor([11, 12, 13])

In [16]:
# Scalar multiplication is element-wise as well; `tensor` itself is not modified.
tensor*10

tensor([10, 20, 30])

In [21]:
# The `*` operator and torch.multiply are two spellings of the same element-wise product.
tensor_multi = torch.multiply(tensor, 10)
tensor_oper = tensor * 10

# torch.equal is True only when both tensors have the same shape and the same values.
torch.equal(tensor_oper, tensor_multi)

True

## Matrix multiplication

In [24]:
# Show the current value of `tensor` before comparing element-wise and matrix products.
tensor

tensor([1, 2, 3])

In [25]:
# Element-wise product: each entry is multiplied by itself.
tensor * tensor

tensor([1, 4, 9])

In [26]:
# For two 1-D tensors, matmul returns the dot product as a 0-dim tensor (1*1 + 2*2 + 3*3 = 14).
torch.matmul(tensor,tensor)

tensor(14)

In [44]:
# Random integer operands: (2, 3) @ (3, 2) -> (2, 2). The inner dimensions (3 and 3) must match.
tensor_2x3 = torch.randint(1,3,size=(2,3))
tensor_3x2 = torch.randint(2,4,size=(3,2))

tensor_matmul = torch.matmul(tensor_2x3,tensor_3x2)
# torch.mm is the strictly 2-D variant (no broadcasting); for two matrices it must
# agree with torch.matmul, so check that instead of silently overwriting the result.
assert torch.equal(tensor_matmul, torch.mm(tensor_2x3, tensor_3x2))

print(f"tensor 2x3:\n{tensor_2x3}")
print(f"tensor 3x2:\n{tensor_3x2}")
print(f"tensor matrix multiplication:\n{tensor_matmul}")

tensor 2x3:
tensor([[2, 2, 2],
        [2, 1, 2]])
tensor 3x3:
tensor([[3, 2],
        [3, 3],
        [3, 3]])
tensor matrix multiplication:
tensor([[18, 16],
        [15, 13]])


## Transpose

In [42]:
# .T swaps the two dimensions, so the (2, 3) matrix is displayed as (3, 2).
print(
    f"2x3 matrix:\n{tensor_2x3}",
    f"Transpose matrix:\n{tensor_2x3.T}",
    sep="\n",
)

2x3 matrix:
tensor([[1, 1, 2],
        [1, 1, 1]])
Transpose matrix:
tensor([[1, 1],
        [1, 1],
        [2, 1]])


In [46]:
# Both operands are (3, 2), so tensor_A @ tensor_B is undefined: the inner dimensions (2 and 3) differ.
# Transposing tensor_B gives (3, 2) @ (2, 3), which yields a (3, 3) result.
tensor_A = torch.tensor([[1, 2], [3, 4], [5, 6]], dtype=torch.float32)
tensor_B = torch.tensor([[7, 10], [8, 11], [9, 12]], dtype=torch.float32)

torch.mm(tensor_A, tensor_B.T)

tensor([[ 27.,  30.,  33.],
        [ 61.,  68.,  75.],
        [ 95., 106., 117.]])

## Linear

In [51]:
# Seed the global RNG so the layer's randomly initialised weights are repeatable.
torch.manual_seed(123)

# y = x @ W.T + b: in_features must equal the last dimension of the input,
# out_features becomes the last dimension of the output.
linear = torch.nn.Linear(in_features=2, out_features=6)

x = tensor_A
output = linear(x)

print(f"Input shape: {x.shape}\n")
print(f"Output:\n{output}\n\nOutput shape: {output.shape}")

Input shape: torch.Size([3, 2])

Output:
tensor([[-0.8416, -0.2469,  0.1746, -1.7773, -0.3457,  0.7310],
        [-1.3715, -0.4160,  0.0062, -3.9295, -0.5989,  0.7376],
        [-1.9013, -0.5850, -0.1621, -6.0818, -0.8520,  0.7443]],
       grad_fn=<AddmmBackward0>)

Output shape: torch.Size([3, 6])


# Min, Max, Mean, Sum

In [54]:
# Multiples of 10 from 0 up to (but not including) 100.
x = torch.arange(start=0, end=100, step=10)
x

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [60]:
# Use the tensor's own reductions: Python's built-in max/min/sum iterate over the
# tensor one element at a time instead of running a single vectorised reduction.
print(f"Maximum: {x.max()}")
print(f"Minimum: {x.min()}")
print(f"Sum: {x.sum()}")
# mean() is not defined for integer tensors, so convert to float32 first.
print(f"Mean: {x.type(torch.float32).mean()}")

Maximum: 90
Minimum: 0
Sum: 450
Mean: 45.0


Note: You may find some methods such as torch.mean() require tensors to be in torch.float32 (the most common) or another specific datatype, otherwise the operation will fail.

In [62]:
tensor_x = torch.arange(0,100,10)
# Print tensor_x itself; `tensor` is a different variable from an earlier section.
print(f"Arnange tensor:\n{tensor_x}")

# argmax/argmin return the position of the extreme value, not the value itself
print(f"Index where max value occurs: {tensor_x.argmax()}")
print(f"Index where min value occurs: {tensor_x.argmin()}")

Arnange tensor:
tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])
Index where max value occurs: 9
Index where min value occurs: 0


# Change tensor type

In [65]:
# Rebind `tensor` to 10, 20, ..., 80; integer arguments make arange return int64.
tensor = torch.arange(start=10, end=90, step=10)
tensor.dtype

torch.int64

In [66]:
# .type() returns a converted copy (half precision here); `tensor` keeps its own dtype.
tensor_float16 = tensor.type(dtype=torch.float16)
tensor_float16.dtype

torch.float16

In [67]:
# Same conversion to single precision, the dtype most torch operations expect.
tensor_float32 = tensor.type(dtype=torch.float32)
tensor_float32.dtype

torch.float32

# Reshaping, stacking, squeezing, unsqueezing

In [76]:
# Float arguments make arange produce float32 values: 10., 20., ..., 70.
x = torch.arange(start=10.0, end=80.0, step=10)
print(x, x.dtype, x.shape)

tensor([10., 20., 30., 40., 50., 60., 70.]) torch.float32 torch.Size([7])


In [75]:
# Add a leading dimension of size 1: shape (7,) -> (1, 7). The element count must stay the same.
x_reshaped = torch.reshape(x, (1, 7))
print(x_reshaped, x_reshaped.shape)
x_reshaped

tensor([[10., 20., 30., 40., 50., 60., 70.]]) torch.Size([1, 7])


tensor([[10., 20., 30., 40., 50., 60., 70.]])

In [78]:
# view() returns a tensor with a new shape that shares the same underlying data as x.
z = x.view((1, 7))
z

tensor([[10., 20., 30., 40., 50., 60., 70.]])

In [79]:
# Changing z changes x: z is a view of x, so writing through z (first column
# set to 5) also overwrites x[0]. Both tensors are displayed to show the
# shared data.
z[:, 0] = 5
z, x

(tensor([[ 5., 20., 30., 40., 50., 60., 70.]]),
 tensor([ 5., 20., 30., 40., 50., 60., 70.]))

# Indexing

In [82]:
# Values 1..9 shown flat and regrouped as a single 3x3 block (shape (1, 3, 3)).
x = torch.arange(start=1, end=10)
x, x.reshape((1, 3, 3))

(tensor([1, 2, 3, 4, 5, 6, 7, 8, 9]),
 tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]))

# Testing torch on GPU

## Test script

In [92]:
import torch
import math
# First line: whether the MPS backend is usable right now (requires macOS 12.3+ and an MPS-capable device).
# Second line: whether this PyTorch installation was built with MPS support.
print(
    torch.backends.mps.is_available(),
    torch.backends.mps.is_built(),
    sep="\n",
)

True
True


In [93]:
dtype = torch.float
# Use the Apple-silicon GPU when it is available and fall back to the CPU otherwise,
# so this cell also runs on machines without MPS support.
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

# Input: 2000 evenly spaced points in [-pi, pi]; target: sin(x) at those points
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

# Randomly initialize the four polynomial coefficients as 0-dim tensors on `device`
a = torch.randn((), device=device, dtype=dtype)
b = torch.randn((), device=device, dtype=dtype)
c = torch.randn((), device=device, dtype=dtype)
d = torch.randn((), device=device, dtype=dtype)

learning_rate = 1e-6
for t in range(2000):
    # Forward pass: cubic polynomial a + b*x + c*x^2 + d*x^3
    y_pred = a + b * x + c * x ** 2 + d * x ** 3

    # Sum-of-squares loss; .item() converts the 0-dim tensor to a Python float for printing
    loss = (y_pred - y).pow(2).sum().item()
    if t % 100 == 99:
        print(t, loss)

    # Backprop by hand: gradients of the loss with respect to a, b, c, d
    grad_y_pred = 2.0 * (y_pred - y)
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x ** 2).sum()
    grad_d = (grad_y_pred * x ** 3).sum()

    # Update weights using gradient descent (in place, so a-d stay on `device`)
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d


print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')


99 167.1236114501953
199 114.48543548583984
299 79.39013671875
399 55.97895812988281
499 40.35350799560547
599 29.91857147216797
699 22.946025848388672
799 18.284086227416992
899 15.165090560913086
999 13.076950073242188
1099 11.677961349487305
1199 10.740017890930176
1299 10.110695838928223
1399 9.688103675842285
1499 9.40411376953125
1599 9.213098526000977
1699 9.084510803222656
1799 8.997859001159668
1899 8.939417839050293
1999 8.899964332580566
Result: y = -0.005785301793366671 + 0.8496687412261963 x + 0.0009980625472962856 x^2 + -0.09232445806264877 x^3


In [94]:
# Display the device object that the tensors in the training cell were created on.
device

device(type='mps')