# Import and dependencies

In [1]:
import torch
from torch.nn import functional as F
import numpy as np
import time

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

block_size = 8
batch_size = 4

cuda


# Torch methods

## randint method

In [2]:
"""
initialise a tensor containing 6 random numbers that are between -100 inclusive and 100 exclusive
"""

randint = torch.randint(-100, 100, (6,))
randint

tensor([  8,  -7,  73,  46,  49, -66])

## tensor method

In [3]:
"""
initialise a tensor by providing the values
"""

tensor = torch.tensor([[0.1, 1.2], [2.2, 3.1], [4.9, 5.2]])
tensor

tensor([[0.1000, 1.2000],
        [2.2000, 3.1000],
        [4.9000, 5.2000]])

## zeros method

In [4]:
"""
initialise a zero tensor by providing the shape (row by column) 
"""

zeros = torch.zeros(2, 3)
zeros

tensor([[0., 0., 0.],
        [0., 0., 0.]])

## ones method

In [5]:
"""
initialise a ones tensor by providing the shape (row by column) 
"""

ones = torch.ones(3,4)
ones

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

## arange method

In [6]:
"""
initialise a 1D tensor from 0 to given number 
"""

arange = torch.arange(5)
arange

tensor([0, 1, 2, 3, 4])

## linspace method

In [7]:
"""
initialise a 1D tensor from 3 to 10, with a length of 5 numbers 
"""

linspace = torch.linspace(3,10,steps=5)
linspace

tensor([ 3.0000,  4.7500,  6.5000,  8.2500, 10.0000])

## logspace method

In [8]:
"""
initialise a 1D tensor from -10 to 10, with a length of 6 numbers, equal spaced based on their power 
"""

logspace = torch.logspace(start=-10,end=10,steps=6)
logspace

tensor([1.0000e-10, 1.0000e-06, 1.0000e-02, 1.0000e+02, 1.0000e+06, 1.0000e+10])

## eye method

In [9]:
"""
initialise a diagonal tensor with a row and column of 5
"""

eye = torch.eye(5)
eye

tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]])

## empty_like

In [10]:
"""
initialise an empty tensor with a similar shape to the provide tensor
"""

empty_like = torch.empty_like(tensor, dtype=int)
empty_like

tensor([[5318465558448,             0],
        [            0,             0],
        [            0,             0]])

## gpu (torch) vs cpu (numpy)

In [11]:
start_time = time.time()
zeros = torch.zeros(1,1)
end_time = time.time()

elapsed_time = end_time-start_time
print(f"{elapsed_time: .8f}")

 0.00000000


In [12]:
torch_rand1 = torch.rand(10000, 10000).to(device)
torch_rand2 = torch.rand(10000, 10000).to(device)

np_rand1 = torch.rand(10000, 10000)
np_rand2 = torch.rand(10000, 10000)

start_time = time.time()

rand = (torch_rand1 @ torch_rand2)

end_time = time.time()
elapsed_time = end_time-start_time
print(f"torch with cuda enabled took: {elapsed_time: .8f} seconds")


rand = np.multiply(np_rand1, np_rand2)
end_time = time.time()

end_time = time.time()
elapsed_time = end_time-start_time
print(f"numpy using cpu only took: {elapsed_time: .8f} seconds")

torch with cuda enabled took:  0.03300190 seconds
numpy using cpu only took:  0.12624860 seconds


## multinomial method

In [13]:
# define a probability tensor, must add up to 1
probabilities = torch.tensor([0.1, 0.9])
# 10% chance of 0 and 90% chance of 1. each probability points to the index of the probability in the tensor

# draw 10 samples from the defined distribution, with replacement in this case
samples = torch.multinomial(probabilities, num_samples=10, replacement=True)
print(samples)

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 0])


## cat method

In [14]:
"""
coancatenate 2 tensors. keep in mind that the shape of both tensors must be the same.
"""

tensor = torch.tensor([1,2,3,4])
out = torch.cat((tensor, torch.tensor([5])), dim=0)
out

tensor([1, 2, 3, 4, 5])

## tril method

In [15]:
# initialise a lower triangle tensor

out = torch.tril(torch.ones(5,5))
out

tensor([[1., 0., 0., 0., 0.],
        [1., 1., 0., 0., 0.],
        [1., 1., 1., 0., 0.],
        [1., 1., 1., 1., 0.],
        [1., 1., 1., 1., 1.]])

## triu method

In [16]:
# initialise an upper triangle tensor

out = torch.triu(torch.ones(5,5))
out

tensor([[1., 1., 1., 1., 1.],
        [0., 1., 1., 1., 1.],
        [0., 0., 1., 1., 1.],
        [0., 0., 0., 1., 1.],
        [0., 0., 0., 0., 1.]])

## masked_fill method

In [17]:
# we start by creating a zero tensor of shape 5 by 5. this is to initialise the base tensor before applying the mask
# next we create a lower triangle tensor with a ones tensor of shape 5 by 5. the condition of equal to zero converts all 0's in the tensor to True and every other value to be False, creating a boolean mask.
# the masked_fill method then changes all the true values in the tensor to -infinity, and everything else stays the same. since we are masking over the zero tensor, the value that remains is 0. 

out = torch.zeros(5,5).masked_fill(torch.tril(torch.ones(5,5)) == 0, float('-inf'))
out

tensor([[0., -inf, -inf, -inf, -inf],
        [0., 0., -inf, -inf, -inf],
        [0., 0., 0., -inf, -inf],
        [0., 0., 0., 0., -inf],
        [0., 0., 0., 0., 0.]])

## exp method

In [18]:
out = torch.exp(out)
out

tensor([[1., 0., 0., 0., 0.],
        [1., 1., 0., 0., 0.],
        [1., 1., 1., 0., 0.],
        [1., 1., 1., 1., 0.],
        [1., 1., 1., 1., 1.]])

## transpose method

In [21]:
input = torch.zeros(2,3,4)
# swap dimension at index 0 and 2. => swap 2 and 4.
out = input.transpose(0,2)
out.shape

torch.Size([4, 3, 2])

## stack method

In [23]:
tensor1 = torch.tensor([1,2,3])
tensor2 = torch.tensor([4,5,6])
tensor3 = torch.tensor([7,8,9])

stacked_tensor = torch.stack([tensor1, tensor2, tensor3])
stacked_tensor

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

## nn.linear method

In [26]:
import torch.nn as nn
# to make things learnable

sample = torch.tensor([10., 10., 10.])
# perform linear transformation to ensure consistency in shapes
# a learnable linear layer that transforms an input of size 3 to an output of size 3. y = Wx + b
# W is the weight matrix that converts the input shpae to the output shape, x is the input tensor, b is the optional bias. 
# in this case, the weight is randomly initialised.
linear = nn.Linear(3, 3, bias=False)
print(linear(sample))

tensor([11.1186,  6.4585,  1.9329], grad_fn=<SqueezeBackward4>)


In [29]:
import torch.nn.functional as F
# find the sum of the exponential of each element in the tensor, and divide each number in the tensor by this sum.

# softmax_output = [1/(e^1 + e^2 + e^3), 2/(e^1 + e^2 + e^3), 3/(e^1 + e^2 + e^3)]

tensor1 = torch.tensor([1.0,2.0,3.0])
softmax_output = F.softmax(tensor1, dim=0)
softmax_output

tensor([0.0900, 0.2447, 0.6652])

# Embeddings

In [52]:
# initialise an embedding layer
vocab_size = 1000
embedding_dim = 100
embedding = nn.Embedding(vocab_size, embedding_dim)

# create some random input
input = torch.randint(0, 100,(4,))
embedded_output = embedding(input)
print(embedded_output.shape)

torch.Size([4, 100])


## Matrix multiplication

In [55]:
a = torch.tensor([[1,2],[3,4],[5,6]])
b = torch.tensor([[7,8,9],[10,11,12]])
print(a @ b)
print(torch.matmul(a, b))

tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])
tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])


In [65]:
# torch does not allow multiplication between float and int

# create a tensor containing only integers from 0 to 1, of shape 3 by 2.
int_64 = torch.randint(1, (3,2))
# create a random tensor containing floats, of shape 3 by 2.
float_32 = torch.rand(2,3)

print("integer: ", int_64)
print("float: " ,float_32)
print("\n")
try:
    result = torch.matmul(int_64, float_32)
    print(result)
except Exception as e:
    print(e)
    print("\n")

#  workaround:
print("after casting to type float")
result = torch.matmul(torch.randint(1, (3,2)).float(), float_32)
print(result)

# note that there ar erounding issues with this workaround. generally always use float.

integer:  tensor([[0, 0],
        [0, 0],
        [0, 0]])
float:  tensor([[0.3620, 0.1403, 0.6970],
        [0.9850, 0.0821, 0.2065]])


expected m1 and m2 to have the same dtype, but got: __int64 != float


after casting to type float
tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])


## view method

In [75]:
a = torch.rand(2,3,5)
print(a)

# flattening using view
x, y, z = a.shape
a = a.view(-1)
print(a)

# reshaping using view
print(a.view(2,3,5))

tensor([[[0.9581, 0.1800, 0.9921, 0.9364, 0.8607],
         [0.2878, 0.5146, 0.9212, 0.0265, 0.4513],
         [0.9235, 0.2298, 0.8398, 0.5072, 0.0039]],

        [[0.1536, 0.8389, 0.8119, 0.9829, 0.2149],
         [0.2569, 0.3291, 0.3270, 0.6362, 0.9774],
         [0.2609, 0.0280, 0.8595, 0.4348, 0.5776]]])
tensor([0.9581, 0.1800, 0.9921, 0.9364, 0.8607, 0.2878, 0.5146, 0.9212, 0.0265,
        0.4513, 0.9235, 0.2298, 0.8398, 0.5072, 0.0039, 0.1536, 0.8389, 0.8119,
        0.9829, 0.2149, 0.2569, 0.3291, 0.3270, 0.6362, 0.9774, 0.2609, 0.0280,
        0.8595, 0.4348, 0.5776])
tensor([[[0.9581, 0.1800, 0.9921, 0.9364, 0.8607],
         [0.2878, 0.5146, 0.9212, 0.0265, 0.4513],
         [0.9235, 0.2298, 0.8398, 0.5072, 0.0039]],

        [[0.1536, 0.8389, 0.8119, 0.9829, 0.2149],
         [0.2569, 0.3291, 0.3270, 0.6362, 0.9774],
         [0.2609, 0.0280, 0.8595, 0.4348, 0.5776]]])


# Normalisation

## Relu

if input is less than zero, output is zero, but if input is zero or more, output is whatever the input is.

In [82]:
x = torch.tensor([-0.05], dtype=torch.float32,)
y = F.relu(x)
print(y)

tensor([0.])


## Sigmoid

maps input to a number between 0 and 1

In [83]:
y = F.sigmoid(x)
print(y)

tensor([0.4875])


## Tanh

maps input to a number between -1 and 1

In [89]:
y = F.tanh(torch.tensor([1]))
print(y)

tensor([1.])
