In [7]:
import torch
import torch.nn as nn
from torch.nn import functional as F
import numpy as np
import time
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

cuda


In [8]:
%%time
start_time = time.time()
# matrix operations here
zeros = torch.zeros(1, 1)
end_time = time.time()

elapsed_time = end_time - start_time
print(f"{elapsed_time:.8f}")

0.00000000
CPU times: total: 0 ns
Wall time: 0 ns


In [11]:
torch_rand1 = torch.rand(100, 100, 100, 100).to(device)
torch_rand2 = torch.rand(100, 100, 100, 100).to(device)

np_rand1 = torch.rand(100, 100, 100, 100)
np_rand2 = torch.rand(100, 100, 100, 100)

start_time = time.time()

rand = (torch_rand1 @ torch_rand2)

end_time = time.time()

elapsed_time = end_time - start_time
print(f"{elapsed_time:.8f}")


start_time = time.time()

rand = np.multiply(np_rand1, np_rand2)
end_time = time.time()
elapsed_time = end_time - start_time
print(f"{elapsed_time:.8f}")

0.00800014
0.07700062


In [10]:
torch.zeros(1).cuda()

tensor([0.], device='cuda:0')

In [14]:
# multinomial distribution - has to equal to 1 and it's the probability of it being one or the other 

probabilities = torch.tensor([0.1, 0.9])
# 10% or 0.1 => 0, 90% or 0.9 => 1. each probability points to the index of the probability in the tensor
# Draw 5 samples from the multinomial distribution
samples = torch.multinomial(probabilities, num_samples=10, replacement=True)
print(samples)

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])


In [15]:
# Cat - refers to concatenation 

tensor = torch.tensor([1, 2, 3, 4])
out = torch.cat((tensor, torch.tensor([5])), dim=0)
out

tensor([1, 2, 3, 4, 5])

In [16]:
# Tril - triangle lower - top left to bottom right 
# important because when predicting the future so they are talking to each other in history 
# we cannot communicate with the answer - only know the history of knowledge which answer

out = torch.tril(torch.ones(5, 5))
out

tensor([[1., 0., 0., 0., 0.],
        [1., 1., 0., 0., 0.],
        [1., 1., 1., 0., 0.],
        [1., 1., 1., 1., 0.],
        [1., 1., 1., 1., 1.]])

In [17]:
# similar but for upper

out = torch.triu(torch.ones(5, 5))
out

tensor([[1., 1., 1., 1., 1.],
        [0., 1., 1., 1., 1.],
        [0., 0., 1., 1., 1.],
        [0., 0., 0., 1., 1.],
        [0., 0., 0., 0., 1.]])

In [19]:
# mask_fill - to exponentiate and fill in values 
# in the example provided it's applying a fill using the torch trill where if the values are equal to 0 turn into -inf
out = torch.zeros(5, 5).masked_fill(torch.tril(torch.ones(5, 5)) == 0, float('-inf'))
out

tensor([[0., -inf, -inf, -inf, -inf],
        [0., 0., -inf, -inf, -inf],
        [0., 0., 0., -inf, -inf],
        [0., 0., 0., 0., -inf],
        [0., 0., 0., 0., 0.]])

In [20]:
tensor1 = torch.tensor([1, 2, 3])
tensor2 = torch.tensor([4, 5, 6])
tensor3 = torch.tensor([7, 8, 9])

# Stack the tensors along a new dimension
stacked_tensor = torch.stack([tensor1, tensor2, tensor3])
stacked_tensor

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [21]:
import torch.nn as nn
sample = torch.tensor([10.,10.,10.])
linear = nn.Linear(3, 3, bias=False) # a sequential container - linear transformation- make sure the inputs aligns with the hidden layers 
print(linear(sample))

tensor([ -0.6873, -11.2861,   6.2165], grad_fn=<SqueezeBackward4>)


In [22]:
import torch.nn.functional as F

# Create a tensor
tensor1 = torch.tensor([1.0, 2.0, 3.0])

# Apply softmax using torch.nn.functional.softmax()
softmax_output = F.softmax(tensor1, dim=0)

print(softmax_output)

tensor([0.0900, 0.2447, 0.6652])


In [28]:
# Initialize an embedding layer
vocab_size = 80
embedding_dim = 6
embedding = nn.Embedding(vocab_size, embedding_dim)

# Create some input indices
input_indices = torch.LongTensor([1, 5, 3, 2])

# Apply the embedding layer
embedded_output = embedding(input_indices) # Batch size / Sequence Length/ Embedding Dimension

# The output will be a tensor of shape (4, 100), where 4 is the number of inputs
# and 100 is the dimensionality of the embedding vectors
print(embedded_output.shape)
print(embedded_output)

torch.Size([4, 6])
tensor([[-0.4694,  1.6054,  0.7627,  1.9307, -1.7235,  1.1781],
        [ 0.6608,  0.6857,  1.2715,  0.3952, -1.6943, -0.9012],
        [ 0.7242,  0.3792,  0.5763,  1.0025,  0.4864,  0.5782],
        [ 0.5652,  0.2914,  0.0850,  0.4376,  1.8669,  0.5797]],
       grad_fn=<EmbeddingBackward0>)


In [33]:
embedding = nn.Embedding(10, 3)  # Adjust the vocabulary size (10) as needed

# Input tensor of shape [1, 5, 3, 2]
input_tensor = torch.tensor([[[[1, 5],
                               [3, 2],
                               [4, 0],
                               [9, 7],
                               [8, 6]]]])

print(input_tensor.shape)

# Apply the embedding layer
embedded_output = embedding(input_tensor)
print(embedded_output.shape)
print(embedded_output)

torch.Size([1, 1, 5, 2])
torch.Size([1, 1, 5, 2, 3])
tensor([[[[[ 0.1618, -0.9043, -1.0215],
           [-0.7190, -2.0227, -1.2690]],

          [[ 1.4281,  0.2476,  0.3629],
           [ 1.6815,  0.2511,  0.6559]],

          [[ 0.7968,  0.1098,  0.3375],
           [-1.8245, -0.5562, -0.6919]],

          [[ 0.5841, -0.6739,  0.0392],
           [ 0.9161,  1.0895,  0.6676]],

          [[ 0.9298, -0.5394,  0.0564],
           [ 0.1600, -1.2228, -0.9606]]]]], grad_fn=<EmbeddingBackward0>)


In [34]:
a = torch.tensor([[1,2],[3,4],[5,6]])
b = torch.tensor([[7,8,9],[10,11,12]])
# print(a @ b)
print(torch.matmul(a, b))

tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])
