In [1]:
import torch
print(torch.__version__)
import time
import numpy as np

2.6.0.dev20240927


## 1) Generate tensors with random numbers
- we can generate a tensor with specific number of random numbers in a specified range.
- we will use this while getting random batches from our training data (e.g. the romeo and juliet book)

In [2]:
randint = torch.randint(-100,100, (6,))
print(randint)

tensor([-66, -87,  75,  37, -49,  78])


## 2) Creating tensors
- we use ``` torch.tensor() ``` to create a tensor where pass in values to create a tensor

In [3]:
tensor = torch.tensor([[19.10,25.06],[0.6,0.4],[1.8,2.0]])
print(tensor)

tensor([[19.1000, 25.0600],
        [ 0.6000,  0.4000],
        [ 1.8000,  2.0000]])


## 3) Creating a tensor filled with zeros
- creating a tensor with set dimensions filled with zeros
- we use ``` torch.zeros(x,y,z,...) ``` we can make a tensor filled with 0s of any valid dimension with it

In [4]:
zeros = torch.zeros(2,3)
print(zeros)

tensor([[0., 0., 0.],
        [0., 0., 0.]])


## 3) Creating a tensor filled with ones
- creating a tensor with set dimensions filled with ones
- we use ``` torch.ones(x,y,z,...) ``` we can make a tensor filled with 1s of any valid dimension with it

In [5]:
ones = torch.ones(2,3)
print(ones)

tensor([[1., 1., 1.],
        [1., 1., 1.]])


## 3) Creating an empty tensor
- creating a tensor with set dimensions filled with very small or large numbers
- we use ``` torch.empty(x,y,z,...) ``` we can make a tensor filled with  very small or large number of any valid dimension with it

In [6]:
empty = torch.empty(3,6)
print(empty)

emptyType = torch.empty((3,4), dtype=torch.int64)
print(emptyType)

tensor([[0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.]])
tensor([[0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0]])


## 4) Creating a sorted tensor
- we use ``` torch.arange() ``` to create a tensor that is sorted, we can pass in a step value and start and end values so that it is sorted and has values which go from start to finish with the step value as difference between each value. If we just pass in a number it will have a step valye of 1 and end after that many numbers (the number is treated as the end value)

In [7]:
aranged = torch.arange(7)
print(aranged)

tensor([0, 1, 2, 3, 4, 5, 6])


## 5) Using linspace to create a tensor
- linspace takes in a start end and step value it goes from start to end in that many steps. in arrange the step is the differnce between each value, in linspace steps is the amount of total values that should be there.
- e.g in linspace if you have a step of 4 , there will be 4 values in total and difference is based on the start and end but in arrange the difference will be 4 if the step is 4
- a tensor is created with linspace using ``` torch.linspace() ```

In [8]:
linspace = torch.linspace(19,25, steps=5)
print(linspace)

tensor([19.0000, 20.5000, 22.0000, 23.5000, 25.0000])


## 10) Using logspace to create a tensor
- same as above but in log

In [9]:
logspace = torch.logspace( -15 , 15 , steps=5)
print(logspace)

tensor([1.0000e-15, 3.1623e-08, 1.0000e+00, 3.1623e+07, 1.0000e+15])


## 11) Creating a tensor with eye
- using ``` torch.eye() ``` we make a tensor with diagonal 1s , kinda looks like reduced row echelon form

In [10]:
eye = torch.eye(5)
print(eye)

tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]])


## 12) Using empty like to make a tensor
- create an empty tensor like the one that is passed in (sane dimensions)

In [11]:
like = torch.empty_like(eye)
print(like)

tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]])


## 13) Multinomial distrubtion with tensors
- we will use this for predictions
    - [ 0 ,    1]
    - [0.3,  0.7]
    - 0.3 is index 0 , 0.7 is index 1
    - 0.3 * 100 = 30%
    - <u> \+ 0.7 * 100 = 70% </u>
    - 1 * 100 = 100%

In [12]:
probabilities = torch.tensor([0.3,0.7]) #30% , 70% , adds up to 100%
# 30% chance we get 0 (index), 70% change we get 1 (index)

distributed = torch.multinomial(probabilities, num_samples=10, replacement=True)

print(distributed)

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])


## 14) Concatanating tensors
- we will use this while generating texts, we will concatnate what we predicted with what we are predicting nopw and so on
- e.g. [1,2,3,4] with [8,9,5,4] combined to make one tensor [1,2,3,4,8,9,5,4] which decoded back might be 'yungting' for example

In [13]:
tensor1 = torch.tensor([1,2,3,4])
tensor2 = torch.tensor([5,6,7,8])

combined = torch.cat((tensor1,tensor2),dim=0)

print(combined)

tensor([1, 2, 3, 4, 5, 6, 7, 8])


## 15) Using tril (Triangle Lower)
- this blocks the future and gives one more context each time so that so that there is more knowledge/history as you go down so that we wont predict while seeing/ copying the answer
- tril means triangle lower as when you go lower more future context is there

In [14]:
tril = torch.tril(torch.ones(6,6))
anotherTensor = torch.tensor([[19.10,25.06,3,4,5,6],[0.6,0.4,3,4,5,6],[1.8,2.0,3,4,5,6],[19.10,25.06,3,4,5,6],[0.6,0.4,3,4,5,6],[1.8,2.0,3,4,5,6]])
tril2 = torch.tril(anotherTensor)
print(tril)
print(tril2)

tensor([[1., 0., 0., 0., 0., 0.],
        [1., 1., 0., 0., 0., 0.],
        [1., 1., 1., 0., 0., 0.],
        [1., 1., 1., 1., 0., 0.],
        [1., 1., 1., 1., 1., 0.],
        [1., 1., 1., 1., 1., 1.]])
tensor([[19.1000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.6000,  0.4000,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 1.8000,  2.0000,  3.0000,  0.0000,  0.0000,  0.0000],
        [19.1000, 25.0600,  3.0000,  4.0000,  0.0000,  0.0000],
        [ 0.6000,  0.4000,  3.0000,  4.0000,  5.0000,  0.0000],
        [ 1.8000,  2.0000,  3.0000,  4.0000,  5.0000,  6.0000]])


## 16) Using triu (Triangle Upper)
- opposite of triangle lower shown above


In [15]:
triu = torch.triu(torch.ones(5,5))
print(triu)

tensor([[1., 1., 1., 1., 1.],
        [0., 1., 1., 1., 1.],
        [0., 0., 1., 1., 1.],
        [0., 0., 0., 1., 1.],
        [0., 0., 0., 0., 1.]])


## 17) Masked fill & exponentiation
- very important
- to reach the stage above , all that needs to be done is exponentiate each element ,explained better with the example below :
- e.g.
    - the  ```torcj.exp()``` function uses a constant of 2.71
    - when 2.71 is exponentiated with 0 it results in 1
    - when it is exponentiated with 1 it results in 2.71
    - when it is exponentiated with '-inf'it results in 0

In [16]:
masked_fill = torch.zeros(6,6).masked_fill(torch.tril(torch.ones(6,6)) == 0, float('-inf'))
print('masked : ')
print(masked_fill)
print()
print('exponentiated : ')
torch.exp(masked_fill)

masked : 
tensor([[0., -inf, -inf, -inf, -inf, -inf],
        [0., 0., -inf, -inf, -inf, -inf],
        [0., 0., 0., -inf, -inf, -inf],
        [0., 0., 0., 0., -inf, -inf],
        [0., 0., 0., 0., 0., -inf],
        [0., 0., 0., 0., 0., 0.]])

exponentiated : 


tensor([[1., 0., 0., 0., 0., 0.],
        [1., 1., 0., 0., 0., 0.],
        [1., 1., 1., 0., 0., 0.],
        [1., 1., 1., 1., 0., 0.],
        [1., 1., 1., 1., 1., 0.],
        [1., 1., 1., 1., 1., 1.]])

## 18) Transposing tensors
- for the example below we swap the 0 index with index 2

In [17]:
to_transpose = torch.zeros(3,4,5)
transposed = to_transpose.transpose(0, 2)
transposed.shape

torch.Size([5, 4, 3])

## 19) torch stack
- we will use this for batches soon , if we have a bunch of one dimensionals we can stack to make two dimenionals, we can also stack two dimensionals to make three dimensionals and so on

In [18]:
tensorA = torch.tensor([1,2,3])
tensorB = torch.tensor([4,5,6])
tensorC = torch.tensor([7,8,9])

tensor_stacked = torch.stack([tensorA,tensorB,tensorC])
print(tensor_stacked)

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])


## 20) nn. Module
- very important and we will use it a lot
- nn module contains stuff with learnable parameters
- when you add a bias it will learn and train with that bias, learnable
- mostly just layers
- example below we don't have a bias, how it works:
    - we have an input of tensor([7.,7.,7.])
    - we apply an linear transformayion on it

In [19]:
import torch.nn as nn

sample = torch.tensor([7.,7.,7.])

linear = nn.Linear(3,3, bias=False)

print(linear(sample))

tensor([-3.1440, -1.9018, -0.5780], grad_fn=<SqueezeBackward4>)


## 21) Softmax function 
- we exponentiate each value and divide the exponentiated value by the sum of all the exponentiated values
- e.g. showing the math behind for the code below :
    - exponentiation of each num :
        - 2.71...^2 = 7.3891
        - 2.71...^3 = 20.0855
        - 2.71...^4 = 54.5981
    - total = 7.3891 + 20.0855 + 54.5981 = 82.0727
    - divide each exponentiated value by the sum of 82.0727 :
        -  7.3891 / 82.0727 = 0.0900311553
        - 20.0855 / 82.0727 = 0.24472814955
        - 54.5981 / 82.0727 = 0.66524069514
    - make a tensor with the values ``` torch.tensor([0.0900311553,0.24472814955,0.66524069514]) ```
    - this give same result as using the softmax function

In [20]:
import torch.nn.functional as F

tensorF = torch.tensor([2.0,3.0,4.0])
soft_max = F.softmax(tensorF, dim=0)

print(soft_max)

tensor([0.0900, 0.2447, 0.6652])


## 22) Matrix Multiplication
- 

In [21]:
tensorW = torch.tensor([[1,2],[3,4],[5,6]])
tensorQ = torch.tensor([[7,8,9],[10,11,12]])
print(tensorW @ tensorQ)
mul_result = torch.matmul(tensorW,tensorQ)
print(mul_result)

tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])
tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])


## 23) Vector embedding
- l

In [22]:
vocabulary_size = 10000
embedding_dimension = 100

embedding = nn.Embedding(vocabulary_size, embedding_dimension)

input_indices = torch.LongTensor([1,5,3,2])

embedded_output = embedding(input_indices)

print(embedded_output.shape)

torch.Size([4, 100])


#### Before continuing or completing explanation for vector embedding just go over the Linear_Yung_Ting_Revision.pdf quickly as a reresher for the math involved

## 24) the matrix mul from ipad in code 
- this is simpler and we can use the gpu to scale more calculations

In [23]:
tensor_a = torch.tensor([[1,2],[3,4],[5,6]])
tensor_b = torch.tensor([[7,8,9],[10,11,12]])
print(tensor_a @ tensor_b)

tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])


## 25) matrix mul of int and float matrices
- if you have to do a float and int always better to convert the int to float as when you try and convert float to int there will be rounding
- you can only matrix mul the same types together

In [24]:
int_tensor = torch.randint(1,(3,4)).float()
float_tensor = torch.rand(4,3)

print(int_tensor @ float_tensor)

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])


## 26) Tensor Shape and reshape
- you can get the shape of the tensor using .shape
- you can use view to reshape the tensor as shown

In [40]:
tensor_s = torch.rand(2, 3, 5)
x, y, z = tensor_s.shape
print(x, y, z)
print(tensor_s.shape)
print(tensor_s)
tensor_s = tensor_s.view(x, z, y)
print(tensor_s.shape)
print(tensor_s)

2 3 5
torch.Size([2, 3, 5])
tensor([[[0.6788, 0.5161, 0.6100, 0.4892, 0.3879],
         [0.1979, 0.2356, 0.1491, 0.0714, 0.3703],
         [0.7914, 0.8634, 0.3329, 0.0193, 0.6528]],

        [[0.5112, 0.6584, 0.9677, 0.4133, 0.5212],
         [0.4956, 0.8592, 0.3825, 0.4282, 0.1891],
         [0.1159, 0.3159, 0.1288, 0.3981, 0.2595]]])
torch.Size([2, 5, 3])
tensor([[[0.6788, 0.5161, 0.6100],
         [0.4892, 0.3879, 0.1979],
         [0.2356, 0.1491, 0.0714],
         [0.3703, 0.7914, 0.8634],
         [0.3329, 0.0193, 0.6528]],

        [[0.5112, 0.6584, 0.9677],
         [0.4133, 0.5212, 0.4956],
         [0.8592, 0.3825, 0.4282],
         [0.1891, 0.1159, 0.3159],
         [0.1288, 0.3981, 0.2595]]])


# CPU vs GPU 

In [25]:
device = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu'
print(device)

mps


## 24) how long does our gpu take for creating a 1x1 tensor with zeros?

In [26]:
# %%time
# start_time = time.time()

# zeros = torch.zeros(1,1)

# end_time = time.time()

# time_elapsed = end_time - start_time
# print(f"{time_elapsed:.1000f}")

- since the above example is quite simple and too small we cannot see how much time it takes ,we can copare a calculation that is done with numpy on cpu and the same with torch on gpu for a better comparison below : 

In [29]:
# %%time
# # #pls change this to 10k or lower based on your hardware i have a fully specced out pc with a 4090 so wanted to play around hahah
# torch_rand1 = torch.rand(40000, 40000).to(device)
# torch_rand2 = torch.rand(40000, 40000).to(device)
# np_rand1 = torch.rand(40000,40000)
# np_rand2 = torch.rand(40000,40000)

# print("torch using gpu: ")

# start_time = time.time()

# rand = (torch_rand1 @ torch_rand2)

# end_time = time.time()
# time_elapsed = end_time - start_time
# print(f"{time_elapsed:.10f}")


# print("numpy using cpu: ")

# start_time = time.time()

# rand = np.multiply(np_rand1, np_rand2)
# end_time = time.time()
# time_elapsed = end_time - start_time
# print(f"{time_elapsed:.10f}")