In [1]:
import time

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

# Pick the compute device: Apple's Metal backend ('mps') when usable, otherwise the CPU.
# `is_available` is a function and has to be called; the bare function object is always
# truthy, which would select 'mps' even on machines that do not have it.
device = 'mps' if torch.backends.mps.is_available() else 'cpu'
device

'mps'

In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
# Hyperparameters for slicing the corpus into training examples.
# block_size: length of one context window (used below to cut x / y out of train_data).
# batch_size: presumably the number of windows per batch — not used in the cells shown here.
block_size, batch_size = 8, 4

In [4]:
# Read the whole corpus into memory as one string, then preview its first 200 characters.
with open('wizard_of_oz.txt', mode='r', encoding='utf-8') as corpus_file:
    text = corpus_file.read()

print(text[:200])

  DOROTHY AND THE WIZARD IN OZ

  BY

  L. FRANK BAUM

  AUTHOR OF THE WIZARD OF OZ, THE LAND OF OZ, OZMA OF OZ, ETC.

  ILLUSTRATED BY JOHN R. NEILL

  BOOKS OF WONDER WILLIAM MORROW & CO., INC. NEW 


In [5]:
# Total number of characters in the corpus.
len(text)

232309

In [6]:
# Vocabulary: every distinct character in the corpus, in sorted order.
chars = list(set(text))
chars.sort()
print(chars)
print(len(chars))

['\n', ' ', '!', '"', '&', "'", '(', ')', '*', ',', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', ']', '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
80


In [7]:
# Character <-> integer-id conversions built from the sorted vocabulary `chars`.

strings_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_string = dict(enumerate(chars))


def encode(s):
    """Map each character of `s` to its integer id.

    Raises KeyError if `s` contains a character that is not in `chars`.
    """
    return [strings_to_int[c] for c in s]


def decode(l):
    """Join the characters for the integer ids in `l` into one string.

    Raises KeyError for an id that is not in the vocabulary.
    """
    return ''.join(int_to_string[i] for i in l)

In [8]:
# Sanity check: encode a word to ids, then decode those same ids back to text.
print(encode('hello'))
print(decode([61, 58, 65, 65, 68]))

[61, 58, 65, 65, 68]
hello


In [9]:
# Encode the full corpus as a 1-D tensor of 64-bit integer ids and preview the first 100.
data = torch.tensor(encode(text), dtype=torch.int64)
data[:100]

tensor([ 1,  1, 28, 39, 42, 39, 44, 32, 49,  1, 25, 38, 28,  1, 44, 32, 29,  1,
        47, 33, 50, 25, 42, 28,  1, 33, 38,  1, 39, 50,  0,  0,  1,  1, 26, 49,
         0,  0,  1,  1, 36, 11,  1, 30, 42, 25, 38, 35,  1, 26, 25, 45, 37,  0,
         0,  1,  1, 25, 45, 44, 32, 39, 42,  1, 39, 30,  1, 44, 32, 29,  1, 47,
        33, 50, 25, 42, 28,  1, 39, 30,  1, 39, 50,  9,  1, 44, 32, 29,  1, 36,
        25, 38, 28,  1, 39, 30,  1, 39, 50,  9])

In [10]:
# Train/test split: the first 80% of the ids are for training, the remainder for testing.
n = int(0.8 * len(data))
train_data, test_data = data[:n], data[n:]

In [11]:
# One example window: x is the first block_size ids, y is the same window shifted right
# by one position, so y[t] is the id that follows x[:t+1].
x, y = train_data[:block_size], train_data[1:block_size + 1]

In [12]:
# Input window.
x

tensor([ 1,  1, 28, 39, 42, 39, 44, 32])

In [13]:
# Target window (x shifted by one position).
y

tensor([ 1, 28, 39, 42, 39, 44, 32, 49])

In [14]:
# For every prefix of x, show the id that comes next.
for t in range(block_size):
    context, target = x[:t + 1], y[t]
    print('when input is', context, 'target is', target)

when input is tensor([1]) target is tensor(1)
when input is tensor([1, 1]) target is tensor(28)
when input is tensor([ 1,  1, 28]) target is tensor(39)
when input is tensor([ 1,  1, 28, 39]) target is tensor(42)
when input is tensor([ 1,  1, 28, 39, 42]) target is tensor(39)
when input is tensor([ 1,  1, 28, 39, 42, 39]) target is tensor(44)
when input is tensor([ 1,  1, 28, 39, 42, 39, 44]) target is tensor(32)
when input is tensor([ 1,  1, 28, 39, 42, 39, 44, 32]) target is tensor(49)


In [15]:
# Cheat sheet of common tensor constructors.
randint = torch.randint(low=-100, high=100, size=(6,))  # 6 random integers in [-100, 100)
tensor = torch.tensor([[0.1, 0.3], [0.4, 0.6], [0.9, 0.2]])  # built from nested Python lists
zeros = torch.zeros((2, 3))
ones = torch.ones((3, 4))
arrange = torch.arange(0, 5)  # 0, 1, 2, 3, 4
linespace = torch.linspace(start=3, end=10, steps=5)  # 5 evenly spaced values from 3 to 10
# 1-D tensor of `steps` values evenly spaced on a log scale, from 10^start to 10^end.
logspace = torch.logspace(-10, 10, steps=5)
eye = torch.eye(n=5)  # 5x5 identity matrix

# torch.empty allocates without initialising, so the contents are arbitrary.
a = torch.empty(2, 3, dtype=torch.int64)
empty_like = torch.empty_like(a)  # same shape and dtype as `a`, also uninitialised



In [16]:
# Random integers drawn from [-100, 100); values differ on every run.
randint

tensor([ 31,  94, -94,  93,  17, -30])

In [17]:
# Result of torch.arange(5).
arrange

tensor([0, 1, 2, 3, 4])

In [18]:
# Result of torch.linspace(3, 10, steps=5).
linespace

tensor([ 3.0000,  4.7500,  6.5000,  8.2500, 10.0000])

In [19]:
# Result of torch.logspace(start=-10, end=10, steps=5).
logspace

tensor([1.0000e-10, 1.0000e-05, 1.0000e+00, 1.0000e+05, 1.0000e+10])

In [20]:
# 5x5 identity matrix.
eye

tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]])

In [21]:
# Uninitialised memory: whatever values are shown here are arbitrary, not guaranteed zeros.
empty_like

tensor([[0, 0, 0],
        [0, 0, 0]])

In [22]:
# Time a trivial allocation.
# perf_counter() is a monotonic, high-resolution clock intended for measuring intervals;
# time.time() is wall-clock time and can jump if the system clock is adjusted.
start_time = time.perf_counter()
zeros = torch.zeros(1, 1)
end_time = time.perf_counter()

elapsed_time = end_time - start_time
print(f"{elapsed_time: .8f}")

 0.00009322


In [23]:
# Operands for the torch-vs-NumPy timing cells that follow.
# Each has shape (100, 100, 100, 100): 10^8 float32 values, roughly 400 MB apiece.
torch_rand1 = torch.rand(100, 100, 100, 100).to(device)
torch_rand2 = torch.rand(100, 100, 100, 100).to(device)

# Actual NumPy arrays, so the np_ names hold ndarray operands rather than torch tensors.
# .numpy() wraps the CPU tensor's memory without copying and keeps float32, so the dtype
# matches the torch operands above.
np_rand1 = torch.rand(100, 100, 100, 100).numpy()
np_rand2 = torch.rand(100, 100, 100, 100).numpy()

In [24]:
%%time
start_time = time.time()
rand = (torch_rand1 @ torch_rand2)
end_time = time.time()
elapsed_time = end_time-start_time
print(f"{elapsed_time: .8f}")

 0.07036710
CPU times: user 18.4 ms, sys: 32.9 ms, total: 51.2 ms
Wall time: 70.5 ms


In [25]:
%%time
start_time = time.time()
rand = np.multiply(np_rand1,np_rand2)
end_time = time.time()
elapsed_time = end_time-start_time
print(f"{elapsed_time: .8f}")

 0.18038607
CPU times: user 46.7 ms, sys: 134 ms, total: 181 ms
Wall time: 181 ms


In [26]:
# Two-outcome distribution: index 0 has probability 0.1, index 1 has probability 0.9.
probs = torch.tensor([0.1, 0.9])
# Draw 10 indices from it, with replacement.
samples = probs.multinomial(num_samples=10, replacement=True)

In [27]:
# Sampled indices; random, so the exact sequence differs between runs.
samples

tensor([1, 1, 1, 1, 1, 1, 1, 1, 0, 1])

In [28]:
# Join two 1-D tensors end to end (dimension 0 is torch.cat's default).
tensor = torch.tensor([1, 2, 3, 4])
out = torch.cat([tensor, torch.tensor([5, 7])])
out

tensor([1, 2, 3, 4, 5, 7])

In [29]:
# Lower triangle: ones on and below the diagonal, zeros above it.
out = torch.ones(5, 5).tril()
out

tensor([[1., 0., 0., 0., 0.],
        [1., 1., 0., 0., 0.],
        [1., 1., 1., 0., 0.],
        [1., 1., 1., 1., 0.],
        [1., 1., 1., 1., 1.]])

In [30]:
# Upper triangle: ones on and above the diagonal, zeros below it.
out = torch.ones(5, 5).triu()
out

tensor([[1., 1., 1., 1., 1.],
        [0., 1., 1., 1., 1.],
        [0., 0., 1., 1., 1.],
        [0., 0., 0., 1., 1.],
        [0., 0., 0., 0., 1.]])

In [31]:
# Zeros on and below the diagonal, -inf strictly above it.
out = torch.zeros(5, 5).masked_fill(torch.ones(5, 5).triu(diagonal=1).bool(), float('-inf'))
out

tensor([[0., -inf, -inf, -inf, -inf],
        [0., 0., -inf, -inf, -inf],
        [0., 0., 0., -inf, -inf],
        [0., 0., 0., 0., -inf],
        [0., 0., 0., 0., 0.]])

In [32]:
# Element-wise exponential: exp(0) = 1 and exp(-inf) = 0.
out.exp()

tensor([[1., 0., 0., 0., 0.],
        [1., 1., 0., 0., 0.],
        [1., 1., 1., 0., 0.],
        [1., 1., 1., 1., 0.],
        [1., 1., 1., 1., 1.]])

In [33]:
# Swap dimensions 0 and 2: shape (2, 3, 4) becomes (4, 3, 2).
# The tensor is named `zeros_3d` rather than `input` so that the builtin input() is not
# shadowed for the rest of the kernel session.
zeros_3d = torch.zeros(2, 3, 4)
out = zeros_3d.transpose(0, 2)
out.shape

torch.Size([4, 3, 2])

In [34]:
# Stack three length-3 vectors along a new leading dimension, giving shape (3, 3).
tensor1, tensor2, tensor3 = (
    torch.tensor(row) for row in ([1, 2, 3], [4, 5, 6], [7, 8, 9])
)

stacked_tensor = torch.stack((tensor1, tensor2, tensor3), dim=0)
stacked_tensor

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [35]:
# Bias-free linear layer: multiplies the input by a randomly initialised 3x3 weight matrix,
# so the printed values change on every fresh run.
# (`nn` is imported in the notebook's top import cell.)
sample = torch.tensor([10., 10., 10.])
linear = nn.Linear(3, 3, bias=False)
print(linear(sample))

tensor([ 8.0123, -4.5862, -1.6701], grad_fn=<SqueezeBackward4>)


In [36]:
# The input tensor itself is unchanged by the linear layer call.
sample

tensor([10., 10., 10.])

In [37]:
# Softmax turns scores into probabilities that sum to 1 along `dim`.
# (`F` is imported in the notebook's top import cell.)
tensor1 = torch.tensor([1.0, 2.0, 3.0])
softmax_out = F.softmax(tensor1, dim=0)

print(softmax_out)

tensor([0.0900, 0.2447, 0.6652])


In [38]:
# Embedding table: one learnable vector of length `embedding_dim` per id in [0, vocab_size).
# These are demo sizes; they are unrelated to the 80-character corpus vocabulary above.
vocab_size = 1000
embedding_dim = 100
embedding = nn.Embedding(vocab_size, embedding_dim)

# torch.tensor with an explicit dtype replaces the legacy torch.LongTensor constructor.
input_indices = torch.tensor([1, 5, 3, 2], dtype=torch.long)

# One row is looked up per index, giving shape (4, 100).
embedded_output = embedding(input_indices)
embedded_output.shape

torch.Size([4, 100])

In [39]:
# Matrix multiplication with the @ operator: left operand, shape (3, 2).
mat1 = torch.tensor([[5, 5],
                     [2, 3],
                     [5, 9]])
mat1.size()

torch.Size([3, 2])

In [40]:
# Right operand, shape (2, 3).
mat2 = torch.tensor([[3, 4, 7],
                     [8, 9, 1]])
mat2.size()

torch.Size([2, 3])

In [41]:
# (3, 2) @ (2, 3) -> (3, 3) matrix product.
print(torch.matmul(mat1, mat2))

tensor([[ 55,  65,  40],
        [ 30,  35,  17],
        [ 87, 101,  44]])


In [42]:
# Random integers in [0, 10) with shape (2, 3), converted to float32.
torch.randint(0, 10, size=(2, 3)).to(torch.float32)

tensor([[6., 1., 5.],
        [0., 2., 4.]])