In [None]:
# sets up the necessary imports, selects the appropriate computation device, and defines key configuration parameters for a machine learning task using PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print(device)
block_size = 8
batch_size = 4
max_iters = 100000
learning_rate = 3e-4

mps


In [None]:
# Reading the contents of a text file and processing it to determine the number of unique characters in the text
with open('arte_de_amar.txt', 'r', encoding='utf-8') as f:
    text = f.read()
chars = sorted(set(set(text)))
print(chars)
vocabulary_size = len(chars)

['\n', ' ', '!', '#', '(', ')', ',', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'Y', '[', ']', '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'x', 'y', 'z', '¡', '¿', 'Á', 'É', 'Í', 'Ú', 'á', 'é', 'í', 'ñ', 'ó', 'ú', 'ü', '\ufeff']


In [None]:
# Encode a text into numerical format using a character-level encoding scheme 
# and then converting it into a PyTorch tensor.
string_to_int = { ch: i for i, ch in enumerate(chars) }
int_to_string = { i: ch for i, ch in enumerate(chars) }

# Define two lambda functions to encode and decode the text
encode = lambda s: [string_to_int[ch] for ch in s]
decode = lambda l: ''.join([int_to_string[i] for i in l])

# Convert the text into a PyTorch tensor
data = torch.tensor(encode(text), dtype=torch.long)

print(data.size())
print(data[:100])

torch.Size([138559])
tensor([85, 25, 58,  1, 47, 64, 66, 51,  1, 50, 51,  1, 47, 59, 47, 64,  0,  0,
        21, 67, 66, 54, 61, 64, 18,  1, 35, 68, 55, 50,  0,  0, 38, 51, 58, 51,
        47, 65, 51,  1, 50, 47, 66, 51, 18,  1, 33, 47, 70,  1,  9,  6,  1, 10,
         8, 10, 10,  1, 44, 51, 22, 61, 61, 57,  1,  3, 14, 15, 17, 14,  9, 45,
         0,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, 33,
        61, 65, 66,  1, 64, 51, 49, 51, 60, 66])


In [None]:
# Preparing data for training and validation in a machine learning task using PyTorch
n = int(0.8*len(data))
train_data = data[:n]
val_data = data[n:]

# Generates a batch of data for either training or validation, depending on the value of the split parameter.
def get_batch(split):
    data = train_data if split == 'train' else val_data
    lx = torch.randint(0, len(data)-block_size, (batch_size,))
    print(lx)
    x = torch.stack([data[l:l+block_size] for l in lx])
    y = torch.stack([data[l+1:l+block_size+1] for l in lx])
    x, y = x.to(device), y.to(device)
    return x, y

x, y = get_batch('train')
print('input:', x)
print('target:', y)

tensor([ 7461, 93116, 96869, 38829])
input: tensor([[ 1, 58, 61, 65,  1, 63, 67, 51],
        [56, 67, 60, 66, 61, 65,  1, 47],
        [67, 51, 53, 61,  1, 50, 55, 68],
        [78, 60, 50, 61, 65, 51,  1, 61]], device='mps:0')
target: tensor([[58, 61, 65,  1, 63, 67, 51,  1],
        [67, 60, 66, 61, 65,  1, 47, 58],
        [51, 53, 61,  1, 50, 55, 68, 55],
        [60, 50, 61, 65, 51,  1, 61, 66]], device='mps:0')


In [None]:
# Examining sequences from the training data to understand the relationship between input contexts and their corresponding targets.
x = train_data[:block_size]
y = train_data[1:block_size+1]

for t in range(block_size):
    context = x[:t+1]
    target = y[t]
    print('when input is', context, 'target is', target)

when input is tensor([85]) target is tensor(25)
when input is tensor([85, 25]) target is tensor(58)
when input is tensor([85, 25, 58]) target is tensor(1)
when input is tensor([85, 25, 58,  1]) target is tensor(47)
when input is tensor([85, 25, 58,  1, 47]) target is tensor(64)
when input is tensor([85, 25, 58,  1, 47, 64]) target is tensor(66)
when input is tensor([85, 25, 58,  1, 47, 64, 66]) target is tensor(51)
when input is tensor([85, 25, 58,  1, 47, 64, 66, 51]) target is tensor(1)


In [None]:
# Perform the forward pass to compute logits and loss, generate new tokens based on the model
class BigramLenguageModel(nn.Module):
    def __init__(self, vocab_size):
        super().__init__()
        self.token_embeding_table = nn.Embedding(vocab_size, vocab_size)
        
    def forward(self, index, targets=None):
        logits = self.token_embeding_table(index)

        if targets is None:
            loss = None 
        else:
            B, T, V = logits.shape
            logits = logits.view(B*T, V)
            targets = targets.view(B*T)
            loss = F.cross_entropy(logits, targets)
            
        return logits, loss
    
    def generate(self, index, max_new_tokens):
        for _ in range(max_new_tokens):
            logits, loss = self.forward(index)
            logits = logits[:, -1, :]
            probs = F.softmax(logits, dim=-1)
            index_next = torch.multinomial(probs, num_samples=1)
            index = torch.cat([index, index_next], dim=-1)

        return index
    
model = BigramLenguageModel(vocabulary_size)
m = model.to(device)

context = torch.zeros((1,1), dtype=torch.long, device=device)
generated_chars = decode(m.generate(context, max_new_tokens=500)[0].tolist())
print(generated_chars)
        


Bq)d_C¿Fn4il¡RSEÍlI!ÍhUÚ.5o2uñP43O#2Uo2vJoeÚobxLEBá:úÉjtn.NuT9p(o4PH(Qbáyy3!nÁa1YJc(éAr7sD,7cs(áYs:lq1ñoA2núxS?xSa?;Á.G¡ 6íMaE¡_GqpnÍhUñkHtIU,FfEs
ú!rYvseMI;K:qRPg#lbéuTGfoñÁoFRbA1 LÉps5MhK9ñkU?bQ_lxGRhU7K﻿Náíg#eÚ lIü0ülDú,!nSjóoKLsoáOíD0réqiuR#(A#sbJVP bQbéóñÁé.5fzAfgNOJx¡mu7LMID
?0]lÍ8óiLCMr;;grfásqNzÍt_s¿8PvQy3N?svQhjfíER3hGp(gVífz﻿x(iQñkadzá109yÉjr;Y1?CzÁ23jc..Ac;Bñ¡dÚÚFaskC3q;ugAohÁdÉS6J8]?ilPTOt7B¿fEüdÍ¡aTrPxVÚLgzFaóléKí9FÍ]có4miBUsa_d.L]]Éd3(iEÚ¿F2TD4g¿06Q6INé3xgO9Vú?xMVJHIyB,uRQg1DÍ?vps7


In [9]:
optimizer = torch.optim.AdamW(m.parameters(), lr=learning_rate)

for epoch in range(1, max_iters):
    xb, yb = get_batch('train')

    logits, loss = model.forward(xb, yb)
    optimizer.zero_grad(set_to_none=True)
    loss.backward()
    optimizer.step()

print('loss:', loss.item())

tensor([ 36339, 104606,  79737,  61816])
tensor([ 91911,  38293, 106599,   8645])
tensor([37931, 73346, 45713, 88675])
tensor([ 54365,   5428,  53172, 110087])
tensor([48812, 54751, 65500, 70783])
tensor([82060, 62664, 68531, 65685])
tensor([84739, 32702, 93479, 23533])
tensor([ 36324, 108472,  99616,  85669])
tensor([85810, 50409, 31699, 33537])
tensor([37829, 60135, 73811, 84232])
tensor([11553, 30602,  8241, 40726])
tensor([81142, 32708, 71625, 74370])
tensor([43127,  6678, 23209, 60645])
tensor([20009,  9057, 75489, 83796])
tensor([89679, 88976, 85300, 88077])
tensor([71449, 86687, 66366, 21041])
tensor([75044, 46797, 58321, 47152])
tensor([78301, 60709, 98144, 31541])
tensor([ 4009, 13138, 61109, 91292])
tensor([81966, 45088,  2488, 88674])
tensor([ 33725,  41005, 106917,  22965])
tensor([60187, 42854, 44495, 13342])
tensor([  5018, 104732,  77023,  63274])
tensor([ 5144, 36038, 23472,  8753])
tensor([18500, 71529,  9887, 83101])
tensor([76198, 74965, 23063, 39880])
tensor([ 89560