In [1]:
import torch
import torch.nn as nn
import math
from torch.utils.data import TensorDataset, DataLoader, Dataset
from sklearn.datasets import fetch_20newsgroups
from sklearn.model_selection import train_test_split
from collections import Counter
import re
import numpy as np

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float32).unsqueeze(1)
        div_term = torch.exp(
            torch.arange(0, d_model, 2, dtype=torch.float32) * (-math.log(10000.0) / d_model)
        )
        pe[:, 0::2] = torch.sin(position * div_term)  # dim 2i
        pe[:, 1::2] = torch.cos(position * div_term)  # dim 2i+1
        pe = pe.unsqueeze(0)  # Shape (1, max_len, d_model)
        self.register_buffer("pe", pe)

    def forward(self, x):
        x = x + self.pe[:, : x.size(1), :].to(x.device)
        return x


# Exemplo
batch_size = 8
max_len = 50
d_model = 32
pos_encoding = PositionalEncoding(d_model=d_model, max_len=max_len)

# (sequence_length, batch_size, d_model)
input_tensor = torch.randn(max_len, batch_size, d_model)
output_tensor = pos_encoding(input_tensor)

print(f'Input shape: {input_tensor.shape}')  # Input shape: (sequence_length, batch_size, d_model
print(f'Output shape: {output_tensor.shape}')  # Output shape: (sequence_length, batch_size, d_model)

Input shape: torch.Size([50, 8, 32])
Output shape: torch.Size([50, 8, 32])


In [4]:
class MultiHeadAttention(nn.Module):
    def __init__(self, d_model, num_heads):
        super().__init__()
        # Verifica se o número de dimensões do modelo é divisível pelo número de cabeças
        assert d_model % num_heads == 0, "d_model deve ser divisível por num_heads"

        # Número de dimensões por cabeça
        self.d_k = d_model // num_heads
        self.num_heads = num_heads

        # Inicializa as camadas lineares para Q, K e V
        self.W_q = nn.Linear(d_model, d_model)
        self.W_k = nn.Linear(d_model, d_model)
        self.W_v = nn.Linear(d_model, d_model)
        self.W_o = nn.Linear(d_model, d_model)

    def scaled_dot_product_attention(self, Q, K, V, mask=None):
        # Calcula os scores fazendo o produto escalar entre Q e K e dividindo pela raiz quadrada de d_k
        scores = torch.matmul(Q, K.transpose(-2, -1)) / math.sqrt(self.d_k)

        # Se a máscara for fornecida, aplica a máscara para os scores
        if mask is not None:
            scores = scores.masked_fill(mask == 0, float('-inf'))

        # Calcula a softmax nos scores
        attention = torch.softmax(scores, dim=-1)

        # Multiplica a matriz de atenção pelo valor V
        output = torch.matmul(attention, V)
        return output

    def split_heads(self, x):
        # Divide a última dimensão em (num_heads, d_k)
        N, seq_len, d_model = x.size()
        x = x.view(N, seq_len, self.num_heads, self.d_k)
        x = x.transpose(1, 2)  # (N, num_heads, seq_len, d_k)
        return x

    def combine_heads(self, x):
        # Inverte a operação de split_heads
        x = x.transpose(1, 2)  # (N, seq_len, num_heads, d_k)
        N, seq_len, num_heads, d_k = x.size()
        x = x.contiguous().view(N, seq_len, num_heads * d_k)
        return x

    def forward(self, query, key, value, mask=None):
        # Passa os valores de Q, K e V pela camada linear
        Q = self.split_heads(self.W_q(query))
        K = self.split_heads(self.W_k(key))
        V = self.split_heads(self.W_v(value))

        # Calcula a atenção
        attention = self.scaled_dot_product_attention(Q, K, V, mask)

        # Combina as cabeças e aplica a camada linear final
        output = self.combine_heads(attention)
        output = self.W_o(output)
        return output


# Exemplo
batch_size = 8
max_len = 50
d_model = 32
num_heads = 4
multi_head_attn = MultiHeadAttention(d_model, num_heads)

# (batch_size, sequence_length, d_model)
query = torch.randn(batch_size, max_len, d_model)
key = torch.randn(batch_size, max_len, d_model)
value = torch.randn(batch_size, max_len, d_model)

output = multi_head_attn(query, key, value)

print(f'Input shape: {query.shape}')  # Input shape: (batch_size, sequence_length, d_model
print(f'Output shape: {output.shape}')  # Output shape: (batch_size, sequence_length, d_model)

Input shape: torch.Size([8, 50, 32])
Output shape: torch.Size([8, 50, 32])


In [5]:
class FeedForward(nn.Module):
    def __init__(self, d_model, d_ff):
        super().__init__()
        self.fc1 = nn.Linear(d_model, d_ff)
        self.fc2 = nn.Linear(d_ff, d_model)
        self.relu = nn.ReLU()

    def forward(self, x):
        return self.fc2(self.relu(self.fc1(x)))


# Exemplo
d_model = 512
d_ff = 2048

feed_forward = FeedForward(d_model, d_ff)

print(f'Input shape: {query.shape}')  # Input shape: (batch_size, sequence_length, d_model)
print(f'Output shape: {output.shape}')  # Output shape: (batch_size, sequence_length, d_model)

Input shape: torch.Size([8, 50, 32])
Output shape: torch.Size([8, 50, 32])


In [6]:
class EncoderLayer(nn.Module):
    def __init__(self, d_model, num_heads, d_ff, dropout=0.1):
        super().__init__()
        self.attention = MultiHeadAttention(d_model, num_heads)
        self.norm1 = nn.LayerNorm(d_model)
        self.ffn = FeedForward(d_model, d_ff)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, mask=None):
        attn_output = self.attention(x, x, x, mask)
        x = self.norm1(x + self.dropout(attn_output))
        ffn_output = self.ffn(x)
        x = self.norm2(x + self.dropout(ffn_output))
        return x


# Exemplo
batch_size = 8
max_len = 50
d_model = 32
num_heads = 4
d_ff = 128

encoder_layer = EncoderLayer(d_model, num_heads, d_ff)

x = torch.randn(batch_size, max_len, d_model)
output = encoder_layer(x)

print(f'Input shape: {x.shape}')  # Input shape: (batch_size, sequence_length, d_model)
print(f'Output shape: {output.shape}')  # Output shape: (batch_size, sequence_length, d_model)

Input shape: torch.Size([8, 50, 32])
Output shape: torch.Size([8, 50, 32])


In [7]:
class Encoder(nn.Module):
    def __init__(
        self, src_vocab_size, d_model, num_heads, num_layers, d_ff, max_len, dropout=0.1
    ):
        super().__init__()
        self.embedding = nn.Embedding(src_vocab_size, d_model)
        self.positional_encoding = PositionalEncoding(d_model, max_len)
        self.layers = nn.ModuleList(
            [EncoderLayer(d_model, num_heads, d_ff, dropout) for _ in range(num_layers)]
        )
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, mask=None):
        x = self.embedding(x)
        x = self.positional_encoding(x)
        x = self.dropout(x)
        for layer in self.layers:
            x = layer(x, mask)
        return x

# Exemplo
batch_size = 8
max_len = 50
src_vocab_size = 1000
d_model = 32
num_heads = 4
num_layers = 2
d_ff = 128

encoder = Encoder(src_vocab_size, d_model, num_heads, num_layers, d_ff, max_len)

src_sequence = torch.randint(0, src_vocab_size, (batch_size, max_len))
output = encoder(src_sequence)

print(f'Input shape: {src_sequence.shape}')  # Input shape: (batch_size, sequence_length)
print(f'Output shape: {output.shape}')  # Output shape: (batch_size, sequence_length, d_model)

Input shape: torch.Size([8, 50])
Output shape: torch.Size([8, 50, 32])


In [8]:
class DecoderLayer(nn.Module):
    def __init__(self, d_model, num_heads, d_ff, dropout=0.1):
        super().__init__()
        self.self_attention = MultiHeadAttention(d_model, num_heads)
        self.norm1 = nn.LayerNorm(d_model)
        self.cross_attention = MultiHeadAttention(d_model, num_heads)
        self.norm2 = nn.LayerNorm(d_model)
        self.ffn = FeedForward(d_model, d_ff)
        self.norm3 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, enc_out=None, src_mask=None, trg_mask=None):
        # Self-attention na sequência de destino
        self_attn_output = self.self_attention(x, x, x, trg_mask)
        x = self.norm1(x + self.dropout(self_attn_output))

        # Cross-attention entre a saída do self-attention e a saída do encoder
        if enc_out is not None:
            cross_attn_output = self.cross_attention(x, enc_out, enc_out, src_mask)
            x = self.norm2(x + self.dropout(cross_attn_output))

        # Feed-forward
        ffn_output = self.ffn(x)
        x = self.norm3(x + self.dropout(ffn_output))

        return x


# Exemplo
batch_size = 8
max_len = 50
d_model = 32
num_heads = 4
d_ff = 128

decoder_layer = DecoderLayer(d_model, num_heads, d_ff)

x = torch.randn(batch_size, max_len, d_model)
enc_out = torch.randn(batch_size, max_len, d_model)
output = decoder_layer(x, enc_out)

print(f'Input shape: {x.shape}')  # Input shape: (batch_size, sequence_length, d_model)
print(f'Output shape: {output.shape}')  # Output shape: (batch_size, sequence_length, d_model)

Input shape: torch.Size([8, 50, 32])
Output shape: torch.Size([8, 50, 32])


In [9]:
class Decoder(nn.Module):
    def __init__(self, vocab_size, d_model, num_heads, num_layers, d_ff, max_len, dropout=0.1):
        super(Decoder, self).__init__()
        self.embedding = nn.Embedding(vocab_size, d_model)
        self.positional_encoding = PositionalEncoding(d_model, max_len)
        self.layers = nn.ModuleList(
            [DecoderLayer(d_model, num_heads, d_ff, dropout) for _ in range(num_layers)]
        )
        self.fc_out = nn.Linear(d_model, vocab_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, enc_out=None, src_mask=None, trg_mask=None):
        # Embedding + positional encoding + dropout
        x = self.embedding(x)
        x = self.positional_encoding(x)
        x = self.dropout(x)

        # Passa a entrada por cada camada do decoder
        for layer in self.layers:
            x = layer(x, enc_out, src_mask, trg_mask)

        # Camada final
        out = self.fc_out(x)
        return out


# Exemplo
batch_size = 8
max_len = 50
vocab_size = 1000
d_model = 32
num_heads = 4
num_layers = 2
d_ff = 128

decoder = Decoder(vocab_size, d_model, num_heads, num_layers, d_ff, max_len)

enc_out = torch.randn(batch_size, max_len, d_model)
tgt_sequence = torch.randint(0, vocab_size, (batch_size, max_len))
output = decoder(tgt_sequence, enc_out)

print(f'Input shape: {tgt_sequence.shape}')  # Input shape: (batch_size, sequence_length)
print(f'Output shape: {output.shape}')  # Output shape: (batch_size, sequence_length, vocab_size)

Input shape: torch.Size([8, 50])
Output shape: torch.Size([8, 50, 1000])


In [10]:
class Transformer(nn.Module):
    def __init__(self, src_vocab_size, trg_vocab_size, d_model, num_heads, num_encoder_layers, num_decoder_layers, d_ff, max_len, dropout=0.1):
        super().__init__()
        self.encoder = Encoder(src_vocab_size, d_model, num_heads, num_encoder_layers, d_ff, max_len, dropout)
        self.decoder = Decoder(trg_vocab_size, d_model, num_heads, num_decoder_layers, d_ff, max_len, dropout)

    def generate_mask(self, src, trg):
        src_mask = (src != 0).unsqueeze(1).unsqueeze(2)
        trg_mask = (trg != 0).unsqueeze(1).unsqueeze(3)
        seq_length = trg.size(1)
        nopeak_mask = (1 - torch.triu(torch.ones(1, seq_length, seq_length), diagonal=1)).bool()
        trg_mask = trg_mask & nopeak_mask
        return src_mask, trg_mask

    def forward(self, src, trg, src_mask=None, trg_mask=None):
        src_mask, trg_mask = self.generate_mask(src, trg)
        enc_out = self.encoder(src, src_mask)
        out = self.decoder(trg, enc_out, src_mask, trg_mask)
        return out


# Exemplo
batch_size = 8
max_len = 50
src_vocab_size = 1000
trg_vocab_size = 1000
d_model = 32
num_heads = 4
num_encoder_layers = 2
num_decoder_layers = 2
d_ff = 128

transformer = Transformer(src_vocab_size, trg_vocab_size, d_model, num_heads, num_encoder_layers, num_decoder_layers, d_ff, max_len)
src = torch.randint(1, src_vocab_size, (batch_size, max_len))
trg = torch.randint(1, trg_vocab_size, (batch_size, max_len))

out = transformer(src, trg)

print(f'Input shape: {src.shape}')  # Input shape: (batch_size, sequence_length)
print(f'Output shape: {out.shape}')  # Output shape: (batch_size, sequence_length, trg_vocab_size)

Input shape: torch.Size([8, 50])
Output shape: torch.Size([8, 50, 1000])


In [11]:
def create_padding_mask(seq, pad_token):
    """
    Cria uma máscara de padding para sequências.

    Args:
        seq: Tensor de sequência de entrada de forma (batch_size, seq_len)
        pad_token: O token que representa o padding no vocabulário

    Retorna:
        mask: Máscara de padding de forma (batch_size, 1, 1, seq_len)
    """
    mask = (seq != pad_token).unsqueeze(1).unsqueeze(2)  # Forma: (batch_size, 1, 1, seq_len)
    return mask

def create_causal_mask(seq_len):
    """
    Cria uma máscara causal para prevenir atenção a tokens futuros.

    Args:
        seq_len: O comprimento da sequência

    Retorna:
        mask: Máscara causal de forma (1, 1, seq_len, seq_len)
    """
    mask = torch.tril(torch.ones((seq_len, seq_len), dtype=torch.bool))  # Forma: (seq_len, seq_len)
    mask = mask.unsqueeze(0).unsqueeze(0)  # Forma: (1, 1, seq_len, seq_len)
    return mask

In [12]:
# Exemplo
batch_size = 8
max_len = 50
pad_token = 0

padding_mask = create_padding_mask(tgt_sequence, pad_token)
print(f'Padding mask shape: {padding_mask.shape}')  # Padding mask shape: torch.Size([8, 1, 1, 50])

causal_mask = create_causal_mask(max_len)
print(f'Causal mask shape: {causal_mask.shape}')  # Causal mask shape: torch.Size([1, 1, 50, 50])

Padding mask shape: torch.Size([8, 1, 1, 50])
Causal mask shape: torch.Size([1, 1, 50, 50])


## Classificador com Encoder

In [13]:
categories = ['sci.electronics', 'comp.graphics', 'sci.med', 'rec.motorcycles']

newsgroups_data = fetch_20newsgroups(subset='all', categories=categories)
texts = newsgroups_data.data
labels = newsgroups_data.target

texts, labels = texts[:5000], labels[:5000]

train_texts, val_texts, train_labels, val_labels = train_test_split(
    texts, labels, test_size=0.2, random_state=42
)

In [14]:
def preprocess_text(text):
    text = text.lower()
    text = re.sub(r'[^a-z\s]', '', text)
    tokens = text.split()
    return tokens

def build_vocab(texts, min_freq=1):
    word_freq = {}
    for text in texts:
        tokens = preprocess_text(text)
        for token in tokens:
            word_freq[token] = word_freq.get(token, 0) + 1
    vocab = {'<pad>': 0, '<unk>': 1}
    index = 2
    for word, freq in word_freq.items():
        if freq >= min_freq:
            vocab[word] = index
            index += 1
    return vocab

vocab = build_vocab(train_texts)
vocab_size = len(vocab)

In [15]:
class NewsGroupsDataset(Dataset):
    def __init__(self, texts, labels, vocab, max_len):
        self.texts = texts
        self.labels = labels
        self.vocab = vocab
        self.max_len = max_len

    def __len__(self):
        return len(self.labels)

    def encode_text(self, text):
        tokens = preprocess_text(text)
        token_ids = [self.vocab.get(token, self.vocab['<unk>']) for token in tokens]
        if len(token_ids) > self.max_len:
            token_ids = token_ids[:self.max_len]
        else:
            token_ids += [self.vocab['<pad>']] * (self.max_len - len(token_ids))
        return torch.tensor(token_ids, dtype=torch.long)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]
        token_ids = self.encode_text(text)
        return token_ids, label

In [16]:
max_len = 100
batch_size = 32

train_dataset = NewsGroupsDataset(train_texts, train_labels, vocab, max_len)
val_dataset = NewsGroupsDataset(val_texts, val_labels, vocab, max_len)

train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size)

In [17]:
class TransformerClassifier(nn.Module):
    def __init__(
        self,
        src_vocab_size,
        d_model,
        num_heads,
        num_layers,
        d_ff,
        max_len,
        num_classes,
        dropout=0.1,
    ):
        super().__init__()
        self.encoder = Encoder(
            src_vocab_size, d_model, num_heads, num_layers, d_ff, max_len, dropout
        )
        self.classifier = nn.Linear(d_model, num_classes)

    def forward(self, x, mask=None):
        x = self.encoder(x, mask)
        x = x.mean(dim=1)  # Global average pooling
        logits = self.classifier(x)
        return logits

In [18]:
d_model = 128
num_heads = 8
d_ff = 512
num_classes = len(categories)
num_layers = 2
dropout = 0.1

model = TransformerClassifier(
    vocab_size,
    d_model,
    num_heads,
    num_layers,
    d_ff,
    max_len,
    num_classes,
    dropout,
).to(device)

In [19]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

In [20]:
epochs = 50

for epoch in range(epochs):
    model.train()
    total_loss = 0
    for data, labels in train_dataloader:
        data, labels = data.to(device), labels.to(device)
        optimizer.zero_grad()

        padding_mask = create_padding_mask(data, vocab['<pad>']).to(device)

        outputs = model(data, mask=padding_mask)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    avg_train_loss = total_loss / len(train_dataloader)

    model.eval()
    val_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for data, labels in val_dataloader:
            data, labels = data.to(device), labels.to(device)

            padding_mask = create_padding_mask(data, vocab['<pad>']).to(device)

            outputs = model(data, mask=padding_mask)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    avg_val_loss = val_loss / len(val_dataloader)
    val_accuracy = correct / total

    if (epoch + 1) % 5 == 0:
        print(f"Epoch {epoch+1}/{epochs}")
        print(f"Train Loss: {avg_train_loss:.4f}")
        print(f"Validation Loss: {avg_val_loss:.4f}, Accuracy: {val_accuracy*100:.2f}%\n")

Epoch 5/50
Train Loss: 0.9306
Validation Loss: 0.8338, Accuracy: 64.89%

Epoch 10/50
Train Loss: 0.3465
Validation Loss: 0.4784, Accuracy: 82.26%

Epoch 15/50
Train Loss: 0.1692
Validation Loss: 0.4094, Accuracy: 86.31%

Epoch 20/50
Train Loss: 0.0801
Validation Loss: 0.3582, Accuracy: 88.97%

Epoch 25/50
Train Loss: 0.0533
Validation Loss: 0.3868, Accuracy: 89.48%

Epoch 30/50
Train Loss: 0.0286
Validation Loss: 0.3645, Accuracy: 90.37%

Epoch 35/50
Train Loss: 0.0234
Validation Loss: 0.3693, Accuracy: 91.00%

Epoch 40/50
Train Loss: 0.0173
Validation Loss: 0.4048, Accuracy: 91.25%

Epoch 45/50
Train Loss: 0.0166
Validation Loss: 0.4080, Accuracy: 91.25%

Epoch 50/50
Train Loss: 0.0177
Validation Loss: 0.3818, Accuracy: 91.51%



## Modelo de Linguagem com Decoder

In [21]:
text = """
####-Chapter I <br>

An Unexpected Party

In a hole in the ground there lived a hobbit. Not a nasty, dirty, wet hole, filled with
the ends of worms and an oozy smell, nor yet a dry, bare, sandy hole with nothing
in it to sit down on or to eat: it was a hobbit-hole, and that means comfort.
It  had  a  perfectly  round  door  like  a  porthole,  painted  green,  with  a  shiny
yellow  brass  knob  in  the  exact middle. The  door  opened  on  to  a  tube-shaped hall
like  a  tunnel:  a  very  comfortable  tunnel without  smoke, with  panelled walls,  and
floors  tiled  and  carpeted,  provided with  polished  chairs,  and  lots  and  lots of pegs
for  hats  and  coats - the hobbit was  fond of visitors. The  tunnel wound on  and on,
going  fairly  but  not  quite  straight  into  the  side  of  the  hill  -  The  Hill,  as  all  the
people for many miles round called  it - and many little round doors opened out of
it,  first  on  one  side  and  then  on  another.  No  going  upstairs  for  the  hobbit:
bedrooms,  bathrooms,  cellars,  pantries  (lots  of  these),  wardrobes  (he  had  whole
rooms devoted to clothes), kitchens, dining-rooms, all were on the same floor, and
indeed  on  the  same  passage. The  best  rooms were  all  on  the  left-hand side (going
in),  for  these  were  the  only  ones  to  have  windows,  deep-set  round  windows
looking over his garden and meadows beyond, sloping down to the river.
This  hobbit  was  a  very  well-to-do  hobbit,  and  his  name  was  Baggins.  The
Bagginses  had  lived  in  the  neighbourhood  of  The  Hill  for  time  out  of mind,  and
people considered them very respectable, not only because most of them were rich,
but  also  because  they  never had  any  adventures or did  anything unexpected: you
could tell what a Baggins would say on any question without the bother of asking
him. This  is a story of how a Baggins had an adventure,  found himself doing and
saying things altogether unexpected. He may have lost the neighbours' respect, but
he gained-well, you will see whether he gained anything in the end.
The  mother  of  our  particular  hobbit  …  what  is  a  hobbit?  I  suppose  hobbits
need  some description nowadays,  since  they have become  rare  and  shy of the Big
People,  as  they  call  us.  They  are  (or  were)  a  little  people,  about  half  our  height,
and  smaller  than  the  bearded Dwarves. Hobbits  have  no  beards. There  is  little  or
no  magic  about  them,  except  the  ordinary  everyday  sort  which  helps  them  to
disappear  quietly  and  quickly  when  large  stupid  folk  like  you  and  me  come
blundering  along,  making  a  noise  like  elephants  which  they  can  hear  a  mile  off.
They  are  inclined  to  be    at  in  the  stomach;  they  dress  in  bright  colours  (chiefly

####-green  and  yellow);  wear  no  shoes,  because  their  feet  grow  natural  leathery  soles
and thick warm brown hair like the stuff on their heads (which is curly); have long
clever brown fingers, good-natured faces, and laugh deep fruity laughs (especially
after  dinner,  which  they  have  twice  a  day when  they  can  get  it). Now  you  know
enough  to  go  on  with.  As  I  was  saying,  the  mother  of  this  hobbit  -  of  Bilbo
Baggins, that is  - was  the fabulous Belladonna Took, one of  the  three remarkable
daughters  of  the  Old  Took,  head  of  the  hobbits  who  lived  across  The Water,  the
small river that ran at the foot of The Hill. It was often said (in other families) that
long  ago  one  of  the  Took  ancestors  must  have  taken  a  fairy  wife.  That  was,  of
course,  absurd,  but  certainly  there  was  still  something  not  entirely  hobbit-like
about  them,  -  and  once  in  a while members  of  the  Took-clan would go and have
adventures. They discreetly disappeared, and  the  family hushed  it up; but  the  fact
remained  that  the  Tooks  were  not  as  respectable  as  the  Bagginses,  though  they
were undoubtedly richer. Not that Belladonna Took ever had any adventures after
she  became  Mrs.  Bungo  Baggins.  Bungo,  that  was  Bilbo's  father,  built  the  most
luxurious  hobbit-hole  for  her  (and  partly  with  her  money)  that  was  to  be  found
either  under  The  Hill  or  over  The  Hill  or  across  The  Water,  and  there  they
remained  to  the  end  of  their  days.  Still  it  is  probable  that  Bilbo,  her  only  son,
although  he  looked  and  behaved  exactly  like  a  second  edition  of  his  solid  and
comfortable  father,  got  something  a  bit  queer  in  his makeup  from  the Took  side,
something  that  only  waited  for  a  chance  to  come  out.  The  chance  never  arrived,
until Bilbo Baggins was grown up, being about fifty years old or so, and living in
the  beautiful  hobbit-hole  built  by  his  father,  which  I  have  just described  for you,
until he had in fact apparently settled down immovably.
By some curious chance one morning long ago in the quiet of the world, when
there  was  less  noise  and  more  green,  and  the  hobbits  were  still  numerous  and
prosperous,  and  Bilbo  Baggins  was  standing  at  his  door  after  breakfast  smoking
an  enormous  long  wooden  pipe  that  reached  nearly  down  to  his  woolly  toes
(neatly brushed)  - Gandalf  came  by. Gandalf!  If  you  had  heard  only  a  quarter  of
what  I  have  heard  about  him,  and  I  have  only  heard  very  little  of  all  there  is  to
hear,  you  would  be  prepared  for  any  sort  I  of  remarkable  tale.  Tales  and
adventures  sprouted  up  all  over  the  place  wherever  he  went,  in  the  most
extraordinary  fashion.  He  had  not  been  down  that  way  under  The  Hill  for  ages
and  ages,  not  since  his  friend  the  Old  Took  died,  in  fact,  and  the  hobbits  had
almost forgotten what he looked like. He had been away over The Hill and across
The  Water  on  business  of  his  own  since  they  were  all  small  hobbit-boys  and
hobbit-girls.

####-All that the unsuspecting Bilbo saw that morning was an old man with a staff.
He had a tall pointed blue hat, a long grey cloak, a silver scarf over which a white
beard hung down below his waist, and immense black boots.
"Good  morning!"  said  Bilbo,  and  he  meant  it.  The  sun  was  shining,  and  the
grass was very green. But Gandalf looked at him from under long bushy eyebrows
that  stuck  out  further  than  the  brim  of  his  shady  hat.  "What  do  you  mean?"  be
said. "Do you wish me a good morning, or mean that it is a good morning whether
I  want  not;  or  that  you  feel  good  this  morning;  or  that  it  is  morning  to  be  good
on?"
"All  of  them  at  once,"  said  Bilbo.  "And  a  very  fine  morning  for  a  pipe  of
tobacco out of doors, into the bargain. If you have a pipe about you, sit down  and
have  a  fill of mine! There's no hurry, we have  all  the day before us!" Then Bilbo
sat down on a seat by his door, crossed his legs, and blew out a beautiful grey ring
of  smoke  that  sailed  up  into  the  air without  breaking  and  floated  away  over  The
Hill.
"Very  pretty!"  said  Gandalf.  "But  I  have  no  time  to  blow  smoke-rings  this
morning.  I  am  looking  for  someone  to  share  in  an  adventure  that  I  am  arranging,
and it's very difficult to find anyone."
«I should think so  - in these parts! We are plain quiet folk and have no use for
adventures.  Nasty  .disturbing  uncomfortable  things! Make  you  late  for  dinner!  I
can’t  think  what  anybody  sees  in  them,»  said  our  Mr.  Baggins,  and  stuck  one
thumb  behind  his  braces,  and  blew  out  another  even  bigger  smoke-ring. Then he
took out his morning  letters, and begin  to  read, pretending  to  take no more notice
of  the old man. He had decided  that he was not quite his  sort,  and wanted him  to
go away. But  the old man did not move. He stood  leaning on his stick and gazing
at the hobbit without saying anything, till Bilbo got quite uncomfortable and even
a little cross.
"Good  morning!"  he  said  at  last.  "We  don't  want  any  adventures  here,  thank
you! You might try over The Hill or across The Water." By this he meant that the
conversation was at an end.
"What a lot of things you do use Good morning for!" said Gandalf. "Now you
mean that you want to get rid of me, and that it won't be good till I move off."
"Not  at  all,  not  at  all,  my  dear  sir!  Let  me  see,  I  don't  think  I  know  your
name?"
"Yes,  yes,  my  dear  sir  -  and  I  do  know  your  name, Mr.  Bilbo  Baggins.  And
you  do  know  my  name,  though  you  don't  remember  that  I  belong  to  it.  I  am

####-Gandalf,  and  Gandalf  means  me!  To  think  that  I  should  have  lived  to  be  good-
morninged by Belladonna Took's son, as if I was selling buttons at the door!"
"Gandalf,  Gandalf!  Good  gracious  me!  Not  the  wandering  wizard  that  gave
Old Took a pair of magic diamond studs that fastened themselves and never came
undone  till  ordered?  Not  the  fellow  who  used  to  tell  such  wonderful  tales  at
parties, about dragons and goblins and giants and  the rescue of princesses and  the
unexpected  luck  of  widows'  sons?  Not  the  man  that  used  to  make  such
particularly  excellent  fireworks!  I  remember  those!  Old  Took  used  to  have  them
on  Midsummer's  Eve.  Splendid!  They  used  to  go  up  like  great  lilies  and
snapdragons and laburnums of fire and hang in the twilight all evening!" You will
notice already that Mr. Baggins was not quite so prosy as he liked to believe, also
that  he was  very  fond  of  flowers. "Dear me!" she went on. "Not  the Gandalf who
was  responsible  for so many quiet  lads and  lasses going off  into  the Blue  for mad
adventures.  Anything  from  climbing  trees  to  visiting  Elves  -  or  sailing  in  ships,
sailing  to other  shores! Bless me,  life used  to be quite  inter  - I mean, you used  to
upset things badly in these parts once upon a time. I beg your pardon, but I had no
idea you were still in business."
"Where else  should  I be?"  said  the wizard.  "All  the  same  I am pleased  to  find
you  remember  something about me. You seem  to  remember my  fireworks kindly,
at any  rate,  land  that  is not without hope.  Indeed  for your old grand-father Took's
sake, and for the sake of poor Belladonna, I will give you what you asked for."
"I beg your pardon, I haven't asked for anything!"
"Yes, you have! Twice now. My pardon. I give it you. In fact I will go so far as
to  send  you  on  this  adventure.  Very  amusing  for  me,  very  good  for  you  and
profitable too, very likely, if you ever get over it."
"Sorry!  I  don't  want  any  adventures,  thank  you.  Not  today.  Good  morning!
But please come to tea - any time you like! Why not tomorrow? Come tomorrow!
Good-bye!"
With  that  the hobbit  turned and  scuttled  inside his  round green door, and  shut
it as quickly as he dared, not to seen rude. Wizards after all are wizards.
"What on earth did I ask him to tea for!" he said to him-self, as he went to the
pantry. He had only  just had break  fast, but he  thought a cake or  two and a drink
of  something  would  do  him  good  after  his  fright.  Gandalf  in  the  meantime  was
still  standing  outside  the  door,  and  laughing  long  but  quietly.  After  a  while  he
stepped  up,  and  with  the  spike  of  his  staff  scratched  a  queer  sign  on  the  hobbit's
beautiful  green  front-door.  Then  he  strode  away,  just  about  the  time  when  Bilbo

####-was  finishing  his  second  cake  and  beginning  to  think  that  he  had  escape
adventures very well.
The  next  day  he  had  almost  forgotten  about  Gandalf  He  did  not  remember
things  very  well,  unless  he  put  them  down  on  his  Engagement  Tablet:  like  this:
Gandalf ’¥a Wednesday. Yesterday he had been too flustered to do anything of the
kind. Just before tea-time there came a tremendous ring on the front-door bell, and
then he remembered! He rushed and put on the kettle, and put out another cup and
saucer and an extra cake or two, and ran to the door.
"I am so sorry  to keep you waiting!" he was going  to say, when he saw  that  it
was not Gandalf at all. It was a dwarf with a blue beard tucked into a golden belt,
and  very  bright  eyes  under  his  dark-green hood.  As  soon a  the door was opened,
he pushed inside, just as if he had been expected.
He hung his hooded cloak on the nearest peg, and "Dwalin at your service!" he
said with a low bow.
"Bilbo  Baggins  at  yours!"  said  the  hobbit,  too  surprised  to  ask  any  questions
for  the  moment.  When  the  silence  that  followed  had  become  uncomfortable,  he
added:  "I  am  just  about  to  take  tea;  pray  come  and  have  some with me." A  little
stiff  perhaps,  but  he  meant  it  kindly.  And  what  would  you  do,  if  an  uninvited
dwarf came and hung his things up in your hall without a word of explanation?
They had not been at table long, in fact they had hardly reached the third cake,
when there came another even louder ring at the bell.
"Excuse me!" said the hobbit, and off he went to the door.
"So  you  have  got  here  at  last!" was what  he was  going  to  say  to Gandalf  this
time.  But  it  was  not  Gandalf.  Instead  there  was  a  very  old-looking  dwarf  on  the
step with a white beard and a scarlet hood; and he too hopped inside as soon as the
door was open, just as if he had been invited.
"I  see  they  have  begun  to  arrive  already,"  he  said  when  he  caught  sight  of
Dwalin's  green  hood  hanging  up.  He  hung  his  red  one  next  to  it,  and  "Balin  at
your service!" he said with his hand on his breast.
"Thank  you!"  said  Bilbo with  a  gasp.  It was  not  the  correct  thing  to  say,  but
they have begun  to  arrive had  flustered him badly. He  liked visitors, but he  liked
to know  them before  they arrived, and he preferred  to ask  them himself. He had a
horrible  thought  that  the  cakes might  run  short,  and  then  he-as  the host: he knew
his duty and stuck to it however painful-he might have to go without.
"Come  along  in,  and  have  some  tea!"  he  managed  to  say  after  taking  a  deep
breath.

####-"A  little  beer  would  suit  me  better,  if  it  is  all  the  same  to  you, my  good  sir,"
said  Balin  with  the  white  beard.  "But  I  don't  mind  some  cake-seed-cake,  if  you
have any."
"Lots!"  Bilbo  found  himself  answering,  to  his  own  surprise;  and  he  found
himself  scuttling  off,  too,  to  the  cellar  to  fill  a  pint  beer-mug, and to the pantry to
fetch  two  beautiful  round  seed-cakes  which  he  had  baked  that  afternoon  for  his
after-supper morsel.
When he got back Balin and Dwalin were  talking at  the  table  like old  friends
(as  a  matter  of  fact  they  were  brothers).  Bilbo  plumped  down  the  beer  and  the
cake  in  front  of  them, when  loud  came  a  ring  at  the  bell  again,  and  then  another
ring.
"Gandalf for certain this time," he thought as he puffed along the passage. But
it was not. It was two more dwarves, both with blue hoods, silver belts, and yellow
beards;  and  each  of  them  carried  a  bag  of  tools  and  a  spade.  In  they  hopped,  as
soon as the door began to open-Bilbo was hardly surprised at all.
"What can I do for you, my dwarves?" he said. "Kili at your service!" said the
one.  "And  Fili!"  added  the  other;  and  they  both  swept  off  their  blue  hoods  and
bowed.
"At  yours  and  your  family's!"  replied  Bilbo,  remembering  his  manners  this
time.
"Dwalin and Balin here already, I see," said Kili. "Let us join the throng!"
"Throng!" thought Mr. Baggins. "I don't like the sound of that. I really must sit
down for a minute and collect my wits, and have a drink." He had only just had a
sip-in  the  corner,  while  the  four  dwarves  sat  around  the  table,  and  talked  about
mines  and  gold  and  troubles  with  the  goblins,  and  the  depredations  of  dragons,
and lots of other things which he did not understand, and did not want to, for they
sounded much too adventurous-when, ding-dong-a-ling-' dang, his bell rang again,
as if some naughty little hobbit-boy was trying to pull the handle off. "Someone at
the  door!"  he  said,  blinking.  "Some  four,  I  should  say  by  the  sound,"  said  Fili.
"Be-sides, we saw them coming along behind us in the distance."
The  poor  little  hobbit  sat  down  in  the  hall  and  put  his  head  in  his  hands,  and
wondered  what  had  happened,  and  what  was  going  to  happen,  and  whether  they
would all stay  to supper. Then  the bell rang again  louder  than ever, and he had  to
run  to  the  door.  It  was  not  four  after  all,  t  was  FIVE.  Another  dwarf  had  come
along while he was wondering in the hall. He had hardly turned the knob, be-x)re
they were all  inside, bowing and  saying  "at your  service" one after another. Dori,
Nori,  Ori,  Oin,  and  Gloin  were  their  names;  and  very  soon  two  purple  hoods,  a

####-grey hood, a brown hood, and a white hood were hanging on the pegs, and off they
marched  with  their  broad  hands  stuck  in  their  gold  and  silver  belts  to  join  the
others. Already  it had almost become a  throng. Some called  for ale, and some  for
porter,  and  one  for  coffee,  and  all  of  them  for  cakes;  so  the hobbit was kept very
busy for a while.
A  big  jug  of  coffee  bad  just  been  set  in  the  hearth,  the  seed-cakes were gone,
and  the  dwarves  were  starting  on  a  round  of  buttered  scones,  when  there  came-a
loud  knock.  Not  a  ring,  but  a  hard  rat-tat  on  the  hobbit's  beautiful  green  door.
Somebody was banging with a stick!
Bilbo  rushed  along  the  passage,  very  angry,  and  altogether  bewildered  and
bewuthered-this  was  the  most  awkward  Wednesday  he  ever  remembered.  He
pulled open the door with a jerk, and they all fell in, one on top of the other. More
dwarves,  four  more!  And  there  was  Gandalf  behind,  leaning  on  his  staff  and
laughing. He had made quite a dent on the beautiful door; he had also, by the way,
knocked out the secret mark that he had put there the morning before.
"Carefully!  Carefully!"  he  said.  "It  is  not  like  you,  Bilbo,  to  keep  friends
waiting  on  the  mat,  and  then  open  the  door  like  a  pop-gun!  Let  me  introduce
Bifur, Bofur, Bombur, and especially Thorin!"
"At your service!" said Bifur, Bofur, and Bombur standing in a row. Then they
hung  up  two  yellow  hoods  and  a  pale  green  one;  and  also  a  sky-blue one with  a
long silver tassel. This last belonged to Thorin, an enormously important dwarf, in
fact  no  other  than  the  great  Thorin  Oakenshield  himself,  who  was  not  at  all
pleased at falling flat on Bilbo's mat with Bifur, Bofur, and Bombur on top of him.
For  one  thing  Bombur  was  immensely  fat  and  heavy.  Thorin  indeed  was  very
haughty,  and  said  nothing  about  service;  but  poor Mr. Baggins  said  he was  sorry
so  many  times,  that  at  last  he  grunted  "pray  don't  mention  it,"  and  stopped
frowning.
"Now we  are  all here!"  said Gandalf,  looking  at  the  row of  thirteen hoods-the
best  detachable  party  hoods-and his own hat hanging on  the pegs. "Quite a merry
gathering!
I hope there is something left for the late-comers to eat and drink! What's that?
Tea! No thank you! A little red wine, I think, for me." "And for me," said Thorin.
"And raspberry jam and apple-tart," said Bifur. "And mince-pies and cheese," said
Bofur.  "And  pork-pie  and  salad,"  said  Bombur.  "And  more  cakes-and  ale-and
coffee, if you don't mind," called the other dwarves through the door.

####-"Put  on  a  few  eggs,  there's  a  good  fellow!"  Gandalf  called  after  him,  as  the
hobbit  stumped  off  to  the  pantries.  "And  just  bring  out  the  cold  chicken  and
pickles!"
"Seems  to  know  as  much  about  the  inside  of  my  larders  as  I  do  myself!"
thought Mr. Baggins, who was  feeling  positively  flummoxed,  and was  beginning
to wonder whether  a most wretched  adventure  had  not  come  right  into his house.
By  the  time he had got all  the bottles and dishes and knives and forks and glasses
and  plates  and  spoons  and  things  piled  up  on  big  trays,  he  was  getting  very  hot,
and red in the face, and annoyed.
"Confusticate  and  bebother  these  dwarves!"  he  said  aloud.  "Why  don't  they
come and lend a hand?" Lo and behold! there stood Balin and Dwalin at the door
of  the  kitchen,  and  Fili  and Kili  behind  them,  and  before  he  could  say  knife  they
had  whisked  the  trays  and  a  couple  of  small  tables  into  the  parlour  and  set  out
everything afresh.
Gandalf  sat  at  the  head  of  the  party with  the  thirteen,  dwarves  all  round:  and
Bilbo  sat  on  a  stool  at  the  fireside,  nibbling  at  a  biscuit  (his  appetite  was  quite
taken away), and trying to look as if this was all perfectly  ordinary and. not in the
least  an  adventure. The  dwarves  ate  and  ate,  and  talked  and  talked,  and  time  got
on.  At  last  they  pushed  their  chairs  back,  and  Bilbo  made  a  move  to  collect  the
plates and glasses.
"I suppose you will all stay to supper?" he said in his politest unpressing tones.
"Of  course!"  said Thorin.  "And  after. We  shan't  get  through  the  business  till  late,
and we must have some music first. Now to clear up!"
Thereupon  the  twelve  dwarves-not  Thorin,  he  was  too  important,  and  stayed
talking  to  Gandalf-jumped  to  their  feet  and  made  tall  piles  of  all  the  things.  Off
they went, not waiting for trays, balancing columns of plates, each with a bottle on
the  top,  with  one  hand,  while  the  hobbit  ran  after  them  almost  squeaking  with
fright:  "please  be  careful!"  and  "please,  don't  trouble!  I  can  manage."  But  the
dwarves only started to sing:


Chip the glasses and crack the plates!
Blunt the knives and bend the forks!
That's what Bilbo Baggins hates-
Smash the bottles and burn the corks!

Cut the cloth and tread on the fat!
Pour the milk on the pantry floor!
Leave the bones on the bedroom mat!

####-Splash the wine on every door!

Dump the crocks in a boiling bawl;
Pound them up with a thumping pole;
And when you've finished, if any are whole,
Send them down the hall to roll !

That's what Bilbo Baggins hates!
So, carefully! carefully with the plates!


And  of  course  they  did  none  of  these  dreadful  things,  and  everything  was
cleaned  and  put  away  safe  as  quick  as  lightning,  while  the  hobbit  was  turning
round  and  round  in  the middle of  the kitchen  trying  to  see what  they were doing.
Then  they  went  back,  and  found  Thorin  with  his  feet  on  the  fender  smoking  a
pipe. He was  blowing  the most  enormous  smoke-rings, and wherever he  told one
to  go,  it  went-up  the  chimney,  or  behind  the  clock  on  the  man-telpiece,  or  under
the  table,  or  round  and  round  the  ceiling;  but  wherever  it  went  it  was  not  quick
enough  to escape Gandalf. Pop! he  sent a  smaller  smoke-ring from his short clay-
pipe  straight  through  each  one  of  Thorin's.  The  Gandalf's  smoke-ring  would  go
green  and  come  back  to  hover  over  the  wizard's  head.  He  had  quite  a  cloud  of
them  about  him  already,  and  in  the  dim  light  it  made  him  look  strange  and
sorcerous. Bilbo stood still and watched-he loved smoke-rings-and then be blushed
to think how proud he had been yesterday morning of the smoke-rings he had sent
up the wind over The Hill.
"Now for some music!" said Thorin. "Bring out the instruments!"
Kili  and  Fili  rushed  for  their  bags  and  brought  back  little  fiddles; Dori, Nori,
and Ori brought out flutes from somewhere inside their coats; Bombur produced a
drum  from  the  hall; Bifur  and Bofur went  out  too,  and  came  back with  clarinets
that they had left among the walking-sticks Dwalin and Balin said: "Excuse me, I
left  mine  in  the  porch!"  "Just  bring  mine  in  with  you,"  said  Thorin.  They  came
back with  viols  as  big  as  themselves,  and with  Thorin’s  harp wrapped  in  a  green
cloth.  It was  a  beautiful  gold-en harp, and when Thorin struck it the music began
all  at  once,  so  sudden  and  sweet  that Bilbo  forgot everything else, and was  swept
away  into dark  lands under  strange moons,  far over The Water and very  far  from
his hobbit-hole under The Hill.
The dark came into the room from the little window that opened in the side of
The  Hill;  the  firelight  flickered-it  was  April-and  still  they  played  on,  while  the
shadow of Gandalf's beard wagged against the wall.

####-The  dark  filled  all  the  room,  and  the  fire  died  down,  and  the  shadows  were
lost,  and  still  they  played  on.  And  suddenly  first  one  and  then  another  began  to
sing  as  they  played,  deep-throated  singing  of  the  dwarves  in  the  deep  places  of
their ancient homes; and this is like a fragment of their song, if it can be like their
song without their music.


Far over the misty mountains cold
To dungeons deep and caverns old
We must away ere break of day
To seek the pale enchanted gold.

The dwarves of yore made mighty spells,
While hammers fell like ringing bells
In places deep, where dark things sleep,
In hollow halls beneath the fells.

For ancient king and elvish lord
There many a gloaming golden hoard
They shaped and wrought, and light they caught
To hide in gems on hilt of sword.

On silver necklaces they strung
The flowering stars, on crowns they hung
The dragon-fire, in twisted wire
They meshed the light of moon and sun.

Far over the misty mountains cold
To dungeons deep and caverns old
We must away, ere break of day,
To claim our long-forgotten gold.

Goblets they carved there for themselves
And harps of gold; where no man delves
There lay they long, and many a song
Was sung unheard by men or elves.

The pines were roaring on the height,
The winds were moaning in the night.
The fire was red, it flaming spread;
The trees like torches biased with light,

The bells were ringing in the dale
And men looked up with faces pale;

####-The dragon's ire more fierce than fire
Laid low their towers and houses frail.

The mountain smoked beneath the moon;
The dwarves, they heard the tramp of doom.
They fled their hall to dying -fall
Beneath his feet, beneath the moon.

Far over the misty mountains grim
To dungeons deep and caverns dim
We must away, ere break of day,
To win our harps and gold from him!


As they sang the hobbit felt the love of beautiful things made by hands and by
cunning and by magic moving through him, a fierce and jealous love, the desire of
the  hearts  of  dwarves.  Then  something  Tookish  woke  up  inside  him,  and  he
wished  to  go  and  see  the  great  mountains,  and  hear  the  pine-trees  and  the
waterfalls, and explore the caves, and wear a sword instead of a walking-stick. He
looked  out  of  the  window.  The  stars  were  out  in  a  dark  sky  above  the  trees.  He
thought  of  the  jewels  of  the  dwarves  shining  in  dark  caverns.  Suddenly  in  the
wood  beyond  The  Water  a  flame  leapt  up--probably  somebody  lighting  a  wood-
fire-and he thought of plundering dragons settling on his quiet Hill and kindling it
all  to  flames. He  shuddered;  and  very  quickly  he was  plain Mr.  Baggins  of  Bag-
End, Under-Hill, again.
"""

In [22]:
# Tokenize o texto
tokens = re.findall(r'\b\w+\b', text.lower())

# Constrói o vocabulário
word_counts = Counter(tokens)
vocab = sorted(word_counts.keys())
vocab_size = len(vocab)
word2idx = {word: idx for idx, word in enumerate(vocab)}
idx2word = {idx: word for word, idx in word2idx.items()}

# Converte tokens para índices
indices = [word2idx[word] for word in tokens]

# Gera as sequências
sequence_length = 5
inputs = []
targets = []

for i in range(len(indices) - sequence_length):
    inputs.append(indices[i:i+sequence_length])
    targets.append(indices[i+1:i+sequence_length+1])

# Converte para tensores
inputs = torch.tensor(inputs, dtype=torch.long)
targets = torch.tensor(targets, dtype=torch.long)

# Cria o dataset e o dataloader
batch_size = 2
dataset = TensorDataset(inputs, targets)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [23]:
d_model = 64
num_heads = 8
d_ff = 256
num_layers = 2
dropout = 0.1
max_len = 500

model = Decoder(vocab_size, d_model, num_heads, num_layers, d_ff, max_len, dropout).to(device)

In [24]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [25]:
epochs = 10

for epoch in range(epochs):
    model.train()
    total_loss = 0
    for batch_inputs, batch_targets in dataloader:
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        batch_size, seq_len = batch_inputs.size()
        mask = create_causal_mask(seq_len).to(device)

        optimizer.zero_grad()
        outputs = model(batch_inputs, trg_mask=mask)
        outputs = outputs.view(-1, vocab_size)
        batch_targets = batch_targets.view(-1)

        loss = criterion(outputs, batch_targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    avg_loss = total_loss / len(dataloader)
    print(f'Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}')

Epoch 1/10, Loss: 5.2329
Epoch 2/10, Loss: 3.6422
Epoch 3/10, Loss: 2.9080
Epoch 4/10, Loss: 2.5085
Epoch 5/10, Loss: 2.2195
Epoch 6/10, Loss: 2.0167
Epoch 7/10, Loss: 1.8574
Epoch 8/10, Loss: 1.7387
Epoch 9/10, Loss: 1.6534
Epoch 10/10, Loss: 1.5759


In [26]:
def generate_text(model, start_tokens, generate_length, idx2word, device):
    model.eval()
    tokens = start_tokens.copy()

    for _ in range(generate_length):
        input_tensor = torch.tensor([tokens], dtype=torch.long).to(device)
        seq_len = input_tensor.size(1)
        mask = create_causal_mask(seq_len).to(device)

        with torch.no_grad():
            output = model(input_tensor, trg_mask=mask)
            next_token_logits = output[:, -1, :]
            next_token = torch.argmax(next_token_logits, dim=-1).item()
            tokens.append(next_token)
    words = [idx2word[idx] for idx in tokens]
    return ' '.join(words)

start_word = "the"
start_token = [word2idx[start_word]]
generated_text = generate_text(model, start_token, generate_length=20, idx2word=idx2word, device=device)
print("Texto gerado:")
print(generated_text)

Texto gerado:
the hobbit bedrooms bathrooms cellars pantries lots of sword of his dark sky blue his house by this time so far


## Exercícios

### Exercício 1
Implemente um módulo que utilize apenas o módulo Encoder para a classificação de texto em `num_classes` classes. Para a obtenção do vetor de embedding de toda a sequência que será enviado para a cabeça de classificação, faça um pooling de média através da dimensão de sequência.

### Exercício 2
Vamos implementar um modelo baseado em stack de decoders. Uma vez que não é necessário cross-attention, pois não há encoders, utilize o módulo `EncoderLayer`. O tamanho do vocabulário deverá ser de 50257, o tamanho dos embeddings de 768, 12 cabeças de atenção, 12 camadas, dimensão da camada feedforward de 3072 e tamanho máximo de sequência 1024. Em seguida, teste com valores aleatórios simulando uma sequência de tokens.