<a href="https://colab.research.google.com/github/stephanyresque/Modelos_multimodais/blob/main/Transformes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Simple Transformer Encoder (para texto)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math

# Hyperparameters for the toy encoder built further down in this notebook.
# Width of every token vector; nn.MultiheadAttention requires it to be
# divisible by NUM_HEADS.
EMBED_DIM = 64
# Number of attention heads inside the encoder block.
NUM_HEADS = 4
# Hidden width of the position-wise feed-forward network.
FF_DIM = 128
# Number of rows in the embedding table; also the exclusive upper bound for
# the random token ids generated in the demo cell.
VOCAB_SIZE = 1000
# Length of the demo input sequence; also used as the positional table size.
SEQ_LEN = 10

class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal position information to a batch of embeddings.

    A table of shape ``(1, max_len, embed_dim)`` is computed once in the
    constructor and registered as the buffer ``pe`` (saved with the module,
    moved by ``.to()``, but not a trainable parameter). Even columns hold
    sines and odd columns hold cosines of ``position * div_term``.

    Args:
        embed_dim: Size of the last dimension of the inputs. Odd values are
            supported; the final column then only has a sine component.
        max_len: Largest size of dimension 1 (sequence length) that
            ``forward`` accepts.
    """

    def __init__(self, embed_dim, max_len=5000):
        super().__init__()
        position = torch.arange(0, max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, embed_dim, 2) * (-math.log(10000.0) / embed_dim))
        angles = position * div_term

        pe = torch.zeros(max_len, embed_dim)
        pe[:, 0::2] = torch.sin(angles)
        # With an odd embed_dim there is one fewer odd column than even
        # columns, so drop the surplus frequency instead of failing with a
        # shape mismatch.
        pe[:, 1::2] = torch.cos(angles[:, : embed_dim // 2])

        # Leading singleton dimension lets the table broadcast over the batch.
        self.register_buffer('pe', pe.unsqueeze(0))

    def forward(self, x):
        """Return ``x`` plus the encodings for its first ``x.size(1)`` positions.

        Args:
            x: Tensor whose dimension 1 is the sequence axis and whose last
                dimension equals ``embed_dim``.

        Raises:
            ValueError: If the sequence is longer than the precomputed table.
        """
        seq_len = x.size(1)
        max_len = self.pe.size(1)
        if seq_len > max_len:
            # Without this check the addition either fails with an opaque
            # broadcasting error or (when max_len == 1) silently reuses
            # position 0 for every token.
            raise ValueError(
                f"sequence length {seq_len} exceeds the positional table size {max_len}"
            )
        return x + self.pe[:, :seq_len]


class EncoderBlock(nn.Module):
    """Single Transformer encoder layer: self-attention then feed-forward.

    Each sub-layer is wrapped in a residual connection followed by
    ``LayerNorm`` (normalisation applied after the addition).

    Args:
        embed_dim: Size of the last dimension of the inputs and outputs.
        num_heads: Number of attention heads passed to
            ``nn.MultiheadAttention``.
        ff_dim: Hidden width of the two-layer feed-forward network.
    """

    def __init__(self, embed_dim, num_heads, ff_dim):
        super().__init__()
        # batch_first=True: inputs are laid out as (batch, seq, embed_dim).
        self.attn = nn.MultiheadAttention(embed_dim, num_heads, batch_first=True)
        self.ff = nn.Sequential(
            nn.Linear(embed_dim, ff_dim),
            nn.ReLU(),
            nn.Linear(ff_dim, embed_dim)
        )
        self.norm1 = nn.LayerNorm(embed_dim)
        self.norm2 = nn.LayerNorm(embed_dim)

    def forward(self, x, key_padding_mask=None):
        """Apply the encoder layer to ``x``.

        Args:
            x: Input tensor laid out as ``(batch, seq, embed_dim)``.
            key_padding_mask: Optional ``(batch, seq)`` mask forwarded to
                ``nn.MultiheadAttention``; for a boolean mask, ``True`` marks
                positions (e.g. padding) that must not be attended to.
                ``None`` (the default) attends to every position.

        Returns:
            Tensor with the same shape as ``x``.
        """
        # The attention weights are never used, so skip computing them.
        attn_output, _ = self.attn(
            x, x, x,
            key_padding_mask=key_padding_mask,
            need_weights=False,
        )
        x = self.norm1(x + attn_output)
        ff_output = self.ff(x)
        x = self.norm2(x + ff_output)
        return x


class SimpleTransformerEncoder(nn.Module):
    """Minimal text encoder: token embedding, positional encoding, one block.

    Args:
        vocab_size: Number of rows in the embedding table.
        embed_dim: Width of each token vector.
        num_heads: Attention heads used by the encoder block.
        ff_dim: Hidden width of the encoder block's feed-forward network.
        seq_len: Size of the positional table (passed as ``max_len``).
    """

    def __init__(self, vocab_size, embed_dim, num_heads, ff_dim, seq_len):
        super().__init__()
        self.embedding = nn.Embedding(
            num_embeddings=vocab_size,
            embedding_dim=embed_dim,
        )
        self.positional = PositionalEncoding(embed_dim, max_len=seq_len)
        self.encoder_block = EncoderBlock(
            embed_dim=embed_dim,
            num_heads=num_heads,
            ff_dim=ff_dim,
        )

    def forward(self, x):
        """Embed the token ids in ``x`` and run them through the encoder."""
        token_vectors = self.embedding(x)
        positioned = self.positional(token_vectors)
        return self.encoder_block(positioned)


In [None]:
# Instantiate the toy encoder from the notebook-level hyperparameters
# (argument order: vocab_size, embed_dim, num_heads, ff_dim, seq_len).
model = SimpleTransformerEncoder(VOCAB_SIZE, EMBED_DIM, NUM_HEADS, FF_DIM, SEQ_LEN)

# A single random sequence of token ids drawn from [0, VOCAB_SIZE).
x = torch.randint(low=0, high=VOCAB_SIZE, size=(1, SEQ_LEN))
output = model(x)

print("Entrada:", x)
print("Saída do encoder:", output.shape)

Entrada: tensor([[662,  54, 173, 828,   3, 156,  43, 595, 151, 294]])
Saída do encoder: torch.Size([1, 10, 64])


# BERT Encoder

In [None]:
!pip install transformers --quiet

In [None]:
from transformers import BertModel, BertTokenizer
import torch

# Load the tokenizer and the encoder weights for the same checkpoint name.
# NOTE: `model` rebinds the name used for the simple encoder in the earlier cell.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = BertModel.from_pretrained("bert-base-uncased")

# Sample sentence to encode.
texto = "The cat sat on the mat."

# return_tensors="pt" asks the tokenizer for PyTorch tensors, which are
# unpacked straight into the model call below.
inputs = tokenizer(texto, return_tensors="pt")

# Inference only: no gradients are needed, so skip autograd bookkeeping.
with torch.no_grad():
    outputs = model(**inputs)

# Per-token hidden states; (1, 9, 768) in the recorded run of this cell.
last_hidden_state = outputs.last_hidden_state
# One vector per sequence; (1, 768) in the recorded run.
# NOTE(review): presumably derived from the [CLS] position per the
# Hugging Face BertModel docs — confirm before relying on it.
pooled_output = outputs.pooler_output

# Map the ids of the first (only) sequence back to token strings.
print("Tokens:", tokenizer.convert_ids_to_tokens(inputs['input_ids'][0]))
print("last_hidden_state shape:", last_hidden_state.shape)
print("pooled_output shape:", pooled_output.shape)

# First five components of the hidden state at position 0, which the token
# listing printed above shows to be the [CLS] token.
print("Vetor da primeira palavra (CLS):", last_hidden_state[0, 0, :5])

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Tokens: ['[CLS]', 'the', 'cat', 'sat', 'on', 'the', 'mat', '.', '[SEP]']
last_hidden_state shape: torch.Size([1, 9, 768])
pooled_output shape: torch.Size([1, 768])
Vetor da primeira palavra (CLS): tensor([-0.3642, -0.0531, -0.3673, -0.0297, -0.4608])
