# Inferencia

Una vez hemos entrenado nuestro transformer podemos probar a ver qué tal lo hace

## Transformer

Vamos primero a implementar un transformer con todo el código que hemos usado antes, primero escribimos las funciones de bajo nivel

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Embedding(nn.Module):
    def __init__(self, vocab_size, embedding_dim):
        super().__init__()
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim

        self.embedding = nn.Embedding(vocab_size, embedding_dim)

    def forward(self, x):
        return self.embedding(x)

class PositionalEncoding(nn.Module):
    def __init__(self, max_sequence_len, embedding_model_dim):
        super().__init__()
        self.embedding_dim = embedding_model_dim
        positional_encoding = torch.zeros(max_sequence_len, self.embedding_dim)
        for pos in range(max_sequence_len):
            for i in range(0, self.embedding_dim, 2):
                positional_encoding[pos, i]     = torch.sin(torch.tensor(pos / (10000 ** ((2 * i) / self.embedding_dim))))
                positional_encoding[pos, i + 1] = torch.cos(torch.tensor(pos / (10000 ** ((2 * (i+1)) / self.embedding_dim))))
        positional_encoding = positional_encoding.unsqueeze(0)
        self.register_buffer('positional_encoding', positional_encoding)

    def forward(self, x):
        x = x * torch.sqrt(torch.tensor(self.embedding_dim))
        sequence_len = x.size(1)
        x = x + self.positional_encoding[:,:sequence_len]
        return x

class ScaledDotProductAttention(nn.Module):
    def __init__(self, dim_embedding):
        super().__init__()
        self.dim_embedding = dim_embedding
    
    def forward(self, query, key, value, mask=None):
        key_trasposed = key.transpose(-1,-2)
        product = torch.matmul(query, key_trasposed)
        scale = product / torch.sqrt(torch.tensor(self.dim_embedding))
        if mask is not None:
            scale = scale.masked_fill(mask == 0, float('-inf'))
        attention_matrix = torch.softmax(scale, dim=-1)
        output = torch.matmul(attention_matrix, value)
        return output

class MultiHeadAttention(nn.Module):
    def __init__(self, heads, dim_embedding):
        super().__init__()
        
        self.dim_embedding = dim_embedding
        self.dim_proyection = dim_embedding // heads
        self.heads = heads
        self.proyection_Q = nn.Linear(dim_embedding, dim_embedding)
        self.proyection_K = nn.Linear(dim_embedding, dim_embedding)
        self.proyection_V = nn.Linear(dim_embedding, dim_embedding)
        self.attention = nn.Linear(dim_embedding, dim_embedding)
        self.scaled_dot_product_attention = ScaledDotProductAttention(self.dim_proyection)
    
    def forward(self, Q, K, V, mask=None):
        batch_size = Q.size(0)
        proyection_Q = self.proyection_Q(Q).view(batch_size, -1, self.heads, self.dim_proyection)
        proyection_K = self.proyection_K(K).view(batch_size, -1, self.heads, self.dim_proyection)
        proyection_V = self.proyection_V(V).view(batch_size, -1, self.heads, self.dim_proyection)
        proyection_Q = proyection_Q.transpose(1,2)
        proyection_K = proyection_K.transpose(1,2)
        proyection_V = proyection_V.transpose(1,2)
        scaled_dot_product_attention = self.scaled_dot_product_attention(proyection_Q, proyection_K, proyection_V, mask=mask)
        concat = scaled_dot_product_attention.transpose(1,2).contiguous().view(batch_size, -1, self.dim_embedding)
        output = self.attention(concat)
        return output

class AddAndNorm(nn.Module):
    def __init__(self, dim_embedding):
        super().__init__()
        self.normalization = nn.LayerNorm(dim_embedding)

    def forward(self, x, sublayer):
        return self.normalization(torch.add(x, sublayer))

class FeedForward(nn.Module):
    def __init__(self, dim_embedding, increment=4):
        super().__init__()
        self.feed_forward = nn.Sequential(
            nn.Linear(dim_embedding, dim_embedding*increment),
            nn.ReLU(),
            nn.Linear(dim_embedding*increment, dim_embedding)
        )
    
    def forward(self, x):
        x = self.feed_forward(x)
        return x

class Linear(nn.Module):
    def __init__(self, dim_embedding, vocab_size):
        super().__init__()
        self.linear = nn.Linear(dim_embedding, vocab_size)
        
    def forward(self, x):
        x = self.linear(x)
        return x

class Softmax(nn.Module):
    def __init__(self):
        super().__init__()
        self.softmax = nn.Softmax(dim=1)
        
    def forward(self, x):
        x = self.softmax(x)
        return x

class Dropout(torch.nn.Module):
    def __init__(self, p=0.1):
        super().__init__()
        self.p = p

    def forward(self, x):
        if self.training:
            return torch.nn.functional.dropout(x, p=self.p)
        else:
            return x


Ahora las clases de medio nivel

In [2]:
class EncoderLayer(nn.Module):
    def __init__(self, heads, dim_embedding, prob_dropout=0.1):
        super().__init__()
        self.multi_head_attention = MultiHeadAttention(heads, dim_embedding)
        self.dropout_1 = Dropout(prob_dropout)
        self.add_and_norm_1 = AddAndNorm(dim_embedding)
        self.feed_forward = FeedForward(dim_embedding)
        self.dropout_2 = Dropout(prob_dropout)
        self.add_and_norm_2 = AddAndNorm(dim_embedding)
    
    def forward(self, x):
        multi_head_attention = self.multi_head_attention(x, x, x)
        dropout1 = self.dropout_1(multi_head_attention)
        add_and_norm_1 = self.add_and_norm_1(x, dropout1)
        feed_forward = self.feed_forward(add_and_norm_1)
        dropout2 = self.dropout_2(feed_forward)
        add_and_norm_2 = self.add_and_norm_2(add_and_norm_1, dropout2)
        return add_and_norm_2

class Encoder(nn.Module):
    def __init__(self, heads, dim_embedding, Nx, prob_dropout=0.1):
        super().__init__()
        self.encoder_layers = nn.ModuleList([EncoderLayer(heads, dim_embedding, prob_dropout) for _ in range(Nx)])
    
    def forward(self, x):
        for encoder_layer in self.encoder_layers:
            x = encoder_layer(x)
        return x

class TransformerEncoder(nn.Module):
    def __init__(self, vocab_size, dim_embedding, max_sequence_len, heads, Nx, prob_dropout=0.1):
        super().__init__()
        self.input_embedding = Embedding(vocab_size, dim_embedding)
        self.positional_encoding = PositionalEncoding(max_sequence_len, dim_embedding)
        self.encoder = Encoder(heads, dim_embedding, Nx, prob_dropout)
    
    def forward(self, x):
        input_embedding = self.input_embedding(x)
        positional_encoding = self.positional_encoding(input_embedding)
        encoder = self.encoder(positional_encoding)
        return encoder

class DecoderLayer(nn.Module):
    def __init__(self, heads, dim_embedding, prob_dropout=0.1):
        super().__init__()
        self.masked_multi_head_attention = MultiHeadAttention(heads, dim_embedding)
        self.dropout_1 = Dropout(prob_dropout)
        self.add_and_norm_1 = AddAndNorm(dim_embedding)
        self.encoder_decoder_multi_head_attention = MultiHeadAttention(heads, dim_embedding)
        self.dropout_2 = Dropout(prob_dropout)
        self.add_and_norm_2 = AddAndNorm(dim_embedding)
        self.feed_forward = FeedForward(dim_embedding)
        self.dropout_3 = Dropout(prob_dropout)
        self.add_and_norm_3 = AddAndNorm(dim_embedding)
    
    def forward(self, x, encoder_output, mask=None):
        Q = x
        K = x
        V = x
        masked_multi_head_attention = self.masked_multi_head_attention(Q, K, V, mask=mask)
        dropout1 = self.dropout_1(masked_multi_head_attention)
        add_and_norm_1 = self.add_and_norm_1(dropout1, x)

        Q = add_and_norm_1
        K = encoder_output
        V = encoder_output
        encoder_decoder_multi_head_attention = self.encoder_decoder_multi_head_attention(Q, K, V)
        dropout2 = self.dropout_2(encoder_decoder_multi_head_attention)
        add_and_norm_2 = self.add_and_norm_2(dropout2, add_and_norm_1)

        feed_forward = self.feed_forward(add_and_norm_2)
        dropout3 = self.dropout_3(feed_forward)
        add_and_norm_3 = self.add_and_norm_3(dropout3, add_and_norm_2)

        return add_and_norm_3

class Decoder(nn.Module):
    def __init__(self, heads, dim_embedding, Nx, prob_dropout=0.1):
        super().__init__()
        self.layers = nn.ModuleList([DecoderLayer(heads, dim_embedding, prob_dropout) for _ in range(Nx)])
    
    def forward(self, x, encoder_output, mask=None):
        for decoder_layer in self.layers:
            x = decoder_layer(x, encoder_output, mask)
        return x

class TransformerDecoder(nn.Module):
    def __init__(self, heads, dim_embedding, Nx, vocab_size, max_sequence_len, prob_dropout=0.1):
        super().__init__()
        self.embedding = Embedding(vocab_size, dim_embedding)
        self.positional_encoding = PositionalEncoding(max_sequence_len, dim_embedding)
        self.decoder = Decoder(heads, dim_embedding, Nx, prob_dropout)
        self.linear = Linear(dim_embedding, vocab_size)
        self.softmax = Softmax()
    
    def forward(self, x, encoder_output, mask=None):
        x = self.embedding(x)
        x = self.positional_encoding(x)
        x = self.decoder(x, encoder_output, mask)
        x = self.linear(x)
        x = self.softmax(x)
        return x


Y por último la clase transformer

In [3]:
class Transformer(nn.Module):
    def __init__(self, vocab_size, dim_embedding, max_sequence_len, heads, Nx, prob_dropout=0.1):
        super().__init__()
        self.encoder = TransformerEncoder(vocab_size, dim_embedding, max_sequence_len, heads, Nx, prob_dropout)
        self.decoder = TransformerDecoder(heads, dim_embedding, Nx, vocab_size, max_sequence_len, prob_dropout)
    
    def forward(self, source, target, mask=None):
        encoder_output = self.encoder(source)
        decoder_output = self.decoder(target, encoder_output, mask)
        return decoder_output


Creamos la máscara

In [4]:
def create_mask(sequence_len):
    mask = torch.tril(torch.ones((sequence_len, sequence_len)))
    return mask

max_secuence_length = 10 + 2
mask = create_mask(max_secuence_length)

Y ahora creamos un objeto del transformer

In [11]:
import tiktoken

encoder = tiktoken.get_encoding("cl100k_base")
vocab_size = encoder.n_vocab
dim_embedding = 512
heads = 8
Nx = 6
prob_dropout = 0.1
print(f"vocab_size: {vocab_size}, dim_embedding: {dim_embedding}, max_secuence_length: {max_secuence_length}, heads: {heads}, Nx: {Nx}, prob_dropout: {prob_dropout}")

transformer = Transformer(vocab_size=vocab_size,
                          dim_embedding=dim_embedding,
                          max_sequence_len=max_secuence_length,
                          heads=heads,
                          Nx=Nx,
                          prob_dropout=prob_dropout)
print(f"Modelo de {sum(p.numel() for p in transformer.parameters())/1e6} millones de parámetros")


vocab_size: 100277, dim_embedding: 64, max_secuence_length: 12, heads: 8, Nx: 6, prob_dropout: 0.1
Modelo de 20.053877 millones de parámetros


In [7]:
print(f"Modelo de {sum(p.numel() for p in transformer.parameters())/1e6} millones de parámetros")

Modelo de 198.264245 millones de parámetros


## Inferencia con modelo sin entrenar

Vamos a ver el resultado del transformer sin entrenarlo

In [6]:
start_token = chr(1)
start_token = encoder.encode(start_token)

end_token = chr(2)
end_token = encoder.encode(end_token)

padding_token = chr(3)
padding_token = encoder.encode(padding_token)

print(f"start_token: {start_token}, end_token: {end_token}, padding_token: {padding_token}")

start_token: [189], end_token: [190], padding_token: [191]


In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = "cpu"

In [8]:
def prepare_source_sentence(sentence, start_token, end_token, pad_token, max_length, device):
    sentence = encoder.encode(sentence)
    sentence = start_token + sentence + end_token
    if len(sentence) < max_length:
        sentence = sentence + pad_token * (max_length - len(sentence))
    else:
        sentence = sentence[:max_length]
    sentence = torch.tensor([sentence]).to(device)
    return sentence

def prepare_target_sentence(sentence, start_token, pad_token, max_length, device):
    sentence = encoder.encode(sentence)
    sentence = start_token + sentence + end_token
    if len(sentence) < max_length:
        sentence = sentence + pad_token * (max_length - len(sentence))
    else:
        sentence = sentence[:max_length]
    sentence = torch.tensor([sentence]).to(device)
    return sentence


In [9]:
sentence_en = "I have learned a lot from this course"
encode_sentence_en = prepare_source_sentence(sentence_en, start_token, end_token, padding_token, max_secuence_length, device)
print(f"Encode english sencence: {encode_sentence_en}")
print(f"English sencence shape: {encode_sentence_en.shape}")

Encode english sencence: tensor([[ 189,   40,  617, 9687,  264, 2763,  505,  420, 3388,  190,  191,  191]])
English sencence shape: torch.Size([1, 12])


In [10]:
sentence_es = ""
encode_sentence_es = prepare_target_sentence(sentence_es, start_token, padding_token, max_secuence_length, device)
print(f"Encode spanish sentence: {encode_sentence_es}")
print(f"Spanish sentence shape: {encode_sentence_es.shape}")

Encode spanish sentence: tensor([[189, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191]])
Spanish sentence shape: torch.Size([1, 12])


In [11]:
def get_target_sentence(source, target, mask, model, device, end_token, max_len):
    model = model.to(device)
    # model.eval()
    source = source.to(device)
    target = target.to(device)
    mask = mask.to(device)
    end_token = torch.tensor(end_token)
    output_sentence = target.clone()

    for i in range(max_len-2):
        with torch.no_grad():
            output = model(source, target, mask)
            next_token = output[0, i+1].argmax().item()
            output_sentence[0, i+1] = next_token
            if next_token == end_token:
                break
    output_sentence[0, max_len-1] = end_token

    return output_sentence

def decode_sentence(sentence, decoder, end_token):
    decoded = ""
    if isinstance(end_token, list):
        end_token = end_token[0]
    if isinstance(sentence, torch.Tensor):
        sentence = sentence.cpu().numpy()
    if end_token in sentence:
        position_end_token = int(np.where(sentence == end_token)[0])
        sentence = sentence[:position_end_token+1]
    sentence = sentence[1:-1]   # Remove start and end token
    decoded = decoder(sentence)
    return decoded


In [12]:
encoded_output = get_target_sentence(encode_sentence_en, encode_sentence_es, mask, transformer, device, end_token, max_secuence_length).squeeze(0).cpu().numpy()
print(f"Encoded output: {encoded_output}")

decoded_output = decode_sentence(encoded_output, encoder.decode, max_secuence_length)
print(f"Decoded output: {decoded_output}")

Encoded output: [  189 20175 50518 38440 39050 90818 90818 90818 90818 62407 62407   190]
Decoded output: Aut diagnostics.AbstractImGuiremaremaremarema assemblies assemblies


## Inferencia con modelo entrenado

### Step 188

Cargamos los pesos

In [13]:
weights = "model/transformer_model_188_138556.pth"
transformer = torch.load(weights, map_location='cpu')

if isinstance(transformer, nn.DataParallel):
    print("DataParallel")
    transformer = transformer.module

DataParallel


In [14]:
encoded_output = get_target_sentence(encode_sentence_en, encode_sentence_es, mask, transformer, device, end_token, max_secuence_length).squeeze(0).cpu().numpy()
print(f"Encoded output: {encoded_output}")

decoded_output = decode_sentence(encoded_output, encoder.decode, max_secuence_length)
print(f"Decoded output: {decoded_output}")

Encoded output: [  189 24652  1933 11532 47012 92908  2247  2247  2247  2247  2247   190]
Decoded output: ajs color_qu获取 nex","","","","","


### Step 296

Cargamos los pesos

In [15]:
weights = "model/transformer_model_296_218152.pth"
transformer = torch.load(weights, map_location='cpu')

if isinstance(transformer, nn.DataParallel):
    print("DataParallel")
    transformer = transformer.module

DataParallel


In [16]:
encoded_output = get_target_sentence(encode_sentence_en, encode_sentence_es, mask, transformer, device, end_token, max_secuence_length).squeeze(0).cpu().numpy()
print(f"Encoded output: {encoded_output}")

decoded_output = decode_sentence(encoded_output, encoder.decode, max_secuence_length)
print(f"Decoded output: {decoded_output}")

Encoded output: [   189 100068   1933  18572  23245  90347   2247   2247   2247   2247
   2247    190]
Decoded output: Seats color399 Antonio awakened","","","","","


### Step 309

Cargamos los pesos

In [17]:
weights = "model/transformer_model_309_227733.pth"
transformer = torch.load(weights, map_location='cpu')

if isinstance(transformer, nn.DataParallel):
    print("DataParallel")
    transformer = transformer.module

DataParallel


In [18]:
encoded_output = get_target_sentence(encode_sentence_en, encode_sentence_es, mask, transformer, device, end_token, max_secuence_length).squeeze(0).cpu().numpy()
print(f"Encoded output: {encoded_output}")

decoded_output = decode_sentence(encoded_output, encoder.decode, max_secuence_length)
print(f"Decoded output: {decoded_output}")

Encoded output: [   189 100068   1933  18572  23245  90347   2247   2247   2247   2247
   2247    190]
Decoded output: Seats color399 Antonio awakened","","","","","


### Step 331

Cargamos los pesos

In [19]:
weights = "model/transformer_model_331_243947.pth"
transformer = torch.load(weights, map_location='cpu')

if isinstance(transformer, nn.DataParallel):
    print("DataParallel")
    transformer = transformer.module

DataParallel


In [20]:
encoded_output = get_target_sentence(encode_sentence_en, encode_sentence_es, mask, transformer, device, end_token, max_secuence_length).squeeze(0).cpu().numpy()
print(f"Encoded output: {encoded_output}")

decoded_output = decode_sentence(encoded_output, encoder.decode, max_secuence_length)
print(f"Decoded output: {decoded_output}")

Encoded output: [   189 100068   1933  18572  40955  90347   2247   2247   2247   2247
   2247    190]
Decoded output: Seats color399ต awakened","","","","","


### Step 509

Cargamos los pesos

In [21]:
weights = "model/transformer_model_509_375133.pth"
transformer = torch.load(weights, map_location='cpu')

if isinstance(transformer, nn.DataParallel):
    print("DataParallel")
    transformer = transformer.module

DataParallel


In [22]:
encoded_output = get_target_sentence(encode_sentence_en, encode_sentence_es, mask, transformer, device, end_token, max_secuence_length).squeeze(0).cpu().numpy()
print(f"Encoded output: {encoded_output}")

decoded_output = decode_sentence(encoded_output, encoder.decode, max_secuence_length)
print(f"Decoded output: {decoded_output}")

Encoded output: [   189 100068   1933  40955  26066  92908   2247   2247   2247   2247
   2247    190]
Decoded output: Seats colorตIGNED nex","","","","","


### Step 564

Cargamos los pesos

In [13]:
weights = "model/transformer_model_564_1104078.pth"
transformer = torch.load(weights, map_location='cpu')

if isinstance(transformer, nn.DataParallel):
    print("DataParallel")
    transformer = transformer.module

DataParallel


In [14]:
encoded_output = get_target_sentence(encode_sentence_en, encode_sentence_es, mask, transformer, device, end_token, max_secuence_length).squeeze(0).cpu().numpy()
print(f"Encoded output: {encoded_output}")

decoded_output = decode_sentence(encoded_output, encoder.decode, max_secuence_length)
print(f"Decoded output: {decoded_output}")

Encoded output: [  189 50165 68892 68892 86785 68983 56827 23109 23109 23109 23109   190]
Decoded output: .NAMEFIRSTFIRST Salvation_spaces watcher Ka Ka Ka Ka


### Step 595

Cargamos los pesos

In [13]:
weights = "model/transformer_model_595_1256474.pth"
transformer = torch.load(weights, map_location='cpu')

if isinstance(transformer, nn.DataParallel):
    print("DataParallel")
    transformer = transformer.module

DataParallel


In [14]:
encoded_output = get_target_sentence(encode_sentence_en, encode_sentence_es, mask, transformer, device, end_token, max_secuence_length).squeeze(0).cpu().numpy()
print(f"Encoded output: {encoded_output}")

decoded_output = decode_sentence(encoded_output, encoder.decode, max_secuence_length)
print(f"Decoded output: {decoded_output}")

Encoded output: [  189 50165 47097 47097 86785 93615 56827 23109 23109 23109 23109   190]
Decoded output: .NAME marca marca Salvation()=> watcher Ka Ka Ka Ka


### Step 619

Cargamos los pesos

In [13]:
weights = "model/transformer_model_619_1374458.pth"
transformer = torch.load(weights, map_location='cpu')

if isinstance(transformer, nn.DataParallel):
    print("DataParallel")
    transformer = transformer.module

DataParallel


In [14]:
encoded_output = get_target_sentence(encode_sentence_en, encode_sentence_es, mask, transformer, device, end_token, max_secuence_length).squeeze(0).cpu().numpy()
print(f"Encoded output: {encoded_output}")

decoded_output = decode_sentence(encoded_output, encoder.decode, max_secuence_length)
print(f"Decoded output: {decoded_output}")

Encoded output: [  189 50165 47097 47097 86785 93615 56827 23109 23109 23109 23109   190]
Decoded output: .NAME marca marca Salvation()=> watcher Ka Ka Ka Ka


### Step 643

Cargamos los pesos

In [14]:
weights = "model/transformer_model_643_1492442.pth"
transformer = torch.load(weights, map_location='cpu')

if isinstance(transformer, nn.DataParallel):
    print("DataParallel")
    transformer = transformer.module

DataParallel


In [15]:
encoded_output = get_target_sentence(encode_sentence_en, encode_sentence_es, mask, transformer, device, end_token, max_secuence_length).squeeze(0).cpu().numpy()
print(f"Encoded output: {encoded_output}")

decoded_output = decode_sentence(encoded_output, encoder.decode, max_secuence_length)
print(f"Decoded output: {decoded_output}")

Encoded output: [  189 50165 19541 47097 86785 93615 56827 23109 23109 23109 23109   190]
Decoded output: .NAME ny marca Salvation()=> watcher Ka Ka Ka Ka


### Step 667

Cargamos los pesos

In [28]:
weights = "model/transformer_model_667_1610426.pth"
transformer = torch.load(weights, map_location='cpu')

if isinstance(transformer, nn.DataParallel):
    print("DataParallel")
    transformer = transformer.module

DataParallel


In [29]:
encoded_output = get_target_sentence(encode_sentence_en, encode_sentence_es, mask, transformer, device, end_token, max_secuence_length).squeeze(0).cpu().numpy()
print(f"Encoded output: {encoded_output}")

decoded_output = decode_sentence(encoded_output, encoder.decode, max_secuence_length)
print(f"Decoded output: {decoded_output}")

Encoded output: [  189 19541 19541 56060 86785 93615 93075 56827 23109 23109 23109   190]
Decoded output:  ny ny Prayer Salvation()=> Restrictions watcher Ka Ka Ka


### Step 691

Cargamos los pesos

In [13]:
weights = "model/transformer_model_691_1728410.pth"
transformer = torch.load(weights, map_location='cpu')

if isinstance(transformer, nn.DataParallel):
    print("DataParallel")
    transformer = transformer.module

DataParallel


In [14]:
encoded_output = get_target_sentence(encode_sentence_en, encode_sentence_es, mask, transformer, device, end_token, max_secuence_length).squeeze(0).cpu().numpy()
print(f"Encoded output: {encoded_output}")

decoded_output = decode_sentence(encoded_output, encoder.decode, max_secuence_length)
print(f"Decoded output: {decoded_output}")

Encoded output: [  189 19541 19541 56060   409 93615 93075 56827 23109 23109  2158   190]
Decoded output:  ny ny Prayer de()=> Restrictions watcher Ka Kaftware


### Step 718

Cargamos los pesos

In [13]:
weights = "model/transformer_model_718_1861142.pth"
transformer = torch.load(weights, map_location='cpu')

if isinstance(transformer, nn.DataParallel):
    print("DataParallel")
    transformer = transformer.module

DataParallel


In [14]:
encoded_output = get_target_sentence(encode_sentence_en, encode_sentence_es, mask, transformer, device, end_token, max_secuence_length).squeeze(0).cpu().numpy()
print(f"Encoded output: {encoded_output}")

decoded_output = decode_sentence(encoded_output, encoder.decode, max_secuence_length)
print(f"Decoded output: {decoded_output}")

Encoded output: [  189 19541 19541 56060   409 93615 93075 93075 23109  2158  2158   190]
Decoded output:  ny ny Prayer de()=> Restrictions Restrictions Kaftwareftware


### Step 748

Cargamos los pesos

In [14]:
weights = "model/transformer_model_748_2008622.pth"
transformer = torch.load(weights, map_location='cpu')

if isinstance(transformer, nn.DataParallel):
    print("DataParallel")
    transformer = transformer.module

DataParallel


In [15]:
encoded_output = get_target_sentence(encode_sentence_en, encode_sentence_es, mask, transformer, device, end_token, max_secuence_length).squeeze(0).cpu().numpy()
print(f"Encoded output: {encoded_output}")

decoded_output = decode_sentence(encoded_output, encoder.decode, max_secuence_length)
print(f"Decoded output: {decoded_output}")

Encoded output: [  189 19541 56060 22180   409   409 93075 65612 65612  2158  2158   190]
Decoded output:  ny Prayer ale de de Restrictions.Cmd.Cmdftwareftware


### Step 813

Cargamos los pesos

In [13]:
weights = "model/transformer_model_813_2328162.pth"
transformer = torch.load(weights, map_location='cpu')

if isinstance(transformer, nn.DataParallel):
    print("DataParallel")
    transformer = transformer.module

DataParallel


In [14]:
encoded_output = get_target_sentence(encode_sentence_en, encode_sentence_es, mask, transformer, device, end_token, max_secuence_length).squeeze(0).cpu().numpy()
print(f"Encoded output: {encoded_output}")

decoded_output = decode_sentence(encoded_output, encoder.decode, max_secuence_length)
print(f"Decoded output: {decoded_output}")

Encoded output: [  189 74426 56060 22180   409   409 93075 93075   329  1832  2158   190]
Decoded output:  getModel Prayer ale de de Restrictions Restrictionsadóftware


### Step 830

Cargamos los pesos

In [None]:
weights = "model/transformer_model_830_2411734.pth"
transformer = torch.load(weights, map_location='cpu')

if isinstance(transformer, nn.DataParallel):
    print("DataParallel")
    transformer = transformer.module

DataParallel


In [None]:
encoded_output = get_target_sentence(encode_sentence_en, encode_sentence_es, mask, transformer, device, end_token, max_secuence_length).squeeze(0).cpu().numpy()
print(f"Encoded output: {encoded_output}")

decoded_output = decode_sentence(encoded_output, encoder.decode, max_secuence_length)
print(f"Decoded output: {decoded_output}")

Encoded output: [  189 74426 56060 22180   409   409 93075 93075   329   329  2158   190]
Decoded output:  getModel Prayer ale de de Restrictions Restrictionsadadftware


### Step 845

Cargamos los pesos

In [None]:
weights = "model/transformer_model_845_2485474.pth"
transformer = torch.load(weights, map_location='cpu')

if isinstance(transformer, nn.DataParallel):
    print("DataParallel")
    transformer = transformer.module

DataParallel


In [None]:
encoded_output = get_target_sentence(encode_sentence_en, encode_sentence_es, mask, transformer, device, end_token, max_secuence_length).squeeze(0).cpu().numpy()
print(f"Encoded output: {encoded_output}")

decoded_output = decode_sentence(encoded_output, encoder.decode, max_secuence_length)
print(f"Decoded output: {decoded_output}")

Encoded output: [  189 74426 56060 22180   409   409 93075 93075   329   329  2158   190]
Decoded output:  getModel Prayer ale de de Restrictions Restrictionsadadftware


### Step 917

Cargamos los pesos

In [None]:
weights = "model/transformer_model_917_2839426.pth"
transformer = torch.load(weights, map_location='cpu')

if isinstance(transformer, nn.DataParallel):
    print("DataParallel")
    transformer = transformer.module

DataParallel


In [None]:
encoded_output = get_target_sentence(encode_sentence_en, encode_sentence_es, mask, transformer, device, end_token, max_secuence_length).squeeze(0).cpu().numpy()
print(f"Encoded output: {encoded_output}")

decoded_output = decode_sentence(encoded_output, encoder.decode, max_secuence_length)
print(f"Decoded output: {decoded_output}")

Encoded output: [  189 74426   300   409    11    11  1832   329   329   329  2158   190]
Decoded output:  getModelas de,,óadadadftware


### Step 937

Cargamos los pesos

In [13]:
weights = "model/transformer_model_937_2937746.pth"
transformer = torch.load(weights, map_location='cpu')

if isinstance(transformer, nn.DataParallel):
    print("DataParallel")
    transformer = transformer.module

DataParallel


In [14]:
encoded_output = get_target_sentence(encode_sentence_en, encode_sentence_es, mask, transformer, device, end_token, max_secuence_length).squeeze(0).cpu().numpy()
print(f"Encoded output: {encoded_output}")

decoded_output = decode_sentence(encoded_output, encoder.decode, max_secuence_length)
print(f"Decoded output: {decoded_output}")

Encoded output: [  189 74426   735   409   409   409  1832  9779   329   329  1832   190]
Decoded output:  getModel K de de deóaceradadó
