In [1]:
from transformers import AutoTokenizer
from diffusers import DDPMScheduler
import numpy as np
import matplotlib.pyplot as plt
from modeling_diffbert import DiffBertForDiffusion
from configuration_diffbert import DiffBertConfig
import torch
import inspect
from typing import Any, Callable, Dict, List, Optional, Union
from tqdm.auto import tqdm



    

    
# model(inputs_embeds=inputs_embeds, timesteps=timesteps).logits.shape

In [2]:
tokenizer = AutoTokenizer.from_pretrained("neuralmind/bert-base-portuguese-cased")
scheduler = DDPMScheduler()
model = DiffBertForDiffusion.from_pretrained("diffbert-mini-trained").to("cuda")
device = model.device
embedding = torch.nn.Embedding(model.config.vocab_size, model.config.hidden_size).to(device)
embedding.load_state_dict(torch.load('diffbert-mini/embedding_weights.bin'))

<All keys matched successfully>

In [3]:

def retrieve_timesteps(
    scheduler,
    num_inference_steps: Optional[int] = None,
    device: Optional[Union[str, torch.device]] = None,
    timesteps: Optional[List[int]] = None,
    **kwargs,
):
    """
    Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
    custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.

    Args:
        scheduler (`SchedulerMixin`):
            The scheduler to get timesteps from.
        num_inference_steps (`int`):
            The number of diffusion steps used when generating samples with a pre-trained model. If used,
            `timesteps` must be `None`.
        device (`str` or `torch.device`, *optional*):
            The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
        timesteps (`List[int]`, *optional*):
                Custom timesteps used to support arbitrary spacing between timesteps. If `None`, then the default
                timestep spacing strategy of the scheduler is used. If `timesteps` is passed, `num_inference_steps`
                must be `None`.

    Returns:
        `Tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
        second element is the number of inference steps.
    """
    if timesteps is not None:
        accepts_timesteps = "timesteps" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
        if not accepts_timesteps:
            raise ValueError(
                f"The current scheduler class {scheduler.__class__}'s `set_timesteps` does not support custom"
                f" timestep schedules. Please check whether you are using the correct scheduler."
            )
        scheduler.set_timesteps(timesteps=timesteps, device=device, **kwargs)
        timesteps = scheduler.timesteps
        num_inference_steps = len(timesteps)
    else:
        scheduler.set_timesteps(num_inference_steps, device=device, **kwargs)
        timesteps = scheduler.timesteps
    return timesteps, num_inference_steps

def id_to_one_hot(token_ids, vocab_size=tokenizer.vocab_size):
    one_hot_vectors = []
    for token_id in token_ids:
        # Create a zero-filled array with length equal to vocab_size
        one_hot = torch.zeros(vocab_size)
        # Set the value at the index of the token ID to 1
        one_hot[token_id] = 1
        one_hot_vectors.append(one_hot)
    return torch.stack(one_hot_vectors, dim=0)

def get_max_indices(list_of_tensors):
    max_indices = []
    for tensor in list_of_tensors:
        # Get the index of the maximum value in the tensor
        index = torch.argmax(tensor).item()
        max_indices.append(index)
    return max_indices
# Function to transform vectors back to indices
def vectors_to_indices(vectors, embedding):
    # Calculate cosine similarity between vectors and all embedding weights
    similarity = torch.matmul(vectors, embedding.weight.T)
    
    # Get the index of the most similar embedding for each vector
    indices = torch.argmax(similarity, dim=1)
    
    return indices

In [10]:




latents = torch.rand((1, 64, 768), device=device)
print(latents)
num_inference_steps = 1000
timesteps=None#[999, 500, 1]
timesteps, num_inference_steps = retrieve_timesteps(scheduler, num_inference_steps, device, timesteps)
print(timesteps)
for i, t in tqdm(enumerate(timesteps)):
    # expand the latents if we are doing classifier free guidance
    latent_model_input =  latents
    latent_model_input = scheduler.scale_model_input(latent_model_input, t)
    # predict the noise residual
    noise_pred = model(
        inputs_embeds=latent_model_input,
        timesteps=t.reshape(1,).to(device),
        # encoder_hidden_states=prompt_embeds,
        # timestep_cond=timestep_cond,
        # cross_attention_kwargs=self.cross_attention_kwargs,
        # added_cond_kwargs=added_cond_kwargs,
        # return_dict=False,
    ).logits



    # compute the previous noisy sample x_t -> x_t-1
    latents = scheduler.step(noise_pred, t, latents, return_dict=False)[0]

tensor([[[0.7254, 0.0372, 0.0386,  ..., 0.9575, 0.1290, 0.6060],
         [0.7589, 0.4363, 0.1371,  ..., 0.2471, 0.7065, 0.2323],
         [0.9780, 0.7494, 0.5051,  ..., 0.9611, 0.1641, 0.0235],
         ...,
         [0.1705, 0.9530, 0.2505,  ..., 0.8179, 0.4565, 0.7365],
         [0.8882, 0.5730, 0.7132,  ..., 0.9168, 0.2951, 0.6713],
         [0.0859, 0.0052, 0.2802,  ..., 0.0771, 0.6213, 0.9036]]],
       device='cuda:0')
tensor([999, 998, 997, 996, 995, 994, 993, 992, 991, 990, 989, 988, 987, 986,
        985, 984, 983, 982, 981, 980, 979, 978, 977, 976, 975, 974, 973, 972,
        971, 970, 969, 968, 967, 966, 965, 964, 963, 962, 961, 960, 959, 958,
        957, 956, 955, 954, 953, 952, 951, 950, 949, 948, 947, 946, 945, 944,
        943, 942, 941, 940, 939, 938, 937, 936, 935, 934, 933, 932, 931, 930,
        929, 928, 927, 926, 925, 924, 923, 922, 921, 920, 919, 918, 917, 916,
        915, 914, 913, 912, 911, 910, 909, 908, 907, 906, 905, 904, 903, 902,
        901, 900, 899, 8

0it [00:00, ?it/s]

In [11]:
latents

tensor([[[-0.5556,  0.2821,  0.9652,  ..., -0.1067, -0.1024,  0.1055],
         [ 0.2921,  0.1844, -0.2657,  ...,  0.5672,  1.0000,  0.3432],
         [ 0.4761, -0.1221, -0.2770,  ...,  0.7733,  0.6842,  0.9755],
         ...,
         [-0.9979, -0.1217,  0.2603,  ...,  0.4429,  0.7443,  0.7620],
         [-1.0000, -0.4854,  0.2244,  ..., -0.5563,  0.5207,  0.4344],
         [-0.7882, -0.3605,  0.9690,  ...,  0.3023,  0.1312,  0.9118]]],
       device='cuda:0', grad_fn=<AddBackward0>)

In [12]:
print(vectors_to_indices(latents[0], embedding))
print(tokenizer.decode(vectors_to_indices(latents[0], embedding)))

tensor([17597, 20467, 15288,  1202, 21304,  2211,  2782,  9505, 21557,   123,
        12680,  9440,   898,  6662, 23797,  1439,  9078,  4707, 10455,  3991,
          383,   117,  3752,  9189,   117, 21015,  1422,  6660,   117, 20318,
          770,   117, 10970,   128,   117,  1695, 14066, 13962,  5220,   117,
          117,   117, 11489,  3978,  7040, 13061,   117,  6855,   301,  9520,
        12306,   117, 15365,  2176,  2177,   122,  3702, 18358,   117,  4707,
        18775,   225,  1887,  2428], device='cuda:0')
pás AbrahamPP aleuar profissaugu peranteUT a Rico Guin si Windows็ min pred fundo Libertadores tentativaec, inferób, Giro Sul sobera,bero já, buraos, pouco efetivo esportiva nós,,, notic Men acel homônimo, frequentvechtmail,indeãosculo e equipes Cis, fundoicargu 〉 revista


In [1]:
import torch
import torch.nn as nn

# Define the size of vocabulary and embedding dimension
vocab_size = 100  # Example vocabulary size
embedding_dim = 50  # Example embedding dimension size

# Instantiate nn.Embedding module
embedding = nn.Embedding(vocab_size, embedding_dim)

# Random indices for demonstration purposes
indices = torch.tensor([3, 7, 15])  # Example input indices

# Convert indices to vectors using the embedding layer
vectors = embedding(indices)

# Display the vectors corresponding to the input indices
print("Vectors corresponding to input indices:")
print(vectors)

# Function to transform vectors back to indices
def vectors_to_indices(vectors, embedding):
    # Calculate cosine similarity between vectors and all embedding weights
    similarity = torch.matmul(vectors, embedding.weight.T)
    
    # Get the index of the most similar embedding for each vector
    indices = torch.argmax(similarity, dim=1)
    
    return indices

# Convert vectors back to indices
recovered_indices = vectors_to_indices(vectors, embedding)

# Display the indices recovered from vectors
print("\nRecovered indices from vectors:")
print(recovered_indices)

Vectors corresponding to input indices:
tensor([[-7.7292e-02,  1.1234e+00, -1.1162e+00,  9.5290e-01,  7.0411e-01,
          9.3934e-02,  1.2905e-01, -6.1421e-01, -4.7354e-01,  1.8669e+00,
          1.3230e+00,  7.4839e-01,  3.6166e-01, -7.6501e-01, -3.1029e-01,
         -1.3262e+00, -1.2330e+00, -2.1209e-01, -1.2452e+00,  6.3154e-01,
         -4.2177e-01, -6.7838e-01,  1.8145e-01, -2.4687e-01,  4.7213e-01,
          2.9644e-01,  5.5261e-01, -1.4998e+00, -3.2089e-01,  1.9922e+00,
         -2.7300e-01, -1.3218e+00, -2.0146e-01,  1.8222e-02, -1.4948e+00,
         -5.4760e-01, -3.8630e-01, -6.9837e-01, -1.0270e-01,  8.3724e-01,
         -6.1612e-02, -1.1182e+00,  3.0394e+00, -2.8233e-01,  7.6667e-01,
         -2.0013e-01,  1.4309e+00, -4.0717e-01, -7.3446e-01,  8.6851e-02],
        [-8.8197e-01, -8.1627e-01, -9.9473e-01, -2.0596e-01, -4.3363e-01,
         -1.3574e+00,  8.7575e-01,  4.4570e-02,  6.7288e-01, -8.9306e-01,
         -5.2451e-01,  6.8276e-02,  9.4779e-01,  8.5183e-01,  1.8238e+0