In [1]:
import torch
import inspect
from typing import Any, Callable, Dict, List, Optional, Union
from tqdm.auto import tqdm
import numpy as np
import torch.nn.functional as F
import math

from transformers import AutoTokenizer, BertForMaskedLM
from diffusers import DDIMScheduler, DDPMScheduler, DPMSolverMultistepScheduler
import numpy as np
import matplotlib.pyplot as plt

from src.modeling_diffbert_sample import DiffBertForDiffusion
from src.modeling_diffllama import DiffLlamaForDiffusionLM
from src.configuration_diffbert import DiffBertConfig

    

    
# model(inputs_embeds=inputs_embeds, timesteps=timesteps).logits.shape

In [2]:
# Checkpoint directories: the tokenizer and the scheduler config are read from the
# base sample checkpoint, the model weights from the prompt-trained checkpoint.
BASE_CHECKPOINT = "models/diffllama-mini-sample"
TRAINED_CHECKPOINT = "models/diffllama-mini-sample-trained-prompts"

tokenizer = AutoTokenizer.from_pretrained(BASE_CHECKPOINT)
# Register '<pad>' as the padding token (padding="max_length" is used in a later cell).
tokenizer.add_special_tokens({'pad_token': '<pad>'})

# Scheduler configuration stored next to the base checkpoint.
# Commented-out alternative: DDIMScheduler(prediction_type="sample", num_train_timesteps=2000)
scheduler = DDPMScheduler.from_pretrained(BASE_CHECKPOINT)

# Half-precision weights, moved to the GPU; `device` is reused by the sampling cells.
model = DiffLlamaForDiffusionLM.from_pretrained(
    TRAINED_CHECKPOINT,
    torch_dtype=torch.float16,
).to("cuda")
device = model.device

# Disabled: separate embedding module used by an earlier version of this notebook.
# embedding = BertForMaskedLM.from_pretrained("neuralmind/bert-base-portuguese-cased").to(device)#torch.nn.Embedding(model.config.vocab_size, model.config.hidden_size).to(device)
# embedding.load_state_dict(torch.load('diffbert-mini/embedding_weights.bin'))

In [3]:
# Replace the scheduler loaded in the previous cell with a freshly configured DDPM
# scheduler: 2500 training timesteps and prediction_type="sample".
scheduler = DDPMScheduler(
    num_train_timesteps=2500,
    prediction_type="sample",
)


In [4]:

def retrieve_timesteps(
    scheduler,
    num_inference_steps: Optional[int] = None,
    device: Optional[Union[str, torch.device]] = None,
    timesteps: Optional[List[int]] = None,
    **kwargs,
):
    """
    Configure `scheduler` via its `set_timesteps` method and hand back the resulting schedule.

    Two modes are supported: a default schedule derived from `num_inference_steps`, or a custom
    schedule given explicitly through `timesteps`. Extra keyword arguments are forwarded to
    `scheduler.set_timesteps` unchanged.

    Args:
        scheduler (`SchedulerMixin`):
            Scheduler to configure and read the timesteps from.
        num_inference_steps (`int`, *optional*):
            Number of diffusion steps to use. Only consulted when `timesteps` is `None`.
        device (`str` or `torch.device`, *optional*):
            Forwarded to `set_timesteps` as `device`.
        timesteps (`List[int]`, *optional*):
            Custom timestep schedule. When given, it takes precedence over `num_inference_steps`
            and the returned step count is the length of the scheduler's resulting schedule.

    Returns:
        `Tuple[torch.Tensor, int]`: the scheduler's `timesteps` attribute after configuration and
        the number of inference steps.

    Raises:
        ValueError: if `timesteps` is given but `scheduler.set_timesteps` has no `timesteps`
        parameter.
    """
    # Default spacing: let the scheduler derive the schedule from the step count.
    if timesteps is None:
        scheduler.set_timesteps(num_inference_steps, device=device, **kwargs)
        return scheduler.timesteps, num_inference_steps

    # Custom spacing: only usable when the scheduler's signature exposes `timesteps`.
    supported_params = inspect.signature(scheduler.set_timesteps).parameters
    if "timesteps" not in supported_params:
        raise ValueError(
            f"The current scheduler class {scheduler.__class__}'s `set_timesteps` does not support custom"
            f" timestep schedules. Please check whether you are using the correct scheduler."
        )

    scheduler.set_timesteps(timesteps=timesteps, device=device, **kwargs)
    schedule = scheduler.timesteps
    return schedule, len(schedule)

def get_timesteps(num_inference_steps, strength, device):
    """Return the tail of the current schedule selected by `strength`.

    Reads the module-level `scheduler` (its `timesteps` and `order` attributes), so
    `scheduler.set_timesteps` must already have been called. `device` is accepted but
    not used by this function.

    Args:
        num_inference_steps: Length of the full schedule.
        strength: Fraction of the schedule to keep; `int(num_inference_steps * strength)`
            steps are kept, capped at `num_inference_steps`.
        device: Unused.

    Returns:
        A tuple `(timesteps, steps)`: the slice of `scheduler.timesteps` to iterate over
        and the number of inference steps that slice corresponds to.
    """
    # How many steps to actually run, never more than the full schedule.
    steps_to_run = min(int(num_inference_steps * strength), num_inference_steps)

    # Index (in inference steps) of the first step to run.
    first_step = max(num_inference_steps - steps_to_run, 0)
    remaining = scheduler.timesteps[first_step * scheduler.order :]

    return remaining, num_inference_steps - first_step

def id_to_one_hot(token_ids, vocab_size=None):
    """Encode token ids as the rows of a one-hot matrix.

    Args:
        token_ids: Iterable of integer token ids (for example a list or a 1-D tensor).
        vocab_size: Width of each row. When omitted, `tokenizer.vocab_size` of the
            module-level tokenizer is looked up at call time, so a tokenizer reloaded
            in a later cell is picked up without redefining this function.

    Returns:
        A tensor of shape `(len(token_ids), vocab_size)` created with `torch.zeros`
        defaults, holding 1 at each row's token id. An empty `token_ids` yields an
        empty `(0, vocab_size)` tensor.

    Raises:
        IndexError: if a token id does not fit into `vocab_size`.
    """
    if vocab_size is None:
        # NOTE(review): `vocab_size` may not count tokens added via `add_special_tokens`
        # (such as '<pad>'); pass `len(tokenizer)` explicitly if such ids can occur — confirm.
        vocab_size = tokenizer.vocab_size

    ids = list(token_ids)
    # Allocate the whole matrix once instead of one vector per token.
    one_hot = torch.zeros((len(ids), vocab_size))
    for row, token_id in enumerate(ids):
        one_hot[row, token_id] = 1
    return one_hot

def get_max_indices(list_of_tensors):
    """Return, for each tensor, the flat index of its largest element as a Python int.

    Args:
        list_of_tensors: Iterable of tensors.

    Returns:
        A list with one integer per input tensor, in input order.
    """
    return [torch.argmax(entry).item() for entry in list_of_tensors]
# Map score vectors to indices by taking the argmax over the last dimension.
def vectors_to_indices(vectors):
    """Return the position of the largest entry along the last dimension of `vectors`.

    No embedding lookup or similarity computation takes place; the input is used as-is.

    Args:
        vectors: Tensor whose last dimension holds one score per candidate index.

    Returns:
        Integer tensor with the last dimension of `vectors` reduced away.
    """
    return torch.argmax(vectors, dim=-1)
def sample_text(probabilities, temperature=1.0):
    """Sample one token id per position from temperature-scaled scores.

    The input is divided by `temperature` and passed through a softmax over the last
    dimension before sampling, i.e. it is treated like logits even though the
    parameter is called `probabilities`.

    Args:
        probabilities: Tensor of shape `(batch_size, seq_len, vocab_size)`.
        temperature: Non-negative scaling factor. Values below 1 sharpen the
            distribution; `0` selects the highest-scoring token deterministically.

    Returns:
        Integer tensor of shape `(batch_size, seq_len)` with the chosen token ids.

    Raises:
        ValueError: if `temperature` is negative.
    """
    if temperature < 0:
        raise ValueError(f"temperature must be non-negative, got {temperature}")

    batch_size, seq_len, vocab_size = probabilities.size()

    # Dividing by zero would produce inf/NaN scores; the limit of the scaled softmax
    # is a greedy pick, so return that directly.
    if temperature == 0:
        return torch.argmax(probabilities, dim=-1)

    # `reshape` also accepts non-contiguous inputs (e.g. transposed tensors), and the
    # float32 cast keeps the softmax out of half precision.
    flat_scores = probabilities.reshape(batch_size * seq_len, vocab_size).float()
    token_probs = F.softmax(flat_scores / temperature, dim=-1)

    sampled = torch.multinomial(token_probs, 1)
    return sampled.view(batch_size, seq_len)

In [5]:




with torch.no_grad():
    # Shape of the latent batch: (sequences, positions per sequence, latent width).
    batch_size, seq_len, latent_dim = 8, 64, 4096
    # Print the decoded batch every `log_every` denoising steps (1 = every step).
    log_every = 1

    # Start from uniform noise in [0, 1), cast to half precision to match the model weights.
    # NOTE(review): torch.rand is uniform, not Gaussian — confirm this matches the noise used in training.
    latents = torch.rand((batch_size, seq_len, latent_dim), device=device).to(torch.float16)
    attention_mask = torch.ones((batch_size, seq_len), device=device)

    num_inference_steps = 2000
    timesteps = None  # set to an explicit list, e.g. [999, 500, 1], for a custom schedule
    timesteps, num_inference_steps = retrieve_timesteps(scheduler, num_inference_steps, device, timesteps)

    # Wrapping the schedule itself (not the enumerate object) lets tqdm show the total.
    for step_index, t in enumerate(tqdm(timesteps)):
        latent_model_input = scheduler.scale_model_input(latents, t)

        outputs = model(
            input_embeds=latent_model_input,
            timesteps=t.reshape(1,).long().to(device),
            attention_mask=attention_mask,
        )
        # Named `noise_pred` for historical reasons; it is the model's last hidden state and is
        # what gets passed to the scheduler (configured with prediction_type="sample").
        noise_pred = outputs.last_hidden_state
        # Per-position vocabulary scores, kept in a global for the inspection cells below.
        latents_final = outputs.logits

        if step_index % log_every == 0:
            for n in range(latents_final.shape[0]):
                print(tokenizer.decode(vectors_to_indices(latents_final[n]), skip_special_tokens=True))
            print("---------------")

        # Advance the latents one step along the schedule: x_t -> x_t-1.
        step = scheduler.step(noise_pred, t, latents, return_dict=True)
        latents = step["prev_sample"]

0it [00:00, ?it/s]

a a,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
a,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
a,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
a a,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
a of,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
---------------
a of,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
a,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
a a,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
a,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
a of,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
a of of a,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
a a,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,

In [52]:
# Display the last `outputs.last_hidden_state` left in the kernel by a sampling loop.
noise_pred

tensor([[[-0.2250, -0.3081,  0.6484,  ...,  0.9854,  0.7769, -1.5811],
         [ 0.0786, -0.6016,  1.6348,  ..., -1.0537,  0.3479, -1.1104],
         [ 1.4375, -0.3169, -1.3896,  ..., -0.0406,  0.0459,  0.0362],
         ...,
         [-0.6318,  0.7417, -0.8677,  ..., -0.1237, -1.6895, -1.1328],
         [-0.6343,  0.7358, -0.8711,  ..., -0.1194, -1.6934, -1.1348],
         [-0.6255,  0.7319, -0.8838,  ..., -0.1175, -1.7012, -1.1348]]],
       device='cuda:0', dtype=torch.float16)

In [95]:

# Five highest-scoring vocabulary entries at the first position of the first sequence.
first_position_scores = latents_final[0, 0]
top_k_values, top_k_indices = first_position_scores.topk(5)
print(top_k_values)
print(top_k_indices[0])  # id of the single best-scoring entry
# Decode all five candidate ids together; the string is shown as the cell result.
tokenizer.decode(top_k_indices)

tensor([9.9941e-01, 1.3838e-05, 7.5826e-06, 6.8771e-06, 5.4615e-06],
       device='cuda:0')
tensor(1, device='cuda:0')


'<s> impress har du L'

In [42]:
# Turn the logits of the most recent forward pass into per-position probabilities.
# Reading from `outputs.logits` (the tensor `latents_final` was assigned from in the
# sampling loop) keeps this cell idempotent: re-running it no longer applies a softmax
# on top of an earlier softmax.
latents_final = torch.softmax(outputs.logits, dim=-1)

In [43]:
# Display the current contents of `latents_final` (torch abbreviates large tensors).
latents_final

tensor([[[5.4790e-10, 9.9929e-01, 9.7483e-10,  ..., 1.1442e-09,
          1.0479e-09, 1.6914e-08],
         [1.6510e-09, 3.3481e-09, 3.0664e-09,  ..., 1.8929e-09,
          3.8461e-09, 2.2680e-08],
         [1.2597e-09, 8.4278e-09, 5.3445e-10,  ..., 1.2056e-09,
          1.4814e-09, 1.1261e-08],
         ...,
         [3.8036e-11, 3.1856e-08, 5.1786e-11,  ..., 3.2917e-11,
          6.5977e-11, 6.2433e-08],
         [4.9039e-10, 1.6812e-08, 6.6376e-10,  ..., 9.0372e-10,
          1.0422e-09, 1.2409e-08],
         [4.1336e-16, 8.3412e-10, 5.6499e-16,  ..., 1.2244e-15,
          1.2055e-15, 2.5504e-06]]], device='cuda:0')

In [91]:
# Greedy (argmax) token ids of the first sequence, reused for the raw and the decoded view.
greedy_ids = vectors_to_indices(latents_final[0])
print(greedy_ids)

# Low-temperature sample of the batch (first sequence shown), then the greedy decode for comparison.
sampled_ids = sample_text(latents_final, .09)
print(tokenizer.decode(sampled_ids[0]))
print(tokenizer.decode(greedy_ids))



tensor([    1,   385, 11158,  9593, 13173,   411,   263,  3700,   327,   322,
          270,   293,  6964, 29892,  8494, 29871, 29892,   491, 10090,  2690,
        29892,   273,  2931, 29892, 15301, 29892,   364,   265,   298,  3573,
        29892, 15301,  8569, 29892,  6964,  1616, 29892, 13436,  8632,   362,
        29892,  6964,  1616, 29892,  6964,   371, 29892, 15301,  2654,   413,
        29872, 29892,   534,  2548,   373,  1616, 19569, 29892,   534,  2548,
          373,  1616, 19569, 29892], device='cuda:0')
tant an intricate detailed Zeit a faceot and dic conceptчень ult injured, by wallibli,an characterттON, ron h body, sharp focus, anni art, digital illustrationSPointer André, concept By,かsockутe, Bishopendingui artstation¡ low approximately mental shoulderstation,
<s> an intricate detailed with a faceot and dic concept, ult , by wallibli,an character, sharp, ron h body, sharp focus, concept art, digital illustration, concept art, conceptte, sharp red ke, trending on artstat

In [15]:
# Register '<pad>' as the tokenizer's padding token (same call as in the loading cell);
# the value displayed below the cell is the call's return value.
tokenizer.add_special_tokens({'pad_token': '<pad>'})

1

In [20]:

with torch.no_grad():
    prompt = "A portrait of a beautiful woman in a pink dress, trending on artstation, highly detailed"
    # Tokenize to a fixed length of 64 positions and embed the ids with the model.
    input_ids = tokenizer(prompt, padding="max_length", max_length=64, return_tensors="pt").to(device)
    latents = model.apply_embeddings(input_ids.input_ids)
    # NOTE(review): torch.rand_like is uniform in [0, 1), not Gaussian — confirm this matches training.
    noise = torch.rand_like(latents)

    # Fraction of the schedule to run: the prompt embeddings are noised up to that
    # point and then denoised again.
    strength = 0.6
    num_inference_steps = 1000
    timesteps = None  # set to an explicit list, e.g. [999, 500, 1], for a custom schedule
    print(latents)

    timesteps, num_inference_steps = retrieve_timesteps(scheduler, num_inference_steps, device, timesteps)
    timesteps, num_inference_steps = get_timesteps(num_inference_steps, strength, device)

    # Noise the embeddings to the first timestep that will be denoised. Passing only
    # that timestep avoids building one noised copy per remaining timestep and then
    # discarding all but the first.
    noisy_latents = scheduler.add_noise(latents, noise, timesteps[:1])
    print(noise.shape)
    print(latents.shape)
    print(noisy_latents.shape)

    # Wrapping the schedule itself (not an enumerate object) lets tqdm show the total.
    for t in tqdm(timesteps):
        latent_model_input = scheduler.scale_model_input(noisy_latents, t)

        outputs = model(
            input_embeds=latent_model_input,
            timesteps=t.reshape(1,).long().to(device),
            attention_mask=input_ids.attention_mask,
        )
        # Named `noise_pred` for historical reasons; it is the model's last hidden state and is
        # what gets passed to the scheduler.
        noise_pred = outputs.last_hidden_state
        latents_final = outputs.logits

        # `vectors_to_indices` takes the score tensor only; the former second argument
        # (`embedding`) is not defined anywhere in this notebook.
        print(tokenizer.decode(vectors_to_indices(latents_final[0])))

        # Advance the noised latents one step along the schedule: x_t -> x_t-1.
        step = scheduler.step(noise_pred, t, noisy_latents, return_dict=True)
        noisy_latents = step["prev_sample"]

tensor([[[-0.2471, -0.3223,  0.7031,  ...,  0.8828,  0.8906, -1.5000],
         [ 0.2773, -0.6055,  0.0669,  ...,  1.0234,  1.7734, -0.0840],
         [ 0.6680, -0.0908, -1.2188,  ..., -0.9922, -0.7031,  0.4980],
         ...,
         [-0.6562,  0.7734, -0.9219,  ..., -0.0466, -1.6172, -1.1406],
         [-0.6562,  0.7734, -0.9219,  ..., -0.0466, -1.6172, -1.1406],
         [-0.6562,  0.7734, -0.9219,  ..., -0.0466, -1.6172, -1.1406]]],
       device='cuda:0', dtype=torch.float16)
torch.Size([1, 64, 4096])
torch.Size([1, 64, 4096])
torch.Size([1, 64, 4096])


0it [00:00, ?it/s]

<s>,, of,,,,,,ink painting, trending on artstation, highly detailed,,,,,,<pad>,,,,<pad>,<pad>,,<pad>, k,<pad>,<pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
<s>,, of,,,, a,ink painting, trending on artstation,  detailed,ang<pad>,,,<pad>,,<pad>,<pad><pad><pad><pad>,<pad><pad><pad><pad><pad>,,<pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
<s>,, of,,,, cin,ink art, trending on artstation,  detailedangang<pad>,,<pad><pad>,,<pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>,<pad>
<s>,, of,,, in cin,ink detailed, trending on artstation, highly detailedangang<pad><pad>,<pad><pad>,,<pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
<s> A, of,,, in cin,ink b

In [9]:
# latents_final = torch.softmax(latents_final, dim=-1)
# latents_final

In [16]:
# Greedy (argmax) token ids of the first sequence. `vectors_to_indices` takes the score
# tensor only; the former `embedding` argument is not defined anywhere in this notebook.
greedy_ids = vectors_to_indices(latents_final[0])
print(greedy_ids)

# Low-temperature sample of the first sequence, then the greedy decode for comparison.
print(tokenizer.decode(sample_text(latents_final, .1)[0]))
print(tokenizer.decode(greedy_ids))

tensor([ 2207,  2414,  1010,  2563,  1006, 13229,  1007,  1011,  1011,  1000,
         2031,  2732,  4303,  2579,  4633,  2102,  2000,  1037,  2117,  7938,
         2867,  2585,  1006,  1002,  9295,  1012,  1015,  2454,  1007,  2579,
         2004,  2002,  2323,  2324,  2006,  6928,  2117], device='cuda:0')
released london, england ( cnn ) - - " have star doors taken weather got to a second kenya players david ( $ chelsea. 1 million ) robbery as he should 18 on monday second
released london, england ( cnn ) - - " have star doors taken weathert to a second kenya players david ( $ chelsea. 1 million ) taken as he should 18 on monday second
