In [30]:
import vec2text
import torch
from transformers import AutoModel, AutoTokenizer

def get_gtr_embeddings(text_list, encoder, tokenizer):
    """Embed a batch of texts with a GTR (T5-based dense retriever) encoder.

    Each text is tokenized (truncated/padded to 128 tokens), passed through
    ``encoder`` without gradient tracking, and the last hidden states are
    mean-pooled using the attention mask.

    Args:
        text_list: Texts to embed, in any form accepted by ``tokenizer``.
        encoder: Encoder module called with ``input_ids`` and
            ``attention_mask``; its output must expose ``last_hidden_state``.
        tokenizer: Tokenizer matching ``encoder``.

    Returns:
        The tensor produced by ``vec2text``'s ``mean_pool`` (presumably one
        row per input text -- confirm against the vec2text implementation).
        It lives on the same device as ``encoder``.
    """
    # Follow the encoder's device instead of hard-coding "cuda", so the
    # function also works on CPU or on a non-default GPU.
    device = next(encoder.parameters()).device

    # Tokenize to a fixed length of 128 tokens per text.
    inputs = tokenizer(
        text_list,
        return_tensors="pt",
        max_length=128,
        truncation=True,
        padding="max_length",
    ).to(device)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        model_output = encoder(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
        )
        hidden_state = model_output.last_hidden_state
        # The attention mask is passed so pooling can account for padding.
        embeddings = vec2text.models.model_utils.mean_pool(
            hidden_state, inputs["attention_mask"]
        )

    return embeddings

# Load the GTR encoder (only the encoder stack is kept) and its tokenizer.
GTR_CHECKPOINT = "sentence-transformers/gtr-t5-base"
encoder = AutoModel.from_pretrained(GTR_CHECKPOINT).encoder.to("cuda")
tokenizer = AutoTokenizer.from_pretrained(GTR_CHECKPOINT)

# Load the pretrained vec2text corrector for the "gtr-base" embedder.
corrector = vec2text.load_pretrained_corrector("gtr-base")

# Sentences to embed and then reconstruct from their embeddings.
text_list = [
    "Jack Morris is a PhD student at Cornell Tech in New York City. He is a very good student and now he is the CEO of TESLA.",
]

# Embed the sentences.
embeddings = get_gtr_embeddings(text_list, encoder, tokenizer)

# Invert the embeddings back into text: 20 correction steps with a
# sequence-level beam width of 4.
inverted_texts = vec2text.invert_embeddings(
    embeddings=embeddings.to("cuda"),
    corrector=corrector,
    num_steps=20,
    sequence_beam_width=4,
)

# Show each original sentence next to its reconstruction.
for original, inverted in zip(text_list, inverted_texts):
    print(f"Original: {original}\nInverted: {inverted}\n")


Some weights of T5Model were not initialized from the model checkpoint at sentence-transformers/gtr-t5-base and are newly initialized: ['decoder.block.0.layer.0.SelfAttention.k.weight', 'decoder.block.0.layer.0.SelfAttention.o.weight', 'decoder.block.0.layer.0.SelfAttention.q.weight', 'decoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight', 'decoder.block.0.layer.0.SelfAttention.v.weight', 'decoder.block.0.layer.0.layer_norm.weight', 'decoder.block.0.layer.1.EncDecAttention.k.weight', 'decoder.block.0.layer.1.EncDecAttention.o.weight', 'decoder.block.0.layer.1.EncDecAttention.q.weight', 'decoder.block.0.layer.1.EncDecAttention.v.weight', 'decoder.block.0.layer.1.layer_norm.weight', 'decoder.block.0.layer.2.DenseReluDense.wi.weight', 'decoder.block.0.layer.2.DenseReluDense.wo.weight', 'decoder.block.0.layer.2.layer_norm.weight', 'decoder.block.1.layer.0.SelfAttention.k.weight', 'decoder.block.1.layer.0.SelfAttention.o.weight', 'decoder.block.1.layer.0.SelfAttention.q.weig

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

Original: Jack Morris is a PhD student at Cornell Tech in New York City. He is a very good student and now he is the CEO of TESLA.
Inverted: Jack Morris is a PhD student at Cornell Tech in New York City. He is a very good student and now the CEO of TESLA.

