**1. IMPORTAZIONE LIBRERIE**

In [1]:
import vec2text
import torch
from transformers import AutoModel, AutoTokenizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.translate.bleu_score import sentence_bleu
import numpy as np
import warnings

# Suppress all warnings for the rest of the notebook session.
# NOTE(review): the blanket "ignore" action also hides deprecation and
# numerical warnings; consider narrowing it with a category or module filter.
warnings.filterwarnings("ignore")


**2. FUNZIONI PER OTTENERE EMBEDDING E QUANTIZZAZIONI**

In [2]:
# Compute sentence embeddings with a GTR encoder followed by mean pooling.
def get_gtr_embeddings(text_list, encoder, tokenizer):
    """Embed a list of strings with the given encoder and tokenizer.

    Args:
        text_list: Strings to embed.
        encoder: Encoder called with ``input_ids`` and ``attention_mask``; its
            output must expose ``last_hidden_state``.
        tokenizer: Callable tokenizer; it is asked for PyTorch tensors padded
            and truncated to 128 tokens, and its result must support ``.to``.

    Returns:
        The result of ``vec2text.models.model_utils.mean_pool`` applied to the
        encoder's last hidden state and the attention mask, computed without
        gradient tracking.
    """
    # Follow the encoder's own device instead of hard-coding "cuda", so the
    # function also works on CPU-only hosts or when the encoder lives on a
    # different accelerator than the default one.
    parameters = getattr(encoder, "parameters", None)
    first_param = next(parameters(), None) if callable(parameters) else None
    if first_param is not None:
        device = first_param.device
    else:
        # Encoder exposes no parameters: fall back to CUDA when available.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    inputs = tokenizer(
        text_list,
        return_tensors="pt",
        max_length=128,
        truncation=True,
        padding="max_length"
    ).to(device)

    with torch.no_grad():
        model_output = encoder(input_ids=inputs['input_ids'], attention_mask=inputs['attention_mask'])
        hidden_state = model_output.last_hidden_state
        # Pool with the attention mask so padding positions are handled by mean_pool.
        embeddings = vec2text.models.model_utils.mean_pool(hidden_state, inputs['attention_mask'])

    return embeddings

# Quantize embeddings to torch.uint8 using per-vector min/max scaling.
def quantize_embeddings(embeddings, bits=8):
    """Linearly quantize each embedding vector onto ``2 ** bits`` levels.

    The minimum and maximum are taken along the last dimension, so every
    vector gets its own scaling range.

    Args:
        embeddings: Floating-point tensor; the last dimension is quantized.
        bits: Number of bits per value. Must be between 1 and 8 because the
            result is stored as ``torch.uint8``.

    Returns:
        A tuple ``(quantized, embeddings_min, embeddings_max)`` where
        ``quantized`` is a ``torch.uint8`` tensor shaped like ``embeddings``
        and the min/max tensors keep the last dimension with size 1.

    Raises:
        ValueError: If ``bits`` is outside the range 1..8.
    """
    if not 1 <= bits <= 8:
        # More than 8 bits would silently overflow in the uint8 cast below.
        raise ValueError(
            f"bits must be between 1 and 8 to fit in torch.uint8, got {bits}"
        )

    scale = 2 ** bits - 1
    embeddings_min = embeddings.min(dim=-1, keepdim=True).values
    embeddings_max = embeddings.max(dim=-1, keepdim=True).values

    # A constant vector has zero range; dividing by it would produce NaN.
    # Use 1 as the denominator there so every entry quantizes to level 0,
    # which de-quantizes back to the vector's (single) value.
    value_range = embeddings_max - embeddings_min
    safe_range = torch.where(value_range == 0, torch.ones_like(value_range), value_range)

    quantized = ((embeddings - embeddings_min) / safe_range) * scale
    quantized = quantized.round().to(torch.uint8)  # store as uint8
    return quantized, embeddings_min, embeddings_max

# Map quantized levels back to floating-point embeddings.
def dequantize_embeddings(quantized, embeddings_min, embeddings_max, bits=8):
    """Invert min/max linear quantization.

    Args:
        quantized: Tensor of integer quantization levels.
        embeddings_min: Minimum used when quantizing (broadcast against ``quantized``).
        embeddings_max: Maximum used when quantizing (broadcast against ``quantized``).
        bits: Number of bits used when quantizing.

    Returns:
        A float32 tensor of reconstructed values.
    """
    max_level = 2 ** bits - 1
    # Cast to float32 first so the division below is a floating-point one.
    levels_as_float = quantized.to(torch.float32)
    span = embeddings_max - embeddings_min
    return (levels_as_float / max_level) * span + embeddings_min

# Report how much smaller the quantized tensor is, as a percentage.
def calculate_compression_percentage(original_embeddings, quantized_embeddings):
    """Return the storage saving of the quantized tensor in percent.

    Sizes are measured in bytes as ``numel() * element_size()``. The min/max
    tensors needed for de-quantization are not counted.

    Args:
        original_embeddings: Tensor before quantization.
        quantized_embeddings: Tensor after quantization.

    Returns:
        ``(original_bytes - quantized_bytes) / original_bytes * 100``.
    """
    def _size_in_bytes(tensor):
        return tensor.numel() * tensor.element_size()

    original_size = _size_in_bytes(original_embeddings)
    quantized_size = _size_in_bytes(quantized_embeddings)
    return (original_size - quantized_size) / original_size * 100

**3. CARICAMENTO MODELLI**

In [3]:
# Load the GTR encoder and its tokenizer from the same checkpoint.
GTR_CHECKPOINT = "sentence-transformers/gtr-t5-base"
encoder = AutoModel.from_pretrained(GTR_CHECKPOINT).encoder.to("cuda")
tokenizer = AutoTokenizer.from_pretrained(GTR_CHECKPOINT)

# Load the pretrained vec2text corrector for the "gtr-base" embedder.
corrector = vec2text.load_pretrained_corrector("gtr-base")

Some weights of T5Model were not initialized from the model checkpoint at sentence-transformers/gtr-t5-base and are newly initialized: ['decoder.block.0.layer.0.SelfAttention.k.weight', 'decoder.block.0.layer.0.SelfAttention.o.weight', 'decoder.block.0.layer.0.SelfAttention.q.weight', 'decoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight', 'decoder.block.0.layer.0.SelfAttention.v.weight', 'decoder.block.0.layer.0.layer_norm.weight', 'decoder.block.0.layer.1.EncDecAttention.k.weight', 'decoder.block.0.layer.1.EncDecAttention.o.weight', 'decoder.block.0.layer.1.EncDecAttention.q.weight', 'decoder.block.0.layer.1.EncDecAttention.v.weight', 'decoder.block.0.layer.1.layer_norm.weight', 'decoder.block.0.layer.2.DenseReluDense.wi.weight', 'decoder.block.0.layer.2.DenseReluDense.wo.weight', 'decoder.block.0.layer.2.layer_norm.weight', 'decoder.block.1.layer.0.SelfAttention.k.weight', 'decoder.block.1.layer.0.SelfAttention.o.weight', 'decoder.block.1.layer.0.SelfAttention.q.weig

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

**4. TESTI ED EMBEDDINGS**

In [4]:
# Sentences to embed and later reconstruct from their embeddings.
text_list = [
    "My name is John Smith, I am 19 and a student in the college of New York. My favorite courses are Geometry and French.",
    "The conference will be held on September 15th, 2024, at the Hilton Hotel, with over 300 attendees expected.",
    "The company reported a 12% increase in revenue for the second quarter, surpassing analysts' expectations.",
    "The rocket launch is scheduled for 10:30 AM, with a payload consisting of scientific instruments and satellites.",
]

# Embed every sentence with the GTR encoder.
embeddings = get_gtr_embeddings(
    text_list,
    encoder,
    tokenizer,
)


**5. ESECUZIONE OPERAZIONI**

In [5]:
# Bit width shared by the quantization and de-quantization steps.
QUANT_BITS = 8

# Quantize the embeddings, keeping the per-vector min/max for the inverse step.
quantized_embeddings, embeddings_min, embeddings_max = quantize_embeddings(
    embeddings, bits=QUANT_BITS
)

# Reconstruct floating-point embeddings from the quantized levels.
dequantized_embeddings = dequantize_embeddings(
    quantized_embeddings, embeddings_min, embeddings_max, bits=QUANT_BITS
)

# Invert the de-quantized embeddings back to text with the vec2text corrector.
inverted_texts = vec2text.invert_embeddings(
    embeddings=dequantized_embeddings.cuda(),
    corrector=corrector,
    num_steps=20,
    sequence_beam_width=4,
)


**6. STAMPA RISULTATI**

In [6]:
# Compute the per-sentence compression percentage first, then print each
# original sentence next to its reconstruction.
compression_percentages = [
    calculate_compression_percentage(orig_emb, quant_emb)
    for orig_emb, quant_emb in zip(embeddings, quantized_embeddings)
]

for original, inverted, compression_percentage in zip(
    text_list, inverted_texts, compression_percentages
):
    print(f"Original: {original}")
    print(f"Inverted (Quantized): {inverted}")
    print(f"Compression Percentage: {compression_percentage:.2f}%\n")

Original: My name is John Smith, I am 19 and a student in the college of New York. My favorite courses are Geometry and French.
Inverted (Quantized): My name is John Smith, I am 19 and a student in the college of New York. My favorite courses are Geometry and French. 
Compression Percentage: 75.00%

Original: The conference will be held on September 15th, 2024, at the Hilton Hotel, with over 300 attendees expected.
Inverted (Quantized): The conference will be held on September 15th, 2024, at the Hilton Hotel,    with over 300 attendees expected. 
Compression Percentage: 75.00%

Original: The company reported a 12% increase in revenue for the second quarter, surpassing analysts' expectations.
Inverted (Quantized): The company reported a     12% increase in revenue for the second quarter, surpassing analysts' expectations. 
Compression Percentage: 75.00%

Original: The rocket launch is scheduled for 10:30 AM, with a payload consisting of scientific instruments and satellites.
Inverted (Q

**7. VALUTAZIONE**

In [7]:
# Reconstruction quality: embedding-space similarity.
def calculate_cosine_similarity(original_texts, inverted_texts):
    """Return the mean cosine similarity between paired text embeddings.

    Both lists are re-embedded with the module-level ``encoder`` and
    ``tokenizer``; texts are paired by position.

    Args:
        original_texts: Reference strings.
        inverted_texts: Reconstructed strings, in the same order.

    Returns:
        The ``np.mean`` of the per-pair cosine similarities.
    """
    def _as_row(embedding):
        # sklearn's cosine_similarity expects 2-D inputs: one row per sample.
        return embedding.cpu().numpy().reshape(1, -1)

    original_embeddings = get_gtr_embeddings(original_texts, encoder, tokenizer)
    inverted_embeddings = get_gtr_embeddings(inverted_texts, encoder, tokenizer)

    pair_scores = [
        cosine_similarity(_as_row(orig_emb), _as_row(inv_emb))[0][0]
        for orig_emb, inv_emb in zip(original_embeddings, inverted_embeddings)
    ]
    return np.mean(pair_scores)

def calculate_bleu_score(original_texts, inverted_texts):
    """Return the mean sentence-level BLEU of reconstructions against originals.

    Texts are paired by position and tokenized by whitespace splitting; each
    original is the single reference for its reconstruction.

    Args:
        original_texts: Reference strings.
        inverted_texts: Candidate strings, in the same order.

    Returns:
        The ``np.mean`` of the per-pair ``sentence_bleu`` scores.
    """
    pair_scores = [
        sentence_bleu([reference_text.split()], candidate_text.split())
        for reference_text, candidate_text in zip(original_texts, inverted_texts)
    ]
    return np.mean(pair_scores)

# Compute both evaluation metrics on the same (original, reconstructed) pair.
evaluation_inputs = (text_list, inverted_texts)
cosine_similarity_score = calculate_cosine_similarity(*evaluation_inputs)
bleu_score = calculate_bleu_score(*evaluation_inputs)

print(f"Average Cosine Similarity: {cosine_similarity_score}")
print(f"Average BLEU Score: {bleu_score}")

Average Cosine Similarity: 0.9999998807907104
Average BLEU Score: 1.0


**VERIFICHE**

In [14]:
# Total absolute error between two embedding tensors.
def check_embedding_difference(original_embeddings, quantized_embeddings):
    """Return the sum of element-wise absolute differences as a Python float.

    Args:
        original_embeddings: Reference tensor.
        quantized_embeddings: Tensor to compare against the reference.

    Returns:
        ``sum(|original - quantized|)`` over all elements.
    """
    absolute_error = (original_embeddings - quantized_embeddings).abs()
    return absolute_error.sum().item()

# Compare the original embeddings with their de-quantized reconstruction.
difference = check_embedding_difference(
    original_embeddings=embeddings,
    quantized_embeddings=dequantized_embeddings,
)
print(f"Difference between original and quantized embeddings: {difference:.6f}")


Difference between original and quantized embeddings: 0.864703
