In [1]:
# Show the current GPU status (utilisation and memory per device) via the NVIDIA CLI.
!nvidia-smi

Tue Jun 11 15:49:58 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.161.08             Driver Version: 535.161.08   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA RTX A6000               Off | 00000000:1A:00.0 Off |                  Off |
| 69%   86C    P2             296W / 300W |  44505MiB / 49140MiB |    100%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
|   1  NVIDIA RTX A6000               Off | 00000000:68:00.0 Off |  

In [None]:
# Quietly install PEFT straight from its GitHub repository, plus transformers and datasets.
# NOTE(review): none of these are version-pinned, so a re-run may pull different releases.
%pip -q install git+https://github.com/huggingface/peft transformers datasets

In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer, default_data_collator, get_linear_schedule_with_warmup
from peft import get_peft_config, get_peft_model, PromptTuningInit, PromptTuningConfig, TaskType, PeftType, PeftModel, PeftConfig
import torch
from datasets import load_dataset
import os
from torch.utils.data import DataLoader
from tqdm import tqdm

# Local path of the saved prompt-tuning adapter to analyse.
peft_model_id = "./prompt_tuning/wn18rr/PT/template-8/bloom_1b7"

# The adapter config names the base checkpoint the adapter belongs to; load the
# tokenizer and base weights from that name, then attach the adapter on top.
config = PeftConfig.from_pretrained(peft_model_id)
base_checkpoint = config.base_model_name_or_path
tokenizer = AutoTokenizer.from_pretrained(base_checkpoint)
model = PeftModel.from_pretrained(
    AutoModelForCausalLM.from_pretrained(base_checkpoint),
    peft_model_id,
)
# print(model)

# Detached copy of the adapter's soft-prompt embedding matrix, kept for later comparison.
final_prompt_embeddings = (
    model.prompt_encoder["default"].embedding.weight.detach().clone()
)
#device = "cuda:0"

In [None]:
# Tokenize one cloze-style prompt into PyTorch tensors and show the resulting token ids.
prompt_text = "Perform a sentence completion on the following sentence: seashore. 'seashore' part of speech is a ___.\nThe answer is"
inputs = tokenizer(prompt_text, return_tensors="pt")
print(inputs["input_ids"])

In [None]:
# `device` must be defined in this cell: no earlier cell assigns it, so on a fresh
# kernel the original cell failed with NameError. Falls back to CPU when CUDA is absent;
# change the index if GPU 0 is busy.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

model.to(device)
model.eval()  # inference only: make sure any train-time layers (e.g. dropout) are disabled

with torch.no_grad():
    # Keep the tokenized `inputs` untouched so this cell can be re-run safely.
    inputs_on_device = {name: tensor.to(device) for name, tensor in inputs.items()}
    outputs = model.generate(
        input_ids=inputs_on_device["input_ids"],
        attention_mask=inputs_on_device["attention_mask"],
        max_new_tokens=10,
        # NOTE(review): hard-coded stop id; presumably the id the adapter was trained
        # to emit at the end of an answer - confirm against the training script.
        eos_token_id=3,
    )
    print(tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True))

In [2]:
# Single source of truth for the base checkpoint used by the config, model and tokenizer.
bloom_checkpoint = "bigscience/bloom-1b7"

# Prompt-tuning setup whose 15 virtual tokens are initialised from the given text.
peft_config = PromptTuningConfig(
    task_type=TaskType.CAUSAL_LM,
    prompt_tuning_init=PromptTuningInit.TEXT,
    num_virtual_tokens=15,
    prompt_tuning_init_text="Classify part of speech of given word in the sentence to noun, verb, adjective or adverb",
    tokenizer_name_or_path=bloom_checkpoint,
)

# NOTE: this rebinds `model` and `tokenizer`; afterwards `model` is the freshly
# initialised PEFT model built here, not whatever was bound to the name before.
tokenizer = AutoTokenizer.from_pretrained(bloom_checkpoint)
model = get_peft_model(
    AutoModelForCausalLM.from_pretrained(bloom_checkpoint),
    peft_config,
)

# Detached copy of the soft-prompt embedding matrix right after initialisation.
initial_prompt_embeddings = (
    model.prompt_encoder["default"].embedding.weight.detach().clone()
)

In [3]:
# Print the initial soft-prompt matrix, then the final one, each on its own line(s).
print(initial_prompt_embeddings, final_prompt_embeddings, sep="\n")

tensor([[ 0.0035, -0.0311, -0.0123,  ..., -0.0041,  0.0064,  0.0044],
        [ 0.0231, -0.0135,  0.0042,  ..., -0.0420,  0.0073, -0.0134],
        [-0.0067,  0.0186,  0.0099,  ...,  0.0083,  0.0137,  0.0059],
        ...,
        [ 0.0142,  0.0210,  0.0028,  ..., -0.0022, -0.0075,  0.0117],
        [ 0.0036, -0.0082, -0.0080,  ...,  0.0016,  0.0066,  0.0115],
        [-0.0129,  0.0167, -0.0007,  ..., -0.0056,  0.0057,  0.0127]])
tensor([[-0.0545, -0.3119,  0.0728,  ...,  0.0219, -0.2641, -0.1139],
        [ 0.3051, -0.4491,  0.1069,  ..., -0.2051,  0.0627, -0.2501],
        [-0.4285,  0.4684,  0.0269,  ...,  0.4216,  0.3620,  0.1785],
        ...,
        [ 0.6141,  0.5778, -0.2369,  ...,  0.3119, -0.1957,  0.1479],
        [ 0.2356,  0.1989, -0.0877,  ..., -0.1797, -0.0481, -0.0640],
        [-0.0164,  0.3942, -0.1560,  ...,  0.1617,  0.7052,  0.1235]])


In [4]:
# One distance per virtual token: the norm (torch's default) of each row of the
# element-wise difference between the initial and final soft-prompt matrices.
embedding_delta = initial_prompt_embeddings - final_prompt_embeddings
embedding_difference = embedding_delta.norm(dim=-1)

# Report the per-token distances.
print("Differences between initial and final prompt embeddings:", embedding_difference)

Differences between initial and final prompt embeddings: tensor([12.2748, 11.9375, 12.1711, 13.1558, 12.3087, 12.7102, 12.3118, 15.2285,
        12.1644, 12.3274, 13.8013, 11.8667, 13.0428, 12.7000, 11.8865])


In [5]:
from sklearn.metrics.pairwise import cosine_similarity

def closest_words(embedding, tokenizer, model, top_k=5):
    """Return the vocabulary tokens whose input embeddings are closest to ``embedding``.

    Closeness is cosine similarity against every row of the model's input
    embedding matrix.

    Args:
        embedding: tensor holding a single embedding vector (flattened to one row).
        tokenizer: object exposing ``decode(list_of_ids) -> str``.
        model: object exposing ``get_input_embeddings()`` whose ``weight`` is the
            embedding matrix (one row per token id).
        top_k: how many neighbours to return; clamped to ``[0, number of rows]``.

    Returns:
        List of decoded token strings ordered from most to least similar.
    """
    # detach(): works even if the tensors track gradients.
    # float(): no-op for float32 weights, avoids half/bfloat16 pitfalls otherwise.
    word_embeddings = model.get_input_embeddings().weight.detach().float()
    query = (
        embedding.detach()
        .to(device=word_embeddings.device, dtype=word_embeddings.dtype)
        .reshape(1, -1)
    )
    # The (1, hidden) query broadcasts against the (vocab, hidden) matrix.
    similarities = torch.nn.functional.cosine_similarity(query, word_embeddings, dim=-1)

    k = max(0, min(int(top_k), similarities.numel()))
    if k == 0:
        return []
    # topk returns indices already sorted by descending similarity; tolist() yields
    # plain Python ints for the tokenizer.
    closest_indices = torch.topk(similarities, k).indices.tolist()
    return [tokenizer.decode([idx]) for idx in closest_indices]

# Walk the initial and final soft prompts in lockstep and, for each virtual token,
# list the vocabulary entries nearest to its embedding before and after tuning.
prompt_pairs = zip(initial_prompt_embeddings, final_prompt_embeddings)
for token_idx, (emb_before, emb_after) in enumerate(prompt_pairs):
    neighbours_before = closest_words(emb_before, tokenizer, model)
    neighbours_after = closest_words(emb_after, tokenizer, model)
    print(
        f"Token {token_idx}:",
        f"  Initial closest words: {neighbours_before}",
        f"  Final closest words: {neighbours_after}",
        sep="\n",
    )

Token 0:
  Initial closest words: ['Class', ' Class', ' class', '_class', 'class']
  Final closest words: [' SIP', ' praperadilan', ' ঘুর', 'gen', ' దల']
Token 1:
  Initial closest words: ['ify', 'ified', 'ifies', 'ifying', 'IFY']
  Final closest words: [">'", "_id'", 'sprintf', ' sentència', ' sprintf']
Token 2:
  Initial closest words: [' part', ' parte', ' parts', 'part', 'Part']
  Final closest words: [' parts', 'parts', ' part', ' Parts', '-part']
Token 3:
  Initial closest words: [' of', ',', ' de', ' and', '.']
  Final closest words: [' of', ' của', ' स', ' पर', ' के']
Token 4:
  Initial closest words: [' speech', ' Speech', ' speeches', ' discurso', 'spe']
  Final closest words: ['\n                  ', '\n          ', ' Bridge', ' साढ', ' ungg']
Token 5:
  Initial closest words: [' of', ',', ' de', ' and', '.']
  Final closest words: [' من', ' أن', ' of', ' của', '于']
Token 6:
  Initial closest words: [' given', 'given', ' Given', ' dado', ' donné']
  Final closest words: ['22

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, default_data_collator, get_linear_schedule_with_warmup
from peft import get_peft_config, get_peft_model, PromptTuningInit, PromptTuningConfig, TaskType, PeftType, PeftModel, PeftConfig
import torch
from datasets import load_dataset
import os
from torch.utils.data import DataLoader
from tqdm import tqdm

# This cell queries the plain Llama-3 instruct checkpoint. The adapter-loading
# lines are commented out; to apply a prompt-tuned adapter instead, restore:
#   peft_model_id = "./prompt_tuning/wn18rr/PT/template-4/llama3_chat"
#   config = PeftConfig.from_pretrained(peft_model_id)
#   model = PeftModel.from_pretrained(model, peft_model_id)
llama_checkpoint = "meta-llama/Meta-Llama-3-8B-Instruct"
model = AutoModelForCausalLM.from_pretrained(llama_checkpoint)
tokenizer = AutoTokenizer.from_pretrained(llama_checkpoint)
device = "cuda:1"

# Tokenize one cloze-style prompt and show its token ids.
prompt_text = "Perform a sentence completion on the following sentence: cover her face with a handkerchief. 'cover' part of speech is a ___. The answer is"
inputs = tokenizer(prompt_text, return_tensors="pt")
print(inputs["input_ids"])

model.to(device)

# Generate up to 10 new tokens without tracking gradients, then print both the raw
# ids and the decoded text (special tokens kept).
with torch.no_grad():
    inputs = {name: tensor.to(device) for name, tensor in inputs.items()}
    outputs = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=10,
    )
    print(outputs)
    decoded = tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=False)
    print(decoded)

In [None]:
import torch
# Ask PyTorch to release the GPU memory held in its caching allocator.
# NOTE(review): tensors that are still referenced (e.g. a loaded `model`) presumably
# stay on the GPU - `del` them first if the goal is to free the device; confirm.
torch.cuda.empty_cache()

In [None]:
# Run prepare_prompt_tuning.py as a subprocess with only GPU 1 visible:
# wn18rr / bloom_1b7 / template-3, soft prompt "PT", 15 virtual tokens, train_size 0.15.
!CUDA_VISIBLE_DEVICES=1 python3 prepare_prompt_tuning.py --kb_name=wn18rr --model_name=bloom_1b7 --template=template-3 --device=cuda --soft_prompt=PT --virtual_token=15 --train_size=0.15

In [None]:
# Run test.py as a subprocess with only GPU 1 visible: geonames / bloom_1b7 / template-4.
!CUDA_VISIBLE_DEVICES=1 python3 test.py --kb_name=geonames --model_name=bloom_1b7 --template=template-4 --device=cuda

In [None]:
# Run evaluator.py as a subprocess with only GPU 0 visible: wn18rr / bloom_1b7 / template-4.
!CUDA_VISIBLE_DEVICES=0 python3 evaluator.py --kb_name=wn18rr --model=bloom_1b7 --template=template-4