# Huggingface Test

In [6]:
from transformers import AutoTokenizer
import transformers
import torch

model = "meta-llama/Llama-2-7b-chat-hf"

tokenizer = AutoTokenizer.from_pretrained(model)
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    torch_dtype=torch.float16,
    device_map="auto",
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
sequences = pipeline(
    'I liked "Breaking Bad" and "Band of Brothers". Do you have any recommendations of other shows I might like?\n',
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    max_length=200,
)
for seq in sequences:
    print(f"Result: {seq['generated_text']}")

In [None]:
sequences = pipeline(
    'I liked "Breaking Bad" and "Band of Brothers". Do you have any recommendations of other shows I might like?\n',
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    max_length=200,
)
for seq in sequences:
    print(f"Result: {seq['generated_text']}")

In [None]:
sample_input = "a power cord on a ski lift"
#see how the tokenizer tokenizes the input
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
print(tokenizer.tokenize(sample_input))

# Get Probabilities of generated tokens

In [3]:
from transformers import AutoTokenizer, LlamaForCausalLM

model_name = "meta-llama/Llama-2-7b-chat-hf"
model = LlamaForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [16]:
import json
import random

def give_prompts_outputs_and_lengths(desc_file_name):

    input_path_prompt = "/export/home/ru86qer/classify_by_description_release/prompts/prompt_0.txt"

    with open(input_path_prompt, 'r') as f:
        prompt = f.read()

    file_name_1 = desc_file_name.split(".")[0] + "_whole_outputs.json"
    input_path_whole_outputs = '/export/home/ru86qer/classify_by_description_release/descriptors_meta_info/'+file_name_1

    with open(input_path_whole_outputs, 'r') as f:
        whole_outputs = json.load(f)

    classes = list(whole_outputs.keys())

    lengths = []
    prompts_plus_outputs = [prompt]*len(classes)
    for i,class_name in enumerate(classes):
        prompt_plus_output = prompts_plus_outputs[i].format(category_name=class_name.replace('_', ' '))
        lengths.append(len(prompt_plus_output))
        prompts_plus_outputs[i] = prompt_plus_output
        prompts_plus_outputs[i] += whole_outputs[class_name]
    
    return prompts_plus_outputs, lengths


# desc_file_name = "descriptors_cub_llama2_prompt_0_doSampleFalse_run_0.json"

In [3]:
prompts_plus_outputs_1, lengths_1 =give_prompts_outputs_and_lengths("descriptors_cub_llama2_prompt_0_run_0.json")
prompts_plus_outputs_2, lengths_2 =give_prompts_outputs_and_lengths("descriptors_cub_llama2_prompt_0_topk1_run_0.json")

In [4]:
import torch
import tqdm

def give_token_probs(tokenizer, model, prompts_plus_outputs, lengths):
    
    acc = []

    for i, input_text in tqdm.tqdm(enumerate(prompts_plus_outputs)):
        
        input_ids = tokenizer.encode(input_text, return_tensors="pt")

        with torch.no_grad():
            outputs = model(input_ids)
            logits = outputs.logits

        
        probs = torch.nn.functional.softmax(logits, dim=-1)

        generated_answer = input_text[lengths[i]:]
        gen_answer_ids = tokenizer.encode(generated_answer)
        gen_answer_probs = [probs[0, i, input_ids[0, i]].item() for i in range(len(input_ids[0]) - len(gen_answer_ids), len(input_ids[0]))]

        
        average_probability = sum(gen_answer_probs) / len(gen_answer_probs)

        acc.append(average_probability)

    print("Average of averages {0}".format(sum(acc) / len(acc)))
    
    return acc


In [5]:
acc_topk10 = give_token_probs(tokenizer, model,prompts_plus_outputs_1,lengths_1)
acc_topk1 = give_token_probs(tokenizer, model,prompts_plus_outputs_2,lengths_2)

200it [25:20,  7.60s/it]


Average of averages 0.006796153096871086


200it [16:01,  4.81s/it]

Average of averages 0.010578365354993135





In [6]:
print("min acc_top1: ", min(acc_topk1))
print("index of min acc_top1: ", acc_topk1.index(min(acc_topk1)))

min acc_top1:  6.755461273571999e-05
index of min acc_top1:  123


In [8]:
print("max acc_top1: ", max(acc_topk1))
print("index of max acc_top1: ", acc_topk1.index(max(acc_topk1)))

max acc_top1:  0.02975448280754543
index of max acc_top1:  59


In [15]:
print("Minimal average token probability: \n"+prompts_plus_outputs_2[123])
print("\n\n ############################################################## \n\n")
print("Maximum average token probability: \n"+prompts_plus_outputs_2[59])

Minimal average token probability: 
Q: What are useful visual features for distinguishing a lemur in a photo?
A: There are several useful visual features to tell there is a lemur in a photo:
- four-limbed primate
- black, grey, white, brown, or red-brown
- wet and hairless nose with curved nostrils
- long tail
- large eyes
- furry bodies
- clawed hands and feet

Q: What are useful visual features for distinguishing a television in a photo?
A: There are several useful visual features to tell there is a television in a photo:
- electronic device
- black or grey
- a large, rectangular screen
- a stand or mount to support the screen
- one or more speakers
- a power cord
- input ports for connecting to other devices
- a remote control

Q: What are useful features for distinguishing a Le_Conte_Sparrow in a photo?
A: There are several useful visual features to tell there is a Le_Conte_Sparrow in a photo:
- small, dark sparrow
- distinctive black stripes on the head, neck, and back
- white str

In [18]:
print("min acc_top5: ", min(acc_topk10))
print("index of min acc_top5: ", acc_topk10.index(min(acc_topk10)))

min acc_top5:  4.603858009410944e-05
index of min acc_top5:  177


In [28]:
path = "/export/home/ru86qer/classify_by_description_release/prompts/prompt_1.txt"

with open(path, 'r') as f:
    prompt = f.read()

sequences = pipeline(
    prompt.format(category_name_1="Black footed albatross", category_name_0="Laysan albatross"),
    do_sample=True,
    top_k=1,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    max_length=500,
)
for seq in sequences:
    print(f"Result: {seq['generated_text']}")

Result: Please answer the following question using bullet points '-'. Behind each bullet point, a sentence shall be in a format that is shown in the following template:

- While for A feature X looks like this, for B feature X looks like that.

Ideally, feature X is a visual feature that is useful for distinguishing a Black footed albatross from a Laysan albatross in a photo.

It is essential to use the keyword 'While' and the comma (',') because we need it for further processing. Please do not use any other commas in your sentences.

When you have answered the question, please just stop the output.

Now comes your part:
Q: What are useful features for distinguishing a Black footed albatross from a Laysan albatross in a photo?
A: There are several useful visual features to tell apart a Black footed albatross from a a Laysan albatross in a photo:
- While  the Black footed albatross has a distinctive white stripe on its upperpart, the Laysan albatross has a more mottled appearance with l

# Generate contrastive responses

In [None]:
pairs_to_contrast =[("Red cockaded Woodpecker", "American Three toed Woodpecker"),
                    ("Louisiana Waterthrush", "Northern Waterthrush"),
                    ("Chuck will Widow","Nighthawk"),
                    ("Anna Hummingbird","Ruby throated Hummingbird"),
                    ("Artic Tern","Common Tern")]

In [None]:
model_name= "meta-llama/Llama-2-7b-chat-hf"
tokenizer = AutoTokenizer.from_pretrained(model_name)
print("Cuda available: ", torch.cuda.is_available())
model = LlamaForCausalLM.from_pretrained(
    model_name,
    device_map = "auto",
)


inputs = tokenizer(prompt, return_tensors="pt")
inputs = inputs.to('cuda')
# Generate
generate_ids = model.generate(inputs.input_ids, max_length=len(inputs.input_ids[0])+answer_length, top_k=top_k, num_return_sequences=1, do_sample=True)

decoded_responses = tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
response_texts = [resp[len(prompt):] for resp in decoded_responses.split('\n-')]