In [1]:
import transformers
import torch
import os
import json

In [7]:
ROOT = ".."
PROMPT_PATH = "../prompts/"
MAX_NEW_TOKENS = 50
TEMPERATURE = 0.99
N_OUTPUTS = 10

In [3]:
with open(os.path.join(ROOT, 'token.txt'), 'r') as f:
    HF_TOKEN = f.read()

In [4]:
model_id = "meta-llama/Llama-3.2-1B-Instruct"
pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device_map="auto",
    token=HF_TOKEN
)

Device set to use cuda:0


In [5]:
#  List of prompts
# prompts = [
#     "Explain the concept of deep learning.",
#     "What are the main differences between Python and Java?",
#     "Describe the process of photosynthesis."
# ]

# # Generate responses for each prompt
# for prompt in prompts:
#     messages = [
#         {"role": "system", "content": "You are a helpful assistant."},
#         {"role": "user", "content": prompt}
#     ]
    
#     outputs = pipeline(
#         messages,
#         max_new_tokens=256,
#         do_sample=True,
#         temperature=0.7,
#         top_p=0.9,
#     )
    
#     print(f"Prompt: {prompt}")
#     print(f"Response: {outputs[0]['generated_text']}")
#     print("-" * 50)

In [6]:
dataset = "dtd"

with open(os.path.join(PROMPT_PATH, f"{dataset}/{dataset}_prompts_full.json"), 'r') as f:
    templates = json.load(f)
with open(os.path.join(PROMPT_PATH, f"meta_prompts.json"), 'r') as f:
    meta_prompts = json.load(f)[dataset]

classnames = list(templates[dataset].keys())
print(f"dataset {dataset} has {len(classnames)} classes and {len(meta_prompts)} meta-prompts.")

dataset dtd has 47 classes and 5 meta-prompts.


In [26]:
instructions = """Please format your response as one that contains only lower case letters and no special characters (including new lines, bold, and any markdown artifacts) other than a period ('.') or commas (','). 
The response should be a single sentence ending in a period that is directed toward the final instruction in this message. Your sentence should be a minimum of three words and maximum of thirty."""

num_seeds = N_OUTPUTS // len(meta_prompts)
for class_ in classnames[0:3]:
    inputs = [instructions + " " + p.replace("{c}", class_) for p in meta_prompts]
    formatted_inputs = [f"<|user|>{user_input}" for user_input in inputs]
    for seed in range(num_seeds):
        torch.manual_seed(seed)
        outputs = pipeline(
            formatted_inputs,
            max_new_tokens=MAX_NEW_TOKENS,
            do_sample=True,
            temperature=TEMPERATURE,
            top_p=0.9,
            pad_token_id=128001,
            return_full_text=False
        )
        print(outputs)

[[{'generated_text': ' \n\n.g. gauzy. material. resembles. a. white. fog. that. gradually. dissipates. into. the. air. gauzy. material. has. a. soft. and. airy. texture. and.'}], [{'generated_text': " \n\ni apologize for the format in my previous message. it seems my formatting has been truncated. here's a revised version of what i was trying to say, but i guess my system decided it needed to be rewritten. let me try again. a"}], [{'generated_text': ' a. transparent. b. semi-opaque. c. opac. \n\nNote: a gauzy texture typically has a somewhat blurred appearance, and the colors seem muted. it can take on a range of hues depending on the surrounding environment.'}], [{'generated_text': ' \n\nby a person who lives in a house with a large garden. \n\nI remember the summer I spent in the house, it was a time of great change, for the garden was in full bloom, and the colors of the flowers were as vibrant'}], [{'generated_text': ".'.<p>no</p>"}]]
[[{'generated_text': ' \n\ngauzy. delicate. \n\