In [4]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the tokenizer and model
model_name = "gpt2"  # Replace with your model # what is the model id for gpt2? Answer: gpt2 is the model id for GPT-2 ? ANswr 
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
model.eval()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Tokenize the prompt
prompt = "Once upon a time"
input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)

# Set generation parameters
max_new_tokens = 50
temperature = 1.0
top_k = 3
top_p = 0.95
eos_token_id = tokenizer.eos_token_id

# Initialize the generated sequence with the input prompt
generated_ids = input_ids

# Autoregressive generation loop
for _ in range(max_new_tokens):
    # Forward pass to get logits
    with torch.no_grad():
        outputs = model(input_ids=generated_ids)
        logits = outputs.logits[:, -1, :]  # Get logits for the last generated token

    # Apply temperature scaling
    logits = logits / temperature

    # Apply top-k filtering
    if top_k > 0:
        top_k_values, top_k_indices = torch.topk(logits, top_k, dim=-1)
        logits[logits < top_k_values[:, -1, None]] = -float('Inf')

    # Apply top-p (nucleus) filtering
    if top_p < 1.0:
        sorted_logits, sorted_indices = torch.sort(logits, descending=True, dim=-1)
        cumulative_probs = torch.cumsum(torch.softmax(sorted_logits, dim=-1), dim=-1)
        sorted_indices_to_remove = cumulative_probs > top_p
        sorted_indices_to_remove[:, 1:] = sorted_indices_to_remove[:, :-1].clone()
        sorted_indices_to_remove[:, 0] = 0
        for batch_idx in range(logits.size(0)):
            indices_to_remove = sorted_indices[batch_idx][sorted_indices_to_remove[batch_idx]]
            logits[batch_idx, indices_to_remove] = -float('Inf')

    # Convert logits to probabilities and sample the next token
    probabilities = torch.softmax(logits, dim=-1)
    next_token_id = torch.multinomial(probabilities, num_samples=1)

    # Append the sampled token to the generated sequence
    generated_ids = torch.cat([generated_ids, next_token_id], dim=-1)

    # Check for end-of-sequence token
    if next_token_id.item() == eos_token_id:
        break

# Decode the generated sequence into text
generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
print(generated_text)

  _torch_pytree._register_pytree_node(


Once upon a time, the world's greatest artists were all born to be masters of their craft. The first to be born in a world of music, the first to be born to be an artist, and the last to be born in the world of art.



In [5]:
import torch
from transformers import AutoProcessor, LlavaForConditionalGeneration
import requests

from PIL import Image

# Load model and processor
model_name = "llava-hf/llava-1.5-7b-hf"  # Replace with the correct LLaVA model
processor = AutoProcessor.from_pretrained(model_name)
model = LlavaForConditionalGeneration.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
model.eval()

# Prompt and image input
# image_path = "path_to_your_image.jpg"  # Replace with your image path
url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/ai2d-demo.jpg"
image = Image.open(requests.get(url, stream=True).raw)
prompt = "<s> USER: <image> What does the label 15 represent? (1) lava (2) core (3) tunnel (4) ash cloud ASSISTANT:"

# Preprocess image and text
inputs = processor(text=prompt, images=image, return_tensors="pt").to(model.device)
input_ids = inputs["input_ids"]  # Initial input IDs
pixel_values = inputs["pixel_values"]  # Image pixel values

# Set generation parameters
max_new_tokens = 50
temperature = 1.0
top_k = 50
top_p = 0.95
eos_token_id = processor.tokenizer.eos_token_id

# Initialize generated_ids
generated_ids = input_ids

# Autoregressive decoding loop
for _ in range(max_new_tokens):
    # Forward pass: Pass the updated input_ids and static pixel_values
    with torch.no_grad():
        outputs = model(input_ids=generated_ids, pixel_values=pixel_values)

    # Get logits for the last token
    logits = outputs.logits[:, -1, :]

    # Apply temperature scaling
    logits = logits / temperature

    # Top-k filtering
    if top_k > 0:
        top_k_values, top_k_indices = torch.topk(logits, top_k, dim=-1)
        logits[logits < top_k_values[:, -1, None]] = -float('Inf')

    # Top-p (nucleus) filtering
    if top_p < 1.0:
        sorted_logits, sorted_indices = torch.sort(logits, descending=True)
        cumulative_probs = torch.cumsum(torch.softmax(sorted_logits, dim=-1), dim=-1)
        sorted_indices_to_remove = cumulative_probs > top_p
        sorted_indices_to_remove[:, 1:] = sorted_indices_to_remove[:, :-1].clone()
        sorted_indices_to_remove[:, 0] = 0
        for batch_idx in range(logits.size(0)):
            indices_to_remove = sorted_indices[batch_idx][sorted_indices_to_remove[batch_idx]]
            logits[batch_idx, indices_to_remove] = -float('Inf')

    # Sample the next token
    probabilities = torch.softmax(logits, dim=-1)
    next_token_id = torch.multinomial(probabilities, num_samples=1)

    # Append the sampled token to generated_ids
    generated_ids = torch.cat([generated_ids, next_token_id], dim=-1)

    # Decode and print the generated token
    decoded_token = processor.tokenizer.decode(next_token_id.item(), skip_special_tokens=True)
    print(decoded_token, end=" ", flush=True)

    # Break if EOS token is generated
    if next_token_id.item() == eos_token_id:
        break

# Final output decoding
generated_text = processor.tokenizer.decode(generated_ids[0], skip_special_tokens=True)
print("\n\nGenerated Output:")
print(generated_text)

Some kwargs in processor config are unused and will not have any effect: num_additional_image_tokens. 


Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

The label  1 5 represents the L ava C ave feature in the graph ic illustr ation . n , indicating the location of a large under ground passage filled with mol ten la va , as part of an active vol cano . This cave can be used 

Generated Output:
 USER:   What does the label 15 represent? (1) lava (2) core (3) tunnel (4) ash cloud ASSISTANT: The label 15 represents the Lava Cave feature in the graphic illustration.n, indicating the location of a large underground passage filled with molten lava, as part of an active volcano. This cave can be used


In [6]:
max_new_tokens = 50
temperature = 1.0
top_k = 50
top_p = 0.95

outputs = model.generate(**inputs, max_new_tokens=max_new_tokens, temperature=temperature, top_k=top_k, top_p=top_p)
generated_text = processor.decode(outputs[0], skip_special_tokens=True)
print(generated_text)



 USER:   What does the label 15 represent? (1) lava (2) core (3) tunnel (4) ash cloud ASSISTANT: The label 15 represents the lava flow.

In the image, there is a diagram of a mountain with a lava flow, and the number 15 is placed near the lava flow. This number likely represents the depth
