In [1]:
from transformers import pipeline

def main():
    """Summarize a short passage about protons and print the result.

    Builds a Hugging Face ``summarization`` pipeline backed by the
    ``facebook/bart-large-cnn`` checkpoint, runs it once on a hard-coded
    paragraph, and prints the ``summary_text`` of the first result.
    """
    # Create a summarization pipeline using a pretrained model
    summarizer = pipeline(
        task="summarization",
        model="facebook/bart-large-cnn"
    )

    # The apostrophe in "element's" used to be mojibake (a UTF-8 curly quote
    # decoded with the wrong codec); it is a plain ASCII apostrophe now so the
    # model is not fed garbage characters.
    text = """
    A proton is a stable subatomic particle with a positive electric charge. 
    It is slightly lighter than a neutron and much heavier than an electron. 
    Protons and neutrons together form the nucleons found in atomic nuclei. 
    Every atom's nucleus contains one or more protons, and the number of 
    protons determines the element's atomic number and its chemical properties.
    """

    summary = summarizer(
        text,
        max_length=60,   # max number of tokens in summary
        min_length=10,   # minimum number of tokens
        do_sample=False  # no sampling, so repeated runs give the same output
    )

    # The pipeline returns a list with one dict per input; we passed one text.
    print("Summary:\n", summary[0]["summary_text"])

# Run the demo when the cell/script is executed directly.
if __name__ == "__main__":
    main()


Device set to use cuda:0


Summary:
 A proton is a stable subatomic particle with a positive electric charge. It is slightly lighter than a neutron and much heavier than an electron. Every atom's nucleus contains one or more protons.


In [2]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch

def main():
    """Summarize a short passage with BART using the tokenizer/model API.

    Loads the ``facebook/bart-large-cnn`` tokenizer and seq2seq model, moves
    the model and inputs to the GPU when one is available, generates a summary
    with beam search, and prints the decoded text.
    """
    # Load tokenizer and model
    tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
    model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")

    # Use GPU if available
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(device)

    text = """
    A proton is a stable subatomic particle with a positive electric charge. 
    It is slightly lighter than a neutron and much heavier than an electron. 
    Protons and neutrons together form the nucleons found in atomic nuclei. 
    The number of protons defines the atomic number and chemical properties 
    of an element.
    """

    # Convert text to tokens the model can understand
    inputs = tokenizer(
        [text],
        max_length=1024,   # Maximum tokens allowed for the model input
        truncation=True,   # Truncate text if it's too long
        return_tensors="pt"  # return PyTorch tensors
    )
    # Input tensors must live on the same device as the model.
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Generate summary using beam search
    summary_ids = model.generate(
        **inputs,
        max_length=60,       # Max tokens in the summary
        min_length=10,       # Minimum tokens in the summary
        # Beam scores (negative log-likelihoods) are divided by
        # length ** length_penalty, so values > 0 favour LONGER summaries and
        # values < 0 favour shorter ones.
        length_penalty=2.0,
        num_beams=4,         # Beam search width (explores 4 candidates per step)
        early_stopping=True  # Stop as soon as num_beams finished candidates exist
    )

    # Decode tokens back into readable text
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    print("Summary:\n", summary)

# Run the demo when the cell/script is executed directly.
if __name__ == "__main__":
    main()


Summary:
 A proton is a stable subatomic particle with a positive electric charge. It is slightly lighter than a neutron and much heavier than an electron. Protons and neutrons together form the nucleons found in atomic nuclei.


In [1]:
# Use a pipeline as a high-level helper
from transformers import pipeline
import torch
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Pass the device explicitly: previously `device` was computed but never used,
# leaving placement to whatever the pipeline picked by default.
pipe = pipeline(
    "image-text-to-text",
    model="nanonets/Nanonets-OCR2-3B",
    device=device,
)

# A single-turn chat: one user message holding an image URL and a question.
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]

# Last expression of the cell, so the notebook displays the pipeline result.
pipe(text=messages)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0


[{'input_text': [{'role': 'user',
    'content': [{'type': 'image',
      'url': 'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG'},
     {'type': 'text', 'text': 'What animal is on the candy?'}]}],
  'generated_text': [{'role': 'user',
    'content': [{'type': 'image',
      'url': 'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG'},
     {'type': 'text', 'text': 'What animal is on the candy?'}]},
   {'role': 'assistant', 'content': 'chameleon'}]}]

In [None]:
# Load model directly
from transformers import AutoProcessor, AutoModelForVision2Seq

# Load the processor (chat template + tokenization + image preprocessing) and
# the model weights for the same checkpoint.
processor = AutoProcessor.from_pretrained("nanonets/Nanonets-OCR2-3B")
model = AutoModelForVision2Seq.from_pretrained("nanonets/Nanonets-OCR2-3B")

# A single-turn chat: one user message holding an image URL and a question.
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]

# Render the chat into model inputs and place them on the model's device.
inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)

# `outputs` starts with the prompt tokens, so slice them off to keep only the
# newly generated answer. skip_special_tokens drops chat/EOS marker tokens so
# only the readable answer is printed.
prompt_length = inputs["input_ids"].shape[-1]
print(processor.decode(outputs[0][prompt_length:], skip_special_tokens=True))

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]