In [3]:
# Abstractive search example
# Download documents
from datasets import load_dataset
dataset = load_dataset("bilgeyucel/seven-wonders", split="train")

# Inspect structure
print("Dataset info:")
print(f"  Number of examples: {len(dataset)}")
print(f"  Features: {dataset.features}")
print(f"\nFirst example:")
print(dataset[0])

Dataset info:
  Number of examples: 151
  Features: {'id': Value('string'), 'content': Value('string'), 'content_type': Value('string'), 'meta': {'url': Value('string'), '_split_id': Value('int64')}, 'id_hash_keys': List(Value('string')), 'score': Value('null'), 'embedding': Value('null')}

First example:
{'id': 'b3de1a673c1eb2876585405395a10c3d', 'content': 'The Colossus of Rhodes (Ancient Greek: ὁ Κολοσσὸς Ῥόδιος, romanized:\xa0ho Kolossòs Rhódios Greek: Κολοσσός της Ρόδου, romanized:\xa0Kolossós tes Rhódou)[a] was a statue of the Greek sun-god Helios, erected in the city of Rhodes, on the Greek island of the same name, by Chares of Lindos in 280\xa0BC. One of the Seven Wonders of the Ancient World, it was constructed to celebrate the successful defence of Rhodes city against an attack by Demetrius Poliorcetes, who had besieged it for a year with a large army and navy.\nAccording to most contemporary descriptions, the Colossus stood approximately 70 cubits, or 33 metres (108 feet) hi

In [4]:
from datasets import load_dataset
from haystack import Document
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.retrievers.in_memory import InMemoryBM25Retriever

# Load dataset
dataset = load_dataset("bilgeyucel/seven-wonders", split="train")

# Setup document store
document_store = InMemoryDocumentStore()

# Convert dataset to Haystack Documents
documents = []
for item in dataset:
    doc = Document(
        content=item["content"],  # Adjust field name based on dataset
        meta=item  # Store all fields as metadata
    )
    documents.append(doc)

# Index documents
document_store.write_documents(documents)

print(f"✓ {document_store.count_documents()} documents indexed")

# Create retriever
retriever = InMemoryBM25Retriever(document_store=document_store)

✓ 151 documents indexed


In [5]:
from haystack import Pipeline
from haystack.components.builders import PromptBuilder
from haystack.components.generators import HuggingFaceLocalGenerator
from haystack.components.retrievers.in_memory import InMemoryBM25Retriever
from haystack.document_stores.in_memory import InMemoryDocumentStore

# Assuming you have document_store already set up

# 1. Prompt template (Jinja2 syntax)
prompt_template = """
Synthesize a comprehensive answer from the following text for the given question.
Provide a clear and concise response that summarizes the key points and information.
Your answer should be in your own words and be no longer than 50 words.

Related text:
{% for document in documents %}
{{ document.content }}
{% endfor %}

Question: {{ query }}

Answer:
"""

# 2. Create components
retriever = InMemoryBM25Retriever(document_store=document_store)
prompt_builder = PromptBuilder(template=prompt_template)
generator = HuggingFaceLocalGenerator(
    model="google/flan-t5-large",
    task="text2text-generation",
    generation_kwargs={"max_new_tokens": 50}
)

# 3. Build RAG pipeline
rag_pipeline = Pipeline()
rag_pipeline.add_component("retriever", retriever)
rag_pipeline.add_component("prompt_builder", prompt_builder)
rag_pipeline.add_component("generator", generator)

# 4. Connect components
rag_pipeline.connect("retriever.documents", "prompt_builder.documents")
rag_pipeline.connect("prompt_builder", "generator")

# 5. Run query
question = "What are the Seven Wonders of the Ancient World?"

result = rag_pipeline.run({
    "retriever": {"query": question, "top_k": 5},
    "prompt_builder": {"query": question},
    "generator": {}
})

# 6. Print answer
print(f"Question: {question}")
print(f"Answer: {result['generator']['replies'][0]}")


PromptBuilder has 2 prompt variables, but `required_variables` is not set. By default, all prompt variables are treated as optional, which may lead to unintended behavior in multi-branch pipelines. To avoid unexpected execution, ensure that variables intended to be required are explicitly set in `required_variables`.


config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.13G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

Device set to use cpu
Token indices sequence length is longer than the specified maximum sequence length for this model (1602 > 512). Running this sequence through the model will result in indexing errors


Question: What are the Seven Wonders of the Ancient World?
Answer: The Great Pyramid of Giza, the Lighthouse of Alexandria, the Statue of Zeus at Olympia, the Colossus of Rhodes, the Mausoleum at Halicarnassus, and the Mausoleum at


In [6]:
# 5. Run query
question = "What is the Great Pyramid of Giza"

result = rag_pipeline.run({
    "retriever": {"query": question, "top_k": 5},
    "prompt_builder": {"query": question},
    "generator": {}
})

# 6. Print answer
print(f"Question: {question}")
print(f"Answer: {result['generator']['replies'][0]}")

Question: What is the Great Pyramid of Giza
Answer: The Great Pyramid of Giza is the largest Egyptian pyramid and the tomb of Fourth Dynasty pharaoh Khufu.


In [7]:
# Text summarization with T5
from transformers import pipeline
summarization_pipe = pipeline("summarization", model="t5-large")

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.95G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

Device set to use cpu


In [None]:
# Summarize a short Sherlock Holmes text
with open("../data/sherlock_holmes_1.txt", "r") as file:
    sherlock = file.read()
summary = summarization_pipe(sherlock, max_length=150, min_length=40, do_sample=False)
print("Summary of Sherlock Holmes:")
print(summary[0]['summary_text'])

Both `max_new_tokens` (=256) and `max_length`(=150) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Summary of Sherlock Holmes:
in his eyes she eclipses and predominates the whole of her sex . as a lover he would have placed himself in a false position . he never spoke of the softer passions, save with a gibe and a sneer .


In [10]:
# Switch to BART
import torch
device = "cuda" if torch.cuda.is_available() else "cpu"
    
summarization_pipe_bart = pipeline("summarization", model="facebook/bart-large-cnn", device=0 if device=="cuda" else -1)
summary_bart = summarization_pipe_bart(sherlock, max_length=150, min_length=40, do_sample=False)
print("Summary of Sherlock Holmes (BART):")
print(summary_bart[0]['summary_text'])


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Device set to use cpu


Summary of Sherlock Holmes (BART):
Sherlock Holmes never felt any emotionakin to love for Irene Adler. All emotions, and that one particularly, were abhorrent to his cold, precise mind. To admit such intrusions into his own delicate and finelyadjusted temperament was to introduce a distracting factor.


In [11]:
# Textual entailment with T5
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration

In [12]:
tokenizer = T5Tokenizer.from_pretrained("t5-large")
model = T5ForConditionalGeneration.from_pretrained("t5-large").to(device)

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


In [13]:
premise = "Large language models are capable of performing a variety of natural language processing tasks."
hypothesis = "LLMs can do many NLP jobs."
input_ids = tokenizer.encode(f"mnli: {premise} </s> {hypothesis}", return_tensors="pt").to(device)
outputs = model.generate(input_ids)
entailment = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(f"Entailment result: {entailment}")

Entailment result: entailment


In [None]:
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = T5Tokenizer.from_pretrained("t5-large")
model = T5ForConditionalGeneration.from_pretrained("t5-large").to(device)

# Input
premise = "Large language models are capable of performing a variety of natural language processing tasks."
hypothesis = "LLMs can do many NLP jobs."

# Inference
with torch.no_grad():
    inputs = tokenizer(
        f"mnli: {premise} </s> {hypothesis}", # T5 specific input format
        return_tensors="pt",
        truncation=True,
        max_length=512
    ).to(device)
    
    outputs = model.generate(
        inputs.input_ids,
        max_length=10,
        num_beams=1
    )
    
    result = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(f"Entailment: {result}")

Entailment: entailment


In [15]:
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = T5Tokenizer.from_pretrained("t5-large")
model = T5ForConditionalGeneration.from_pretrained("t5-large").to(device)

# Input: one premise, multiple hypotheses
premise = "Large language models are capable of performing a variety of natural language processing tasks."
hypotheses = [
    "LLMs can do many NLP jobs.",
    "Neural networks cannot process text.",
    "Language models are useful for translation."
]

# Inference
with torch.no_grad():
    # Create batch of premise-hypothesis pairs
    texts = [f"mnli: {premise} </s> {hyp}" for hyp in hypotheses]
    
    inputs = tokenizer(
        texts,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512
    ).to(device)
    
    outputs = model.generate(
        inputs.input_ids,
        max_length=10,
        num_beams=1
    )
    
    results = [tokenizer.decode(output, skip_special_tokens=True) for output in outputs]

# Display results
for hyp, result in zip(hypotheses, results):
    print(f"Hypothesis: {hyp}")
    print(f"Entailment: {result}\n")

Hypothesis: LLMs can do many NLP jobs.
Entailment: entailment

Hypothesis: Neural networks cannot process text.
Entailment: neutral

Hypothesis: Language models are useful for translation.
Entailment: neutral



config.json:   0%|          | 0.00/687 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/256 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

Device set to use cpu


Document: The movie was fantastic! I really loved it.
Prediction: {'label': 'POSITIVE', 'score': 0.9989079236984253}

Top contributing words for POSITIVE sentiment:
  it                   +0.0124
  fantastic            +0.0124
  was                  +0.0117
  loved                +0.0113
  I                    +0.0092
  movie                +0.0056
