In [6]:
from lib.pipelines.hybrid_pipeline import hybrid_pipeline   # same instance of the pipeline will be loaded in all imports
from lib.pipelines.graph_pipeline import graph_pipeline

### Hybrid pipeline: sanity check

In [5]:
from lib.models.p1_qa import P1QA

# Question used to smoke-test the hybrid pipeline.
query = "When did dinosaurs go extinct?"

# JSON schema of the P1QA model; it is handed to the prompt builder below.
p1_qa_schema = P1QA.model_json_schema()

# Pipeline inputs, keyed by the name of the component that receives them.
input_data = {
    "elasticsearch_retriever": {"query": query},
    "text_embedder": {"text": query},
    "p1_qa_prompt_builder": {
        "p1_qa_schema": p1_qa_schema,
        "query": query,
    },
}

# Also request the outputs of both retrievers and of the joiner.
result = hybrid_pipeline.run(
    data=input_data,
    include_outputs_from={
        "elasticsearch_retriever",
        "weaviate_retriever",
        "reciprocal_rank_fusion_joiner",
    },
)

result

{'text_embedder': {'meta': {'model': 'text-embedding-3-small',
   'usage': {'prompt_tokens': 6, 'total_tokens': 6}}},
 'p1_qa_generator': {'replies': ['{\n  "answer": "Dinosaurs went extinct at the end of the Cretaceous period, approximately 66 million years ago during the Cretaceous–Paleogene (K-Pg) extinction event.",\n  "need_more_context": false,\n  "reasoning": "The context clearly states that the mass extinction event that led to the extinction of non-avian dinosaurs occurred at the end of the Cretaceous period, dated to around 66 million years ago."\n}'],
  'meta': [{'model': 'gpt-4o-mini-2024-07-18',
    'index': 0,
    'finish_reason': 'stop',
    'usage': {'completion_tokens': 97,
     'prompt_tokens': 1484,
     'total_tokens': 1581,
     'completion_tokens_details': {'reasoning_tokens': 0}}}]},
 'elasticsearch_retriever': {'documents': [Document(id=a0d28395-4798-4c2d-b8a3-9a29dadc09dc, content: 'Some dinosaurs are known to have used gizzard stones like modern birds. These s

### Graph pipeline: sanity check

In [7]:
from haystack import Document
from lib.models.hierarchy_path import HierarchyPathData

# Question and output schema used to smoke-test the graph pipeline.
query = "Give a brief about alternative theories of dinosaur extinction."
path_schema = HierarchyPathData.model_json_schema()

# Two hand-written documents standing in for retrieved chunks. Both carry the
# same file_path / source_id; the second one additionally has an "h3" entry.
docs = [
    Document(
        id="3f45e988-b093-4d9c-bde1-67e2e4c3b599",
        content="All non-avian dinosaurs and most lineages of birds became extinct in a mass extinction event, called...",
        meta={
            "file_path": "Dinosaur.html",
            "source_id": "93000a3fb02b99d2d115cd4042256d2f5db2a0ff3928927ca14465276534a75e",
            "split_id": 234,
            "title": "Dinosaurs",
            "h2": "Extinction of major groups",
        },
    ),
    Document(
        id="433ad671-ea22-4ae1-9bc4-1cce06a0e6ee",
        content="Just before the K-Pg extinction event, the number of non-avian dinosaur species that existed globall...",
        meta={
            "file_path": "Dinosaur.html",
            "source_id": "93000a3fb02b99d2d115cd4042256d2f5db2a0ff3928927ca14465276534a75e",
            "split_id": 235,
            "title": "Dinosaurs",
            "h2": "Extinction of major groups",
            "h3": "Pre-extinction diversity",
        },
    ),
]

In [9]:
# Graph pipeline inputs, keyed by the name of the component that receives them.
input_data = {
    "hierarchy_prompt_builder": {
        "query": query,
        "hierarchy_path_schema": path_schema,
    },
    "wiki_hierarchy_builder": {"documents": docs},
    "final_prompt_builder": {"query": query},
}

# Request the outputs of these three components so each can be inspected below.
result = graph_pipeline.run(
    data=input_data,
    include_outputs_from={"hierarchy_generator", "wiki_context_creator", "final_generator"},
)

# First stage: the hierarchy paths proposed by the generator.
result["hierarchy_generator"]

{'meta': [{'model': 'gpt-4o-mini-2024-07-18',
   'index': 0,
   'finish_reason': 'stop',
   'usage': {'completion_tokens': 108,
    'prompt_tokens': 1280,
    'total_tokens': 1388,
    'completion_tokens_details': {'reasoning_tokens': 0}}}],
 'replies': ['{\n  "paths": [\n    {\n      "path": ["Dinosaur", "Extinction of major groups", "Impact event"],\n      "reasoning": "The section on \'Impact event\' likely discusses one of the leading theories related to dinosaur extinction."\n    },\n    {\n      "path": ["Dinosaur", "Extinction of major groups", "Deccan Traps"],\n      "reasoning": "The \'Deccan Traps\' section might provide information on alternative volcanic activity theories concerning the extinction."\n    }\n  ]\n}']}

In [10]:
# Second stage: the context assembled by the wiki context creator.
result["wiki_context_creator"]

{'context_list': ['Impact event (h3):\nThe bolide impact hypothesis, first brought to wide attention in 1980 by Walter Alvarez, Luis Alvarez, and colleagues, attributes the K-Pg extinction event to a bolide (extraterrestrial projectile) impact. Alvarez and colleagues proposed that a sudden increase in iridium levels, recorded around the world in rock deposits at the Cretaceous–Paleogene boundary, was direct evidence of the impact. Shocked quartz, indicative of a strong shockwave emanating from an impact, was also found worldwide. The actual impact site remained elusive until a crater measuring 180 km (110 mi) wide was discovered in the Yucatán Peninsula of southeastern Mexico, and was publicized in a 1991 paper by Alan Hildebrand and colleagues. Now, the bulk of the evidence suggests that a bolide 5 to 15 kilometers (3 to 9 + 1 ⁄ 2 miles) wide impacted the Yucatán Peninsula 66 million years ago, forming this crater and creating a "kill mechanism" that triggered the extinction event.\n\

In [11]:
# Last stage: the reply produced by the final generator.
result["final_generator"]

{'replies': ["The primary alternative theory to the Chicxulub impact hypothesis for dinosaur extinction is related to the Deccan Traps volcanic eruptions in India. These eruptions released greenhouse gases such as carbon dioxide and sulfur dioxide, which caused climate change and temperature perturbations, potentially affecting the environment and ecosystems. Some researchers argue that these eruptions may have played a significant role in the extinction event, with estimates placing the start of their most impactful phase within 50,000 years after the Chicxulub impact.\n\nWhile the Chicxulub impact is generally recognized as the primary cause of the extinction, the Deccan Traps may have contributed to the climatic changes that worsened the effects of the impact. However, there remains uncertainty about the extent of the Deccan Traps' role compared to the Chicxulub impact. Proponents of the Deccan Traps theory note parallels with other mass extinctions, suggesting that its sulfur dioxi

## Connect hybrid and graph pipelines

In [17]:
import json

def _strip_code_fence(reply: str) -> str:
    """Return ``reply`` without a surrounding Markdown code fence, if it has one."""
    text = reply.strip()
    if text.startswith("```"):
        # Drop the opening fence line (``` or ```json) ...
        _, _, text = text.partition("\n")
        # ... and everything from the closing fence onwards.
        text = text.rsplit("```", 1)[0]
    return text.strip()


def _parse_p1_reply(replies: list) -> "P1QA | None":
    """Turn the first phase-1 reply into a ``P1QA``; return ``None`` if it is missing or unusable."""
    if not replies:
        return None
    try:
        payload = json.loads(_strip_code_fence(replies[0]))
        return P1QA(**payload)
    except (ValueError, TypeError):
        # json.JSONDecodeError and pydantic's ValidationError are both ValueError
        # subclasses; TypeError covers a payload that is not a JSON object.
        return None


def question_answer(question: str) -> dict:
    """Answer ``question`` with the hybrid pipeline, escalating to the graph pipeline when needed.

    Phase 1 runs ``hybrid_pipeline`` and parses the first ``p1_qa_generator``
    reply as a ``P1QA`` object. If that object says no more context is needed,
    its answer is returned. Otherwise -- including when the reply is missing,
    is not valid JSON, or does not fit ``P1QA`` -- phase 2 runs
    ``graph_pipeline`` on the documents produced by
    ``reciprocal_rank_fusion_joiner`` and returns the first ``final_generator``
    reply.

    Relies on ``hybrid_pipeline``, ``graph_pipeline``, ``P1QA``,
    ``HierarchyPathData`` and ``json`` being available in the notebook namespace.

    Args:
        question: The user question, passed unchanged to both pipelines.

    Returns:
        A dict with two keys: ``"answer"`` (the answer text) and ``"phase"``
        (``1`` if the hybrid pipeline answered, ``2`` if the graph pipeline did).
    """
    hybrid_inputs = {
        "elasticsearch_retriever": {"query": question},
        "text_embedder": {"text": question},
        "p1_qa_prompt_builder": {"p1_qa_schema": P1QA.model_json_schema(), "query": question},
    }

    # Only the joiner's documents are read below, so only that intermediate
    # output is requested.
    hybrid_result = hybrid_pipeline.run(
        data=hybrid_inputs,
        include_outputs_from={"reciprocal_rank_fusion_joiner"},
    )

    p1_answer = _parse_p1_reply(hybrid_result["p1_qa_generator"]["replies"])
    if p1_answer is not None and not p1_answer.need_more_context:
        # The hybrid pipeline produced a complete answer.
        return {"answer": p1_answer.answer, "phase": 1}

    # Incomplete (or unusable) phase-1 answer: run the graph pipeline, seeded
    # with the documents the hybrid pipeline retrieved.
    grounding_docs = hybrid_result["reciprocal_rank_fusion_joiner"]["documents"]

    graph_inputs = {
        "hierarchy_prompt_builder": {
            "query": question,
            "hierarchy_path_schema": HierarchyPathData.model_json_schema(),
        },
        "wiki_hierarchy_builder": {"documents": grounding_docs},
        "final_prompt_builder": {"query": question},
    }

    graph_result = graph_pipeline.run(
        data=graph_inputs,
        include_outputs_from={"final_generator"},
    )

    return {"answer": graph_result["final_generator"]["replies"][0], "phase": 2}
    

In [15]:
# Short factual question; in the recorded run below it was answered in phase 1.
response = question_answer("What was Huayangosauridae?")
response

{'answer': 'Huayangosauridae was a group of small stegosaurs characterized by flank osteoderms and tail clubs.',
 'phase': 1}

In [18]:
# Broader, open-ended request.
response = question_answer("Write a short essay on the paleobiology of dinosaurs.")
response

{'answer': 'The paleobiology of dinosaurs encompasses a broad range of knowledge derived from various fossil and non-fossil records, including fossilized skeletons, feces, and trackways, complemented by insights from numerous scientific disciplines such as physics, chemistry, biology, and Earth sciences. Understanding dinosaurs involves examining their size, behavior, physiology, communication, and reproductive biology.\n\nDinosaur size varied significantly over the Triassic, Jurassic, and Cretaceous periods, with evidence suggesting a broad range of body masses. Predatory theropods generally weighed between 100 to 1,000 kg, whereas the majority of Mesozoic dinosaurs exhibited body masses between 1 and 10 metric tons. The largest dinosaurs were sauropods, whose sizes eclipsed modern land animals, and recent estimates indicate some species, like Argentinosaurus, may have reached up to 100,000 kg and lengths of 30 to 40 meters. In contrast, some non-avialan dinosaurs were notably small, 

In [19]:
# print() rather than a bare expression so the "\n" escapes render as paragraph breaks.
print(response["answer"])

The paleobiology of dinosaurs encompasses a broad range of knowledge derived from various fossil and non-fossil records, including fossilized skeletons, feces, and trackways, complemented by insights from numerous scientific disciplines such as physics, chemistry, biology, and Earth sciences. Understanding dinosaurs involves examining their size, behavior, physiology, communication, and reproductive biology.

Dinosaur size varied significantly over the Triassic, Jurassic, and Cretaceous periods, with evidence suggesting a broad range of body masses. Predatory theropods generally weighed between 100 to 1,000 kg, whereas the majority of Mesozoic dinosaurs exhibited body masses between 1 and 10 metric tons. The largest dinosaurs were sauropods, whose sizes eclipsed modern land animals, and recent estimates indicate some species, like Argentinosaurus, may have reached up to 100,000 kg and lengths of 30 to 40 meters. In contrast, some non-avialan dinosaurs were notably small, with the bee h