In [1]:
import sys
import os

# Add the parent directory of 'wiki' to the Python path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

from wiki.api.lib.pipelines.hybrid_pipeline import hybrid_pipeline   # same instance of the pipeline will be loaded in all imports
from wiki.api.lib.pipelines.graph_pipeline import graph_pipeline
from wiki.api.lib.models.p1_qa import P1QA
from wiki.api.lib.models.p2_qa import P2QA
from wiki.api.lib.models.hierarchy_path import HierarchyPathData

  from .autonotebook import tqdm as notebook_tqdm


### Question-Answer: Invoke pipeline, capture response

In [2]:
import json

def question_answer(question: str) -> dict:
    p1_qa_schema = P1QA.model_json_schema()

    input_data = {
        "elasticsearch_retriever": {"query": question},
        "text_embedder": {"text": question},
        "p1_qa_prompt_builder": {"p1_qa_schema": p1_qa_schema, "query": question},
    }

    hybrid_result_dict = hybrid_pipeline.run(data=input_data, include_outputs_from={"elasticsearch_retriever", "weaviate_retriever", "reciprocal_rank_fusion_joiner"})

    hybrid_replies_json =  hybrid_result_dict['p1_qa_generator']['replies'][0]
    hybrid_replies_dict = json.loads(hybrid_replies_json)
    hybrid_replies = P1QA(**hybrid_replies_dict)
    
    if not hybrid_replies.need_more_context:    
        # If the answer is complete, return
        answer = {"answer": hybrid_replies.answer, "phase": 1}
        return { "answer": answer, "hybrid_result_dict": hybrid_result_dict }
    else:                                
        # If the answer is incomplete, run graph pipeline to fetch more context
        grounding_docs = hybrid_result_dict['reciprocal_rank_fusion_joiner']['documents']
        
        path_schema = HierarchyPathData.model_json_schema()
        p2_qa_schema = P2QA.model_json_schema()
        
        input_data = {
            "hierarchy_prompt_builder": {"query": question, "hierarchy_path_schema": path_schema},
            "wiki_hierarchy_builder": {"documents": grounding_docs},
            "p2_qa_prompt_builder": {"p2_qa_schema": p2_qa_schema, "query": question},
        }

        result = graph_pipeline.run(
            data=input_data,
            include_outputs_from={
                "hierarchy_generator",
                "wiki_context_creator",
                "p2_qa_generator",
            },
        )
    
        answer = {"answer": result['p2_qa_generator']['replies'][0], "phase": 2}
        return { "answer": answer, "hybrid_result_dict": hybrid_result_dict, "hierarchy_generator": result['hierarchy_generator'], "wiki_context_creator": result['wiki_context_creator'] }
    

In [7]:
response = question_answer("What is Ornithoscelida?")

response

  timestamp = datetime.utcnow().replace(tzinfo=tzutc())
  body["sentAt"] = datetime.utcnow().replace(tzinfo=tzutc()).isoformat()


{'answer': {'answer': "Ornithoscelida is a proposed clade that includes various major groupings of dinosaurs, known for their 'bird hipped' hip structure. The clade is characterized by a unique configuration of hip bones, specifically where the Pubis and Isquion are fused together. It includes dinosaurs such as triceratops, Stegosaurus, and Ceratopsians, which can be distinguished by features like an extra bone in the jaw and interlacing boney tendons in the vertebrae.",
  'phase': 1},
 'docs': {'text_embedder': {'meta': {'model': 'text-embedding-3-small',
    'usage': {'prompt_tokens': 8, 'total_tokens': 8}}},
  'p1_qa_generator': {'replies': ['{\n  "answer": "Ornithoscelida is a proposed clade that includes various major groupings of dinosaurs, known for their \'bird hipped\' hip structure. The clade is characterized by a unique configuration of hip bones, specifically where the Pubis and Isquion are fused together. It includes dinosaurs such as triceratops, Stegosaurus, and Ceratops

In [3]:
response = question_answer("What is Ornithoscelida?")

response

  timestamp = datetime.utcnow().replace(tzinfo=tzutc())
  body["sentAt"] = datetime.utcnow().replace(tzinfo=tzutc()).isoformat()


{'answer': {'answer': 'Ornithoscelida is a proposed clade that includes various major groupings of dinosaurs. It was originally proposed by Thomas Henry Huxley but later abandoned, and it was revived in 2017 after a new cladistic analysis.',
  'phase': 1},
 'docs': {'text_embedder': {'meta': {'model': 'text-embedding-3-small',
    'usage': {'prompt_tokens': 8, 'total_tokens': 8}}},
  'p1_qa_generator': {'replies': ['{\n  "answer": "Ornithoscelida is a proposed clade that includes various major groupings of dinosaurs. It was originally proposed by Thomas Henry Huxley but later abandoned, and it was revived in 2017 after a new cladistic analysis.",\n  "need_more_context": false,\n  "reasoning": "The context provides a clear definition and historical background regarding the term Ornithoscelida and its significance in dinosaur classification.",\n  "document_ids": [2]\n}'],
   'meta': [{'model': 'gpt-4o-mini-2024-07-18',
     'index': 0,
     'finish_reason': 'stop',
     'usage': {'comple

In [5]:
response = question_answer("Write a short essay on the extinction of dinosaurs. Include all probable causes, the main hyppthesis as well as alternative theories if any.")

response

  timestamp = datetime.utcnow().replace(tzinfo=tzutc())
  body["sentAt"] = datetime.utcnow().replace(tzinfo=tzutc()).isoformat()


{'answer': {'answer': "The extinction of dinosaurs, along with many other species, occurred during the mass extinction event known as the Cretaceous–Paleogene (K-Pg) extinction event, approximately 66 million years ago. This event marked the end of the Cretaceous period and was characterized by the abrupt disappearance of non-avian dinosaur fossils, with about 47% of genera and 76% of species on Earth becoming extinct.\n\nThe primary hypothesis for this mass extinction is the Chicxulub impact hypothesis, which attributes the event to an extraterrestrial impact event. This theory was initially brought to attention in 1980, when a team led by Luis Alvarez discovered a sedimentary layer containing unusually high levels of iridium, a metal more common in asteroids than in Earth's crust. Further evidence includes shocked quartz and microfossils indicative of a cataclysmic event. The impact would have unleashed immense energy, leading to various immediate effects such as earthquakes, tsunami

In [3]:
response = question_answer("Write a short essay on the following topic: 'Chixulub crater and the end of the dinosaurs'")

response

  timestamp = datetime.utcnow().replace(tzinfo=tzutc())
  body["sentAt"] = datetime.utcnow().replace(tzinfo=tzutc()).isoformat()


{'answer': {'answer': 'The Chicxulub crater, located beneath the Yucatán Peninsula in Mexico, was formed over 66 million years ago when a ten-kilometer-diameter asteroid struck Earth. This impact is linked to the mass extinctions at the K–Pg boundary, including the extinction of the dinosaurs. In 2010, experts reviewed two decades of evidence and concluded that the Chicxulub impact played a significant role in these extinctions, although there is debate about the contributions of the Deccan Traps volcanic eruptions. The crater itself is 200 kilometers wide and is considered the second largest impact structure on Earth, providing valuable insights into the events surrounding the extinction of the dinosaurs.',
  'phase': 1},
 'hybrid_result_dict': {'text_embedder': {'meta': {'model': 'text-embedding-3-small',
    'usage': {'prompt_tokens': 22, 'total_tokens': 22}}},
  'p1_qa_generator': {'replies': ['{\n  "answer": "The Chicxulub crater, located beneath the Yucatán Peninsula in Mexico, w

In [4]:
response = question_answer("Write a short essay on the paleobiology of dinosaurs.")

response

  timestamp = datetime.utcnow().replace(tzinfo=tzutc())


{'answer': {'answer': '{\n  "answer": "Paleobiology encompasses the study of ancient life forms, particularly dinosaurs, through various fossil and non-fossil records. This knowledge derives from fossilized bones, feces, trackways, feathers, and impressions of skin, among other evidence. Different fields, including biomechanics, chemistry, biology, and Earth sciences, contribute to our understanding of dinosaurs, with significant interest in their size and behavior. Evidence indicates that dinosaur size varied across the Mesozoic era, with theropods typically weighing between 100 to 1,000 kg, while sauropods were the largest, showcasing advantages like predation protection and dietary efficiency. Behaviorally, interpretations based on fossil evidence suggest that some dinosaurs exhibited herding behavior, and the communication methods, possibly utilizing visual displays and non-vocal sounds, hint at complex social structures. Reproductive biology shows that all dinosaurs laid amniotic 

## Question-Answer: Return answer + reference as response to API call

In [None]:
def build_api_response(qa_pipeline_response: dict) -> dict:
    # response = {
    #     "answer": qa_pipeline_response['answer'],
    #     "phase": qa_pipeline_response['answer']['phase'],
    #     "hybrid_result_dict": qa_pipeline_response['hybrid_result_dict']
    # }
    
    # if qa_pipeline_response['answer']['phase'] == 2:
    #     response['hierarchy_generator'] = qa_pipeline_response['hierarchy_generator']
    #     response['wiki_context_creator'] = qa_pipeline_response['wiki_context_creator']
    
    # qa_phase = qa_pipeline_response['answer']['phase']
    
        
    # return response