### Test WikiHierarchyBuilder component

In [1]:
import os

from haystack import Document

# Two hand-written sample chunks used as input for the components below.
# Both carry the same file_path / source_id and consecutive split_ids;
# the first has only an 'h2' heading in its meta, the second also has an 'h3'.
docs = [
    Document(
        id="3f45e988-b093-4d9c-bde1-67e2e4c3b599",
        content='All non-avian dinosaurs and most lineages of birds became extinct in a mass extinction event, called...',
        meta={
            'file_path': 'Dinosaur.html',
            'source_id': '93000a3fb02b99d2d115cd4042256d2f5db2a0ff3928927ca14465276534a75e',
            'split_id': 234,
            'title': 'Dinosaurs',
            'h2': 'Extinction of major groups',
        },
    ),
    Document(
        id="433ad671-ea22-4ae1-9bc4-1cce06a0e6ee",
        content='Just before the K-Pg extinction event, the number of non-avian dinosaur species that existed globall...',
        meta={
            'file_path': 'Dinosaur.html',
            'source_id': '93000a3fb02b99d2d115cd4042256d2f5db2a0ff3928927ca14465276534a75e',
            'split_id': 235,
            'title': 'Dinosaurs',
            'h2': 'Extinction of major groups',
            'h3': 'Pre-extinction diversity',
        },
    ),
]

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from wiki_hierarchy_builder import WikiHierarchyBuilder

# Build the hierarchy component against the graph database.
# The three positional arguments are presumably (uri, user, password) for
# Neo4j -- TODO confirm against WikiHierarchyBuilder's signature.
# Each value can be overridden through an environment variable so that real
# credentials never need to be committed in the notebook; the fallbacks are
# the local development values this cell used originally.
hb = WikiHierarchyBuilder(
    os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
    os.environ.get("NEO4J_USER", "neo4j"),
    os.environ.get("NEO4J_PASSWORD", "neo4jpass"),
)

# Run the component on the sample documents defined in the first cell.
result = hb.run(documents=docs)

# Echo the returned dict as the cell output.
result

{'sections_hierarchy': {'Dinosaur': {'title': 'Dinosaur',
   'sections': [{'name': 'Definition',
     'type': 'h2',
     'sections': [{'name': 'General description', 'type': 'h3'},
      {'name': 'Distinguishing anatomical features', 'type': 'h3'}]},
    {'name': 'History of study',
     'type': 'h2',
     'sections': [{'name': 'Pre-scientific history', 'type': 'h3'},
      {'name': 'Early dinosaur research', 'type': 'h3'},
      {'name': 'Discoveries in North America', 'type': 'h3'},
      {'name': '"Dinosaur renaissance" and beyond', 'type': 'h3'},
      {'name': 'Soft tissue and molecular preservation', 'type': 'h3'}]},
    {'name': 'Evolutionary history',
     'type': 'h2',
     'sections': [{'name': 'Origins and early evolution', 'type': 'h3'},
      {'name': 'Evolution and paleobiogeography', 'type': 'h3'}]},
    {'name': 'Classification',
     'type': 'h2',
     'sections': [{'name': 'Taxonomy', 'type': 'h3'},
      {'name': 'Timeline of major groups', 'type': 'h3'}]},
    {'nam

In [3]:
# Pull the chunk-level hierarchy out of the WikiHierarchyBuilder output so it
# can be passed to WikiContextCreator.run(...) further down.
# NOTE: `result` is rebound by the WikiContextCreator cell later in the
# notebook, so this cell must run while `result` still holds the builder output.
chunks_hierarchy = result["chunks_hierarchy"]

# Echo the extracted hierarchy as the cell output.
chunks_hierarchy

{'Dinosaur': {'title': 'Dinosaur',
  'chunks': ['a143ea79-f5d1-450e-ae15-6f7ce5034d8f',
   'b5bb2eb7-c529-4f14-a4da-99ea0ce636bf',
   'fb334a4d-0c1c-4a4d-98a7-ec1db6a5ecca',
   '298c4052-ebd4-4de9-a15e-49400409b4c6'],
  'sections': [{'name': 'Definition',
    'type': 'h2',
    'chunks': ['0c9e9c40-8387-41cd-a484-1ba20183ddb8',
     'fddc9347-5b5a-441a-828b-46e14f1c5a05',
     'cc1356da-f729-45bb-909e-ad1890e46993'],
    'sections': [{'name': 'General description',
      'type': 'h3',
      'chunks': ['a3e88b32-83b0-439e-bbb3-3d007c047c69',
       'f05fdf12-63e9-466f-9bdf-a3a7a46c1352',
       'dabe7759-3d78-4e0c-a9fe-8488df94c89b']},
     {'name': 'Distinguishing anatomical features',
      'type': 'h3',
      'chunks': ['e3301398-887c-4cc7-a10f-3299dd420a88',
       '567d7f5d-adc4-42c1-9774-1bb172553d8b',
       '7a16aafb-10ec-4552-8eb4-1d7fe2ad2b79',
       'c0bb0ab0-06b4-403d-968d-1bd8e34594bd',
       '1d0a398b-8661-4b31-b4a5-cb7d87e29c78',
       '3bacb7b3-51d1-4963-ba83-3d5f9c3c3

### Test WikiContextCreator component

In [6]:
from wiki_context_creator import WikiContextCreator
from haystack_integrations.document_stores.elasticsearch import ElasticsearchDocumentStore

# Document store pointing at the local Elasticsearch endpoint; it is handed to
# the context creator below.
e_store = ElasticsearchDocumentStore(hosts="http://localhost:9200")

wcc = WikiContextCreator(e_store)

# `hierarchy_paths` is a list holding one JSON document (as a string) with two
# section paths and a reasoning text for each. The literal is split across
# adjacent string literals purely for readability; they concatenate to exactly
# the same string as a single-line literal would.
# NOTE: this rebinds `result`, replacing the WikiHierarchyBuilder output.
result = wcc.run(
    chunks_hierarchy=chunks_hierarchy,
    hierarchy_paths=[
        (
            '{\n'
            '  "paths": [\n'
            '    {\n'
            '      "path": ["Dinosaur", "Extinction of major groups"],\n'
            '      "reasoning": "The \'Impact event\' section is likely to provide details on the causes of the extinction of dinosaurs, particularly the asteroid impact that is widely believed to have contributed significantly."\n'
            '    },\n'
            '    {\n'
            '      "path": ["Dinosaur", "Extinction of major groups", "Deccan Traps"],\n'
            '      "reasoning": "The \'Deccan Traps\' section may discuss volcanic activity and its effects, which could also be a contributing factor to the extinction of dinosaurs."\n'
            '    }\n'
            '  ]\n'
            '}'
        )
    ],
)

# Echo the returned dict as the cell output.
result

{'context_list': ['Extinction of major groups (h2):\nAll non-avian dinosaurs and most lineages of birds became extinct in a mass extinction event, called the Cretaceous–Paleogene (K-Pg) extinction event, at the end of the Cretaceous period. Above the Cretaceous–Paleogene boundary, which has been dated to 66.038 ± 0.025 million years ago, fossils of non-avian dinosaurs disappear abruptly; the absence of dinosaur fossils was historically used to assign rocks to the ensuing Cenozoic. The nature of the event that caused this mass extinction has been extensively studied since the 1970s, leading to the development of two mechanisms that are thought to have played major roles: an extraterrestrial impact event in the Yucatán Peninsula, along with flood basalt volcanism in India. However, the specific mechanisms of the extinction event and the extent of its effects on dinosaurs are still areas of ongoing research. Alongside dinosaurs, many other groups of animals became extinct: pterosaurs, mar

In [7]:
# Use print() rather than a bare expression so the "\n" escapes inside the
# first context string are rendered as real line breaks.
# `result` here is the WikiContextCreator output from the previous cell.
print(result["context_list"][0])

Extinction of major groups (h2):
All non-avian dinosaurs and most lineages of birds became extinct in a mass extinction event, called the Cretaceous–Paleogene (K-Pg) extinction event, at the end of the Cretaceous period. Above the Cretaceous–Paleogene boundary, which has been dated to 66.038 ± 0.025 million years ago, fossils of non-avian dinosaurs disappear abruptly; the absence of dinosaur fossils was historically used to assign rocks to the ensuing Cenozoic. The nature of the event that caused this mass extinction has been extensively studied since the 1970s, leading to the development of two mechanisms that are thought to have played major roles: an extraterrestrial impact event in the Yucatán Peninsula, along with flood basalt volcanism in India. However, the specific mechanisms of the extinction event and the extent of its effects on dinosaurs are still areas of ongoing research. Alongside dinosaurs, many other groups of animals became extinct: pterosaurs, marine reptiles such as