In [None]:
# Uncomment to install the dependencies into the kernel's environment:
# %pip install llama-index python-dotenv



In [1]:
import os
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    load_index_from_storage,
    StorageContext,
)

from dotenv import load_dotenv
from os import environ
# Load variables from a local .env file into the process environment so the
# API key lookup in the next cell can find them (presumably OPENAI_API_KEY is
# kept there — confirm the .env file exists next to this notebook).
load_dotenv()

True

### Set Up API Key


In [2]:
# Read the OpenAI key from the environment and fail fast with an actionable
# message when it is absent. Without this guard a missing key surfaces as an
# opaque "TypeError: str expected, not NoneType" on the assignment below.
OPENAI_API_KEY = environ.get("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to a .env file or export it "
        "in your shell before running this notebook."
    )
# Re-export explicitly so libraries that read os.environ see the key.
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

### Load Data Files

In [3]:
def filename_fn(filename):
    """Return the metadata dict attached to documents loaded from ``filename``."""
    return {"file_name": filename}


# Source PDFs to ingest, listed explicitly.
pdf_paths = [
    "./data/01.pdf",
    "./data/02.pdf",
    "./data/03.pdf",
    "./data/04.pdf",
    "./data/05.pdf",
]

# The reader calls filename_fn for each file to set the documents' metadata.
reader = SimpleDirectoryReader(input_files=pdf_paths, file_metadata=filename_fn)
documents = reader.load_data()

In [5]:
# Sanity-check the load. The count can exceed the number of input files
# (presumably one document per PDF page — confirm).
print(f"Loaded {len(documents)} docs")

Loaded 91 docs


### Build or Load the Persisted Index

In [6]:
# Build the vector index once and cache it on disk; later runs reload the
# cached copy instead of rebuilding it from the documents.
# NOTE: the cache is never invalidated when the input files change — delete
# PERSIST_DIR to force a rebuild.
PERSIST_DIR = "./storage"
if not os.path.exists(PERSIST_DIR):
    # First run: build the index from the loaded documents and store it.
    index = VectorStoreIndex.from_documents(documents)
    index.storage_context.persist(persist_dir=PERSIST_DIR)
else:
    # Reload from the same directory the index was persisted to, using the
    # single PERSIST_DIR constant so the save and load paths cannot drift.
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context=storage_context)

### Retrieval

In [7]:
# Query engine over the index: similarity_top_k=5 asks for the five most
# similar nodes per query, and response_mode="tree_summarize" selects how the
# retrieved text is combined into one answer.
query_engine = index.as_query_engine(response_mode="tree_summarize", similarity_top_k=5)

In [8]:
# Ask a sample question; the result carries the answer text (.response) and
# the retrieved nodes it was built from (.source_nodes), inspected below.
response = query_engine.query("how strong must the spinal column be?")

In [9]:
# Answer text; left as a bare last expression so the notebook displays it.
response.response

'The spinal column must be strong enough to support the trunk and extremities, protect the spinal cord and cauda equina, anchor muscles, and allow for movement of the head and trunk in multiple directions.'

In [10]:
# Inspect the retrieved nodes behind the answer (node ID plus a text preview).
for hit in response.source_nodes:
    print(hit.node)

Node ID: 1171a1e8-d743-4afa-a6d8-05cd55bc36aa
Text: Anatomy and Examination of the Spine Michael W. Devereaux, MDa,b
aDepartment of Neurology, University Hospitals of Cleveland/Case
Western Reserve University, 11100 Euclid Avenue, Cleveland, OH 44106,
USA bUniversity Hospitals Health System, Richmond Heights Hospital
27100 Chardon Road, Richmond Heights, OH 44143-1116, USA A review of
the anatomy...
Node ID: fa377596-72fe-478f-bd0e-e5110f628164
Text: assess whether the spine is balanced. A spine is said to be
balanced clinically when the head is centred over the pubicsymphysis
in the coronal plane and centred over the femoralhead in the sagittal
plane ( Figure 1 ). In the coronal plane a weighted string or plumb
line can be used to further assess bal-ance. The string should be
pressed agains...
Node ID: 936d20ac-9c3c-433b-a4f1-b47b9ba51cd9
Text: The spinal canal itself is formed posterolaterally by the
laminae and lig- amentum ﬂavum, anterolaterally by the pedicles, and
anteriorly by t

In [11]:
# Inspect where each retrieved node came from (source file and page label).
for hit in response.source_nodes:
    print(hit.node.metadata)

{'page_label': '331', 'file_name': 'data\\02.pdf'}
{'page_label': '351', 'file_name': 'data\\03.pdf'}
{'page_label': '332', 'file_name': 'data\\02.pdf'}
{'page_label': '350', 'file_name': 'data\\03.pdf'}
{'page_label': '468', 'file_name': 'data\\04.pdf'}
