In [None]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

# Read everything under ./docs and build a VectorStoreIndex over it.
documents = SimpleDirectoryReader("docs").load_data()
index = VectorStoreIndex.from_documents(documents)

# Obtain a query engine from the index and show its repr.
query_engine = index.as_query_engine()
print(query_engine)

# Alternative prompt to try: "summarize each document in a few sentences"
query = "What is the treatment for type 2 diabetes?"
response = query_engine.query(query)

# Raw source-node objects first, then the answer text under a heading.
print(response.source_nodes)
print("\nResponse:\n", response.response, sep="\n")

# List the text of every source node attached to the response, numbered from 1.
print("\nRetrieved Chunks:")
for i, source_node in enumerate(response.source_nodes):
    print(f"\nChunk {i+1}:", source_node.node.text, sep="\n")

In [None]:
from llama_index.core import SummaryIndex, Document
from llama_index.core.schema import TextNode

# Hand-written facts; each string becomes the text of its own TextNode.
messi_facts = [
    "Lionel Messi is a football player from Argentina.",
    "He has won the Ballon d'Or trophy 7 times.",
    "Lionel Messi's hometown is Rosario.",
    "He was born on June 24, 1987.",
]
nodes = [TextNode(text=fact) for fact in messi_facts]

# Build a SummaryIndex directly from the nodes and query it.
index = SummaryIndex(nodes)
query_engine = index.as_query_engine()

response = query_engine.query("What is Messi's hometown?")
print(response)

In [None]:
from llama_index.core.node_parser import HTMLNodeParser
from llama_index.readers.file import FlatReader
from pathlib import Path

# --- HTML: load the sample page and split it into nodes for <p> and <span> ---
reader = FlatReader()
document = reader.load_data(Path("data_sample/sample.html"))

my_tags = ["p", "span"]
html_parser = HTMLNodeParser(tags=my_tags)
nodes = html_parser.get_nodes_from_documents(document)

# Print the nodes grouped by the tag recorded in their metadata:
# all <span> nodes first, then all <p> nodes.
for tag_name, heading in (("span", '<span> elements:'), ("p", '<p> elements:')):
    print(heading)
    for node in nodes:
        if node.metadata['tag'] == tag_name:
            print(node.text)


# --- JSON: reuse the same reader on the sample JSON file ---
from llama_index.core.node_parser import JSONNodeParser

document = reader.load_data(Path("data_sample/sample.json"))
json_parser = JSONNodeParser.from_defaults()
nodes = json_parser.get_nodes_from_documents(document)

# Show each node's metadata alongside its text.
for node in nodes:
    print(f"Metadata {node.metadata} \nText: {node.text}")

In [None]:
from llama_index.retrievers.bm25 import BM25Retriever
from llama_index.core.node_parser import SentenceSplitter
# VectorStoreIndex is used further down in this cell; import it here so the
# cell runs on its own instead of relying on an import made in another cell.
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex

# Load every file under ./data_sample.
reader = SimpleDirectoryReader('data_sample')
documents = reader.load_data()

# Split the documents into nodes with chunk_size=60, no overlap, and
# include_metadata=False.
splitter = SentenceSplitter.from_defaults(
    chunk_size=60,
    chunk_overlap=0,
    include_metadata=False,
)
nodes = splitter.get_nodes_from_documents(documents)

# --- BM25 retrieval over the split nodes (top 2 results) ---
retriever = BM25Retriever.from_defaults(
    nodes=nodes,
    similarity_top_k=2,
)
# Kept under its own name so `response` below refers only to the query answer.
bm25_results = retriever.retrieve("medicine for type 2 diabetes")
for node_with_score in bm25_results:
    print(f"Text:{node_with_score.node.text}")
    print(f"Score: {node_with_score.score}")


# --- Vector index query over the same documents ---
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()
response = query_engine.query("cause of type 2 diabetes")

print("\nResponse:\n")
print(response.response)


# Print the text of every source node attached to the response, numbered from 1.
print("\nRetrieved Chunks:")
for i, source_node in enumerate(response.source_nodes):
    print(f"\nChunk {i+1}:")
    print(source_node.node.text)
