## Get started with LlamaIndex

In [None]:
!pip install llama-index langchain pypdf
!pip install -U langchain-community
!pip install llama_index.llms.huggingface
!pip install sentence-transformers
!pip install llama-index-embeddings-langchain
!pip install llama-index-llms-gemini

In [None]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

In [None]:
from llama_index.core import ServiceContext, StorageContext,load_index_from_storage, PromptHelper
from llama_index.core.node_parser import SimpleNodeParser
from langchain.embeddings import HuggingFaceInferenceAPIEmbeddings
#from llama_index.embeddings import LangchainEmbedding
from langchain.embeddings import HuggingFaceEmbeddings
#from llama_index.llms import HuggingFaceInferenceAPI
from llama_index.llms.gemini import Gemini
from llama_index.llms.huggingface.base import HuggingFaceInferenceAPI
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine

## Create a parser and parse the documents into nodes

In [None]:
import os

In [None]:
# Build a vector index over the documents in /content/data and persist it.
#
# Steps: load documents -> split into nodes -> configure LLM + embedding model
# -> build the VectorStoreIndex -> write it to PERSIST_DIR.
PERSIST_DIR = "./storage"

# Load every file found in the data directory and split it into nodes.
documents = SimpleDirectoryReader("/content/data").load_data()
parser = SimpleNodeParser()
nodes = parser.get_nodes_from_documents(documents)

# Credentials: never store an API key in the notebook itself. Use the
# GOOGLE_API_KEY environment variable when it is already set; otherwise ask
# for it interactively so the secret is never written to the saved file.
# NOTE(review): a literal key used to be hard-coded at this point; treat that
# key as leaked and revoke it.
if not os.environ.get("GOOGLE_API_KEY"):
    from getpass import getpass

    os.environ["GOOGLE_API_KEY"] = getpass("GOOGLE_API_KEY: ")

# Alternative LLM through the Hugging Face Inference API (requires HF_TOKEN):
#   llm = HuggingFaceInferenceAPI(
#       model_name="google/flan-t5-xxl", token=HF_TOKEN
#   )
llm = Gemini(model="models/gemini-1.0-pro", temperature=0.9)
embed_model = HuggingFaceEmbeddings(model_name="thenlper/gte-large")

prompt_helper = PromptHelper(
    context_window=500,
    # num_output=256,
    chunk_overlap_ratio=0.1,
    chunk_size_limit=None,
)

# NOTE(review): ServiceContext is reported as deprecated by recent llama-index
# releases in favour of `llama_index.core.Settings`; kept here so the names
# defined by this cell stay unchanged. Confirm against the installed version.
service_context = ServiceContext.from_defaults(
    embed_model=embed_model,
    llm=llm,
    chunk_size=512,
    prompt_helper=prompt_helper,
)
storage_context = StorageContext.from_defaults()  # vector store

index = VectorStoreIndex(
    nodes,
    service_context=service_context,
    storage_context=storage_context,
)
index.storage_context.persist(persist_dir=PERSIST_DIR)

# Reloading a previously persisted index instead of rebuilding (currently disabled):
#   if os.path.exists(PERSIST_DIR):
#       storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
#       index = load_index_from_storage(storage_context)

  service_context = ServiceContext.from_defaults(embed_model=embed_model, llm=llm, chunk_size=512, prompt_helper=prompt_helper)


'\nelse:\n    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)\n    index = load_index_from_storage(storage_context)\n'

In [None]:
# Retriever over the index that returns the 10 most similar nodes per query.
retriever = VectorIndexRetriever(index=index, similarity_top_k=10)

In [None]:
# Number of nodes the parser produced from the loaded documents.
len(nodes)

2856

In [None]:
# Spot check: length of the text held by the node at index 99.
len(nodes[99].text)

1643

In [None]:
# Build a query engine on top of the custom retriever and ask the first question.
# (Alternative that was tried: query_engine = index.as_query_engine())
query_engine = RetrieverQueryEngine(retriever=retriever)
response = query_engine.query("who was Chani?")

In [None]:
# Display the object returned by the first query.
response

In [None]:
# Print the first query's answer as plain text.
print(response)

Chani is the daughter of Liet and is a Fremen. She is also a Sayyadina, which is a consecrated woman in the Fremen culture.


In [None]:
# Length of the text of the third source node attached to the response.
len(response.source_nodes[2].text)

2215

In [None]:
# Print the text of the third source node to inspect what was retrieved.
print(response.source_nodes[2].text)

In [None]:
# Display the first query's response object again.
response

In [None]:
# Second question, sent through the same query engine.
response_2 = query_engine.query("How Stilgar dies?")

In [None]:
# Print the second query's answer as plain text.
print(response_2)

This context does not mention anything about Stilgar's death, so I cannot answer this question from the provided context.


In [None]:
# Display the object returned by the second query.
response_2