In [1]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama

## Load data

In [2]:
# Reader reference:
# https://docs.llamaindex.ai/en/stable/module_guides/loading/simpledirectoryreader/
# Point a SimpleDirectoryReader at the "data" folder, then load its contents.
reader = SimpleDirectoryReader("data")
documents = reader.load_data()

In [27]:
# The reader loads the data into Document objects; print the first one as a sanity check.
first_document = documents[0]
print(first_document)

Doc ID: 8865b5a0-5a92-4f87-96b1-6ff2d4c7eee6
Text: What I Worked On  February 2021  Before college the two main
things I worked on, outside of school, were writing and programming. I
didn't write essays. I wrote what beginning writers were supposed to
write then, and probably still are: short stories. My stories were
awful. They had hardly any plot, just characters with strong feelings,
which I ...


## Embedding config

In [28]:
# Embedding model: bge-base (BAAI/bge-base-en-v1.5), loaded through HuggingFaceEmbedding
# and then registered on the Settings object.
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5")
Settings.embed_model = embed_model

## LLM connection

In [10]:
# LLM: the llama3.1 model accessed through Ollama, then registered on the Settings object.
# request_timeout is raised to 360.0 (presumably seconds -- confirm against the Ollama integration docs).
llm = Ollama(model="llama3.1", request_timeout=360.0)
Settings.llm = llm

## Building an index

In [29]:
# Build a vector index over the documents loaded from the data folder.
# Here that is just the essay text, but the folder could contain many documents.
#
# Reference: https://docs.llamaindex.ai/en/stable/module_guides/indexing/vector_store_index/
# Per the docs: "By default, VectorStoreIndex stores everything in memory"
index = VectorStoreIndex.from_documents(documents)

## Querying data

In [30]:
# Engine used to query the index (single question -> single answer).

# Reference: https://docs.llamaindex.ai/en/stable/module_guides/deploying/query_engine/
# For multi-turn use, the docs point to Chat Engine instead:
# "If you want to have a conversation with your data (multiple back-and-forth instead of a single question & answer),
# take a look at Chat Engine"


query_engine = index.as_query_engine()

In [31]:
# Reference: https://docs.llamaindex.ai/en/stable/understanding/querying/querying/
# "querying is just a prompt call to an LLM"
#
# Querying consists of three distinct stages:
# 1. Retrieval is when you find and return the most relevant documents for your
#    query from your Index. As previously discussed in indexing, the most common
#    type of retrieval is "top-k" semantic retrieval, but there are many other
#    retrieval strategies.
# 2. Postprocessing is when the Nodes retrieved are optionally reranked,
#    transformed, or filtered, for instance by requiring that they have specific
#    metadata such as keywords attached.
# 3. Response synthesis is when your query, your most-relevant data and your
#    prompt are combined and sent to your LLM to return a response.

# Keep the question in its own variable so it is easy to change and re-run.
question = "What did the author do growing up?"
response = query_engine.query(question)
print(response)

The author worked on writing short stories outside of school and also tried to write programs on an IBM 1401 computer in their early teenage years.
