In [12]:
# Retrieval Augmented Generation
from dotenv import load_dotenv
import os
# Load variables from a local .env file into the process environment
# (python-dotenv) so the next cell can read OPENAI_API_KEY via os.getenv.
# The `True` displayed below is load_dotenv()'s return value.
load_dotenv()

True

In [13]:
# Fail fast with an actionable message when the key is missing.
# The previous form, os.environ[K] = os.getenv(K), raised an opaque
# "TypeError: str expected, not NoneType" in that case, and was a no-op
# whenever the key was already present (it copied the variable onto itself).
if "OPENAI_API_KEY" not in os.environ:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )

In [14]:
from llama_index import VectorStoreIndex , SimpleDirectoryReader

# Read the files found in the local "data" directory into Document objects
# (the first one is inspected in the next cell).
documents = SimpleDirectoryReader("data").load_data()

In [15]:
# Sanity check: show the first loaded Document (its metadata and the start of its text).
documents[0]

Document(id_='070fb73f-be16-4f01-89eb-53aadcf248b1', embedding=None, metadata={'page_label': '1', 'file_name': 'attnetion.pdf', 'file_path': 'data/attnetion.pdf', 'file_type': 'application/pdf', 'file_size': 2215244, 'creation_date': '2024-01-30', 'last_modified_date': '2024-01-30', 'last_accessed_date': '2024-01-30'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, text='Provided proper attribution is provided, Google hereby grants permission to\nreproduce the tables and figures in this paper solely for use in journalistic or\nscholarly works.\nAttention Is All You Need\nAshish Vaswani∗\nGoogle Brain\navaswani@google.comNoam Shazeer∗\nGoogle Brain\nnoam@google.comNiki Parmar∗\nGoogle Research\nnikip@google.comJakob Uszkoreit∗\nGoogle Research\nusz@google.com\

In [16]:
# Build a vector index over the loaded documents. With show_progress enabled
# the "Parsing nodes" and "Generating embeddings" progress bars are rendered.
index = VectorStoreIndex.from_documents(
    documents,
    show_progress=True,
)

Parsing nodes:   0%|          | 0/25 [00:00<?, ?it/s]

Parsing nodes: 100%|██████████| 25/25 [00:00<00:00, 503.73it/s]
Generating embeddings: 100%|██████████| 36/36 [00:02<00:00, 16.54it/s]


In [17]:
# Display the index object's repr to confirm it was created.
index

<llama_index.indices.vector_store.base.VectorStoreIndex at 0x7f0abacd1300>

In [18]:
# Query engine with the index's default settings.
# NOTE(review): this value is never queried; `query_engine` is reassigned to a
# RetrieverQueryEngine in the following code cell before the first query runs.
query_engine = index.as_query_engine()

In [24]:
from llama_index.retrievers import VectorIndexRetriever
from llama_index.query_engine import RetrieverQueryEngine
from llama_index.indices.postprocessor import SimilarityPostprocessor

# Retrieval settings, named so they are easy to find and tune.
SIMILARITY_TOP_K = 4     # maximum number of nodes fetched from the index per query
SIMILARITY_CUTOFF = 0.8  # retrieved nodes scoring below this similarity are discarded

# Stage 1: fetch the SIMILARITY_TOP_K most similar nodes for a query.
retriever = VectorIndexRetriever(index=index, similarity_top_k=SIMILARITY_TOP_K)

# Stage 2: drop weak matches before they reach the LLM.
# NOTE: 0.8 is strict for this corpus. The sample query in this notebook ends
# up with a single source node (similarity ~0.82). Lower the cutoff if answers
# come back empty.
postprocessor = SimilarityPostprocessor(similarity_cutoff=SIMILARITY_CUTOFF)

# Combine retrieval and filtering into the engine used for the queries below.
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    node_postprocessors=[postprocessor],
)

In [25]:
# Ask the engine a question about the indexed paper; the result is kept in
# `response` so the following cells can print it and inspect its sources.
response = query_engine.query("What is Attention is all you need")

In [26]:
# Printing the response shows just the generated answer text.
print(response)

The paper "Attention Is All You Need" proposes a new network architecture called the Transformer. This architecture is based solely on attention mechanisms and does not use recurrent or convolutional neural networks. The paper demonstrates that the Transformer models achieve superior quality in machine translation tasks while being more parallelizable and requiring less training time compared to existing models. The paper also shows that the Transformer generalizes well to other tasks, such as English constituency parsing.


In [27]:
from llama_index.response.pprint_utils import pprint_response
# Pretty-print the answer followed by the source node(s) behind it; each
# source is listed with its node ID, similarity score and a text excerpt.
pprint_response(response,show_source=True)

Final Response: The paper "Attention Is All You Need" proposes a new
network architecture called the Transformer. This architecture is
based solely on attention mechanisms and does not use recurrent or
convolutional neural networks. The paper demonstrates that the
Transformer models achieve superior quality in machine translation
tasks while being more parallelizable and requiring less training time
compared to existing models. The paper also shows that the Transformer
generalizes well to other tasks, such as English constituency parsing.
______________________________________________________________________
Source Node 1/1
Node ID: 37016653-36d5-406d-aa41-a0e0b0fd5fa8
Similarity: 0.8242520844390897
Text: Provided proper attribution is provided, Google hereby grants
permission to reproduce the tables and figures in this paper solely
for use in journalistic or scholarly works. Attention Is All You Need
Ashish Vaswani∗ Google Brain avaswani@google.comNoam Shazeer∗ Google
Brain noam@googl

In [29]:
import os.path
from llama_index.response.pprint_utils import pprint_response
from llama_index import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    load_index_from_storage,
)

# Reuse an index persisted by an earlier run when the storage directory is
# present; otherwise build the index from ./data and save it for next time.
PERSIST_DIR = "./storage"
if os.path.exists(PERSIST_DIR):
    # Storage directory found: restore the index from it.
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)
else:
    # Nothing persisted yet: read the source files and build the index...
    documents = SimpleDirectoryReader("data").load_data()
    index = VectorStoreIndex.from_documents(documents)
    # ...then write it to disk so later runs take the branch above.
    index.storage_context.persist(persist_dir=PERSIST_DIR)

# Both branches leave `index` ready to be queried.
query_engine = index.as_query_engine()
response = query_engine.query("What are transformers?")
pprint_response(response)

Final Response: Transformers are a model architecture that rely
entirely on an attention mechanism to draw global dependencies between
input and output. They eschew recurrence and do not use sequence-
aligned recurrent neural networks or convolutional neural networks.
Transformers allow for significantly more parallelization and have
been shown to achieve state-of-the-art results in tasks such as
translation.
