# Build Your First RAG System

1. Data Ingestion.
2. Indexing.
3. Retriever.
4. Response Synthesizer.
5. Querying.

In [None]:
!pip install llama-index

In [None]:
import os
from getpass import getpass

# os.environ only accepts string values, so assigning None raises TypeError.
# Reuse a key that is already exported; otherwise prompt for it without
# echoing so the secret is never stored in the notebook.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass('Enter your OpenAI API key: ')

# Stage 1: Data Ingestion

## 1.1 Data Loaders


In [None]:
#download the file
!mkdir data
!wget https://raw.githubusercontent.com/aravindpai/Speech-Recognition/c9c45731e966592b1805929fc1585c72e1f34f10/dhs.txt -O data/dhs.txt

In [None]:
from llama_index.core import SimpleDirectoryReader

# Point a reader at the data directory, then load its contents.
reader = SimpleDirectoryReader("data")
documents = reader.load_data()

In [None]:
# Show the type of the object returned by load_data().
type(documents)

In [None]:
# Number of items that were loaded.
len(documents)

In [None]:
# Peek at the first loaded item.
documents[0]

## Embedding Model

In [None]:
from llama_index.embeddings.openai import OpenAIEmbedding
# No arguments are passed, so the library's default configuration applies.
embed_model = OpenAIEmbedding()
# Print the embedding model object for inspection.
print(embed_model)

## LLM

In [None]:
from llama_index.llms.openai import OpenAI
# No arguments are passed, so the library's default configuration applies.
llm = OpenAI()

# Stage 2: Indexing

In [None]:
from llama_index.core import ServiceContext
service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embed_model)

In [None]:
from llama_index.core import VectorStoreIndex
index = VectorStoreIndex.from_documents(documents,service_context=service_context)

# Stage 3: Retrieval

In [None]:
# Obtain a retriever over the index; no options are passed, so defaults apply.
retriever = index.as_retriever()

In [None]:
# Run the retriever on a sample question and keep the returned nodes.
question = "What is the theme of DHS?"
retrieved_nodes = retriever.retrieve(question)

In [None]:
# Text of the first retrieved node.
retrieved_nodes[0].text

In [None]:
# Text of the second retrieved node.
retrieved_nodes[1].text

# Stage 4: Response Synthesis


In [None]:
from llama_index.core.response_synthesizers import get_response_synthesizer
# Build a response synthesizer; no arguments are passed, so defaults apply.
response_synthesizer = get_response_synthesizer()

# Stage 5: Query Engine

In [None]:
# Only the LLM is supplied explicitly; the retriever and response synthesizer
# are left to the index's defaults.
query_engine = index.as_query_engine(llm=llm)

In [None]:
# Send the sample question through the query engine and keep the result.
response = query_engine.query("What is the theme of DHS?")

In [None]:
# Display the `response` attribute of the result (presumably the answer text).
response.response

In [None]:
# Display the second entry (index 1) of the source nodes attached to the result.
response.source_nodes[1]

# End to End RAG Pipeline

In [None]:
# Self-contained pipeline: every name used below is imported in this cell, so
# it can run without executing the earlier cells first.
import os
from getpass import getpass

from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI

# os.environ only accepts strings (assigning None raises TypeError); prompt
# for the key only when it is not already set.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass('Enter your OpenAI API key: ')

# Load the documents from the data directory.
documents = SimpleDirectoryReader("data").load_data()

# Register the models globally. Settings replaces ServiceContext, which was
# deprecated in llama-index 0.10 and removed in 0.11.
llm = OpenAI()
embed_model = OpenAIEmbedding()
Settings.llm = llm
Settings.embed_model = embed_model

# Build the index and a query engine on top of it.
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()

print(query_engine.query("What is the theme of DHS?").response)

In [None]:
# Ask a second question and print the `response` attribute of the result.
answer = query_engine.query("Who is Anand Mishra?")
print(answer.response)