# Basic RAG Pipeline

### Load and Split Document

In [None]:
from langchain_community.document_loaders import TextLoader

from langchain_text_splitters import CharacterTextSplitter

# Read the source text from disk, decoding it as UTF-8.
loader = TextLoader('./apple.txt', encoding='utf-8')
raw_documents = loader.load()

# Cut the loaded documents into chunks (chunk_size=500, no overlap between
# neighbouring chunks) so each piece can be embedded on its own.
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)
documents = text_splitter.split_documents(raw_documents)

### Store Documents in Vector Database

In [None]:
from langchain_community.embeddings import LlamafileEmbeddings
from langchain_community.vectorstores import Qdrant

# Embedding client used to vectorise every chunk (constructed with its defaults).
embedding_model = LlamafileEmbeddings()

# Embed the chunks and store them in an on-disk Qdrant collection.
# force_recreate=True rebuilds the collection on each run: the store persists
# under ./local_qdrant, so without it re-running this cell would append the same
# chunks again and similarity searches would return duplicate hits.
db = Qdrant.from_documents(
    documents,
    embedding_model,
    path="./local_qdrant",
    collection_name="apple_basic",
    force_recreate=True,
)

### Query Vector Database

In [None]:
# Question to ask; an alternative example query is kept here for quick swapping.
# query = "In what ways did Martina's sense of aesthetics influence the design of the computer's exterior?"
query = "How did Timmy and Martina manage to make the computer both easy to use and powerful with just old parts?"

# Each hit is a (document, score) pair.
docs = db.similarity_search_with_score(query)

# Plain text of every hit, with the scores dropped.
only_text_results = [doc.page_content for doc, _ in docs]

# Show each hit's score followed by its text.
for doc, score in docs:
    print(f'Score: {score}')
    print(f'Text:\n{doc.page_content}\n\n')

### Retrieval-Augmented Generation (RAG)

In [None]:
from langchain.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser

def format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    Without this step the prompt's {context} slot receives the retriever's
    raw list of documents, whose string form includes object wrappers and
    metadata rather than just the passage text.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Expose the vector store as a retriever so it can be composed into the chain.
retriever = db.as_retriever()

template = """Answer the following question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

model_name = 'gpt-3.5-turbo-0125'
model = ChatOpenAI(model_name=model_name)

# The incoming question is routed two ways: through the retriever (then
# flattened to plain text) to fill {context}, and passed through unchanged to
# fill {question}. The model's reply is reduced to a plain string.
chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

chain.invoke(query)