# SIMPLE RAG
Here we take 5 different AI-generated files, store their embeddings in a pgvector vector database running in Docker, and then build a chatbot on top of them.

Docker volume name: `pgvector_RAG_chp2data`


## 1- Ingestion process
![Ingestion process](ingest.png)

### 1-1 Import 5 different docs 
* 1. Fairy story: Alice Enchanted Heart
* 2. Blood Pressure and Heart Attack
* 3. Medals of the 2012 Olympic Games
* 4. Star Wars movie facts and interesting points
* 5. Tesla revenue 2023

In [None]:
# use langchain different loaders to load documents from various sources
from langchain_community.document_loaders import PyPDFLoader,TextLoader,UnstructuredMarkdownLoader

# Each source file is read with the loader class that matches its format;
# the files are loaded one after another, in the order listed.
doc_1, doc_2, doc_3, doc_4, doc_5 = (
    loader_cls(path).load()
    for loader_cls, path in (
        (PyPDFLoader, "./1_Alice_story.pdf"),
        (PyPDFLoader, "./2_Medical_paper.pdf"),
        (TextLoader, "./3_olympics2012.txt"),
        (UnstructuredMarkdownLoader, "./4_star_war.md"),
        (PyPDFLoader, "./5_TeslaRevenue2023.pdf"),
    )
)

### 1-2 Split docs into chunks

In [None]:
# use langchain text splitter to split the documents into smaller chunks
from langchain_text_splitters import RecursiveCharacterTextSplitter
from pprint import pprint


# Chunking parameters: chunks of at most 1000 characters, with an overlap of
# 200 characters between consecutive chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

# Concatenate the loaded documents into one list
docs = doc_1 + doc_2 + doc_3 + doc_4 + doc_5

# Split the documents into smaller chunks
splitted_texts = text_splitter.split_documents(docs)

# Plain messages go through print(): pprint() would render them as quoted,
# line-wrapped string literals instead of readable text.
print(f"The number of original documents: {len(docs)} and after splitting there are {len(splitted_texts)} of chunks.")

# Show the metadata of one chunk. Index 10 is used when it exists; with fewer
# chunks the last one is shown, and nothing is shown for an empty result.
if splitted_texts:
    sample_index = min(10, len(splitted_texts) - 1)
    print("Here is an example of a splitted text chunk metadata:")
    pprint(splitted_texts[sample_index].metadata)

### 1-3 Embedding the chunks

In [None]:
# using ollama as embedding model
from langchain_ollama import OllamaEmbeddings

# Ollama-backed embedding client; the same object is handed to the vector
# store later in the notebook.
embedding_model = OllamaEmbeddings(
    model="llama3.1:latest",
)

# The PGVector store calls the embedding model for us when the chunks are inserted (next section), so the manual embedding call below is left commented out
# embeddings = embedding_model.embed_documents([chunk.page_content for chunk in splitted_texts])

# print(f"The dimension of the embeddings matrix are(rows*columns): {len(embeddings)}*{len(embeddings[0])}")

### 1-4 Save in a vector database

In [None]:
# First we need a Docker container to host the vector database.
# We use the pgvector Docker image (create a docker-compose.yml file with the following content):

# services:
#   pgvector:
#     image: pgvector/pgvector:pg16
#     container_name: pgvector-container
#     environment:
#       POSTGRES_USER: langchain
#       POSTGRES_PASSWORD: langchain
#       POSTGRES_DB: langchain
#       POSTGRES_HOST_AUTH_METHOD: md5
#     ports:
#       - "6024:5432"
#     volumes:
#       - pgvector_RAG_chp2data:/var/lib/postgresql/data

# volumes:
#   pgvector_RAG_chp2data:

In [None]:
# # =====================Error handling====================================
# #clean the documents from null characters
# def clean_doc(doc):
#     doc.page_content = doc.page_content.replace("\x00", "")
#     return doc
# cleaned_docs = [clean_doc(d) for d in splitted_texts]
# # ===================================================================

In [None]:
# from langchain_postgres.vectorstores import PGVector

# connection = "postgresql+psycopg://langchain:langchain@127.0.0.1:6024/langchain"
# db = PGVector.from_documents(cleaned_docs, embedding_model, connection=connection)


In [None]:
# Attach to the already-populated store before querying. The ingestion cell
# that used to create `db` is commented out above, so on a fresh kernel this
# cell would otherwise fail with a NameError. Attaching to the existing index
# re-uses the stored vectors; nothing is re-inserted.
from langchain_postgres.vectorstores import PGVector

connection = "postgresql+psycopg://langchain:langchain@127.0.0.1:6024/langchain"
db = PGVector.from_existing_index(embedding=embedding_model, connection=connection)

# Sanity check: fetch the single closest chunk for a question about the Alice story
db.similarity_search("What is the name of the village where Alice lives?", k=1)

In [None]:
# Connect to the vector database again to re-use the existing store (no re-insertion)
from langchain_postgres.vectorstores import PGVector
from langchain_ollama import OllamaEmbeddings

# Connection URL of the Postgres/pgvector instance (port 6024 on localhost)
connection = "postgresql+psycopg://langchain:langchain@127.0.0.1:6024/langchain"

# Embedding client handed to the vector store
embedding_model = OllamaEmbeddings(
    model="llama3.1:latest",
)

# Attach to the store that already exists instead of inserting the documents again
db = PGVector.from_existing_index(
    embedding=embedding_model,
    connection=connection,
)

## 2- RAG system


In [2]:
# Connect to the existing vector database again so it can be used for similarity search
from langchain_postgres.vectorstores import PGVector
from langchain_ollama import OllamaEmbeddings

# Embedding client used by the vector store for the queries in this section
embedding_model = OllamaEmbeddings(
    model="llama3.1:latest",
)

# Where the pgvector database is reachable from this notebook
connection = "postgresql+psycopg://langchain:langchain@127.0.0.1:6024/langchain"

# Re-use the existing store: this only attaches to it, no documents are re-inserted
db = PGVector.from_existing_index(
    connection=connection,
    embedding=embedding_model,
)

In [9]:
from langchain_ollama import ChatOllama
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import chain

# Chat model that writes the final answer
llm_model = ChatOllama(model='llama3.1:latest')

# Prompt for the answer step. It is built from adjacent string literals so the
# model receives clean lines; a triple-quoted block aligned with the code
# would also send the source indentation on every line. The "I don't know"
# fallback instruction is spelled out as a quoted phrase.
prompt = ChatPromptTemplate.from_template(
    "Answer the question briefly based on the following retrieved documents, "
    "and if you don't know the answer say \"I don't know\".\n"
    "Context: {context}\n"
    "Question: {question}\n"
    "Answer: "
)

# Retriever over the vector store; k=10 asks for ten chunks per query
retriever = db.as_retriever(search_kwargs={"k": 10})

@chain
def q_a(input):
    """Answer a question using chunks retrieved from the vector database.

    Args:
        input: The user's question as a string. The name shadows the
            built-in ``input``; it is kept so existing calls keep working.

    Returns:
        The message returned by ``llm_model`` for the filled-in prompt.
    """
    # Fetch the relevant chunks for the question
    docs = retriever.invoke(input)
    # Hand only the chunk text to the prompt: formatting the Document objects
    # themselves would put their repr (metadata, escaped newlines) into the
    # context instead of readable text.
    context = "\n\n".join(doc.page_content for doc in docs)
    # Fill in the prompt template
    formatted = prompt.invoke({"context": context, "question": input})
    # Generate the answer
    return llm_model.invoke(formatted)

# Try the chain on one question; the returned message is shown as the cell output
q_a.invoke("who wins the olympics 2012? ")


AIMessage(content='The United States topped the medal table at London 2012, winning 46 gold medals, 29 silver medals, and 29 bronze medals, for a total of 104 medals.', additional_kwargs={}, response_metadata={'model': 'llama3.1:latest', 'created_at': '2026-02-10T16:11:00.773938825Z', 'done': True, 'done_reason': 'stop', 'total_duration': 1260135223, 'load_duration': 61405010, 'prompt_eval_count': 2032, 'prompt_eval_duration': 593477488, 'eval_count': 39, 'eval_duration': 582585056, 'model_name': 'llama3.1:latest'}, id='lc_run--019c4852-0039-7122-b227-f0bcbd6fa448-0', tool_calls=[], invalid_tool_calls=[], usage_metadata={'input_tokens': 2032, 'output_tokens': 39, 'total_tokens': 2071})