# RAG-based OLLO QNA

In [None]:
!pip install --upgrade --quiet  gpt4all

In [None]:
!pip install --upgrade langchain pydantic

In [None]:
!pip install -q -U google-generativeai

In [None]:
!pip install streamlit

In [None]:
from langchain.document_loaders import DirectoryLoader
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import GPT4AllEmbeddings
import os
import shutil
from langchain.evaluation import load_evaluator
from pydantic import BaseModel, GetJsonSchemaHandler
from langchain.prompts import ChatPromptTemplate
import google.generativeai as genai
from langchain.chains import ConversationChain
import streamlit as st

In [24]:
# from langchain.chains.conversation.memory import ConversationBufferMemory
from langchain.chains.conversation.memory import ConversationSummaryMemory
# from langchain.chains.conversation.memory import ConversationBufferWindowMemory
# from langchain.chains.conversation.memory import ConversationSummaryBufferMemory

In [5]:
import warnings
# Suppress UserWarning messages whose originating module name matches "langchain",
# so they do not clutter the cell outputs below.
warnings.filterwarnings("ignore", category=UserWarning, module="langchain")

### create chunks from documents

In [6]:
# Directory that load_documents() scans for PDF files (relative to the working directory).
DATA_PATH = "data_sources"

In [7]:
def load_documents():
    """Load the files under ``DATA_PATH`` that match the glob ``*.pdf``.

    Returns:
        Whatever ``DirectoryLoader.load()`` produces for that directory; the
        notebook passes the result on to ``split_text``.
    """
    return DirectoryLoader(DATA_PATH, glob="*.pdf").load()

In [8]:
documents = load_documents()

In [9]:
def split_text(documents: list[Document]):
    """Split the loaded documents into overlapping chunks and preview one of them.

    Chunk sizes are measured with ``len`` (i.e. in characters): at most 300 per
    chunk with a 100-character overlap, and each chunk's start index is recorded
    in its metadata (``add_start_index=True``).

    Args:
        documents: Documents to split.

    Returns:
        The list of chunks produced by the splitter (possibly empty).
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=300,
        chunk_overlap=100,
        length_function=len,
        add_start_index=True,
    )

    chunks = text_splitter.split_documents(documents)

    print(f"Split {len(documents)} documents into {len(chunks)} chunks.")

    # Print one chunk as a sanity check. Index 3 is kept when it exists; on a
    # short corpus fall back to the last chunk instead of raising IndexError,
    # and skip the preview entirely when nothing was produced.
    if chunks:
        sample = chunks[min(3, len(chunks) - 1)]
        print(sample.page_content)
        print(sample.metadata)

    return chunks

In [None]:
doc_chunks = split_text(documents)

In [None]:
doc_chunks

### save chunks to vector database

In [12]:
# Directory in which the Chroma vector store is persisted.
CHROMA_PATH = "chroma"

# Embedding model used both when writing chunks to the store and when querying it.
model_name = "all-MiniLM-L6-v2.gguf2.f16.gguf"
# NOTE(review): 'True' here is a string, not the boolean True — confirm that
# GPT4All interprets it as intended.
gpt4all_kwargs = {'allow_download': 'True'}
gpt4all_embeddings = GPT4AllEmbeddings(
    model_name=model_name,
    gpt4all_kwargs=gpt4all_kwargs
)
    
def save_to_chroma(chunks: list[Document]):
    """Rebuild the Chroma store at ``CHROMA_PATH`` from ``chunks``.

    Any existing directory at ``CHROMA_PATH`` is deleted first, so the store
    only ever contains the chunks passed to this call.

    Args:
        chunks: Document chunks to embed (with ``gpt4all_embeddings``) and store.
    """
    # Clear the previous db so stale chunks do not survive a re-run.
    if os.path.exists(CHROMA_PATH):
        shutil.rmtree(CHROMA_PATH)

    # Index the chunks passed in by the caller. The earlier version read the
    # notebook-level `doc_chunks` here, silently ignoring the argument.
    Chroma.from_documents(
        chunks, gpt4all_embeddings, persist_directory=CHROMA_PATH
    )

    print(f"Saved {len(chunks)} chunks to {CHROMA_PATH}.")

In [13]:
save_to_chroma(doc_chunks)

Saved 19 chunks to chroma.


### query the closest texts from the db

In [None]:
# Queries must be embedded with the same embedding function that was used when
# the chunks were saved, so the same model configuration is re-created here.
model_name = "all-MiniLM-L6-v2.gguf2.f16.gguf"
gpt4all_kwargs = {'allow_download': 'True'}
gpt4all_embeddings = GPT4AllEmbeddings(
    model_name=model_name,
    gpt4all_kwargs=gpt4all_kwargs
)

# Open the vector store persisted at CHROMA_PATH.
db = Chroma(persist_directory=CHROMA_PATH,
            embedding_function=gpt4all_embeddings)

In [15]:
query_text = "should i hire her as a data scientist?"

In [16]:
# Fetch the 3 closest entries for the query; each entry is unpacked further
# down as a (document, score) pair.
results = db.similarity_search_with_relevance_scores(query_text, k=3)

In [None]:
results

### create the prompt with my custom data

In [64]:
# Prompt skeleton: the {context} and {question} placeholders are filled in when
# the template is formatted with the retrieved text and the user's query.
PROMPT_TEMPLATE = """
Given the following context and a question, generate an answer based on this context only.
If the answer is not found in the context, kindly state "I don't know." Don't try to make up an answer.

CONTEXT: {context}

QUESTION: {question}
"""

In [65]:
# PROMPT_TEMPLATE = """
# Answer the question based only on the following context:

# {context}

# ---

# Answer the question based on the above context: {question}
# """

In [19]:
# Concatenate the text of every retrieved document, separated by a "---" rule,
# then substitute it and the query into the prompt template.
context_text = "\n\n---\n\n".join(
    doc.page_content for doc, _score in results
)
prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
prompt = prompt_template.format(
    question=query_text,
    context=context_text,
)

In [None]:
print(prompt)

### prompt the LLM

In [21]:
def _read_api_key(path="GEMINI_KEY.txt"):
    """Return the Gemini API key stored in ``path`` without executing the file.

    Two layouts are accepted: a line of the form ``API_KEY = "<key>"`` (the
    layout the previous ``exec``-based loader relied on), or a file whose whole
    content is the bare key.

    Args:
        path: Location of the key file.

    Returns:
        The key as a string, with surrounding whitespace and quotes removed.

    Raises:
        ValueError: If the file yields an empty key.
    """
    with open(path, "r") as key_file:
        key = key_file.read().strip()

    # Look for an explicit `API_KEY = ...` assignment; the file is treated as
    # plain text, so nothing in it can run as code.
    for line in key.splitlines():
        name, sep, value = line.partition("=")
        if sep and name.strip() == "API_KEY":
            key = value.strip().strip("'\"")
            break

    if not key:
        raise ValueError(f"No API key found in {path}")
    return key


API_KEY = _read_api_key()

In [None]:
# Call Gemini directly through the google.generativeai client: configure it with
# the loaded key, send the formatted RAG prompt and print the text of the reply.
genai.configure(api_key=API_KEY)
model = genai.GenerativeModel("gemini-1.5-flash")
response = model.generate_content(prompt)
print(response.text)

In [None]:
!pip install --upgrade --quiet  langchain-google-genai

In [29]:
from langchain_google_genai import ChatGoogleGenerativeAI

In [30]:
# LangChain chat-model wrapper for the same Gemini model name used in the direct
# call above, authenticated with the same API_KEY.
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", google_api_key=API_KEY)

In [43]:
result = llm.invoke(prompt)

In [None]:
print(result.content)

In [None]:
# Conversation chain whose memory is a ConversationSummaryMemory; the memory is
# given the same llm as the chain itself.
conversation_with_summary = ConversationChain(
    llm=llm,
    memory=ConversationSummaryMemory(llm=llm),
    verbose=True
)

In [None]:
# The chain's input is the user's turn, so pass the formatted RAG prompt (a
# string) rather than the `response` object returned by model.generate_content.
response_with_summary = conversation_with_summary.invoke(prompt)

### prompt with history

In [48]:
# from langchain_openai import ChatOpenAI
# llm = ChatOpenAI(openai_api_key=openai_api_key)

# Parser that converts the chat model's output into plain text. It must be an
# instance: assigning the bare class would leave `output_parser` unusable as a
# chain step.
from langchain_core.output_parsers import StrOutputParser
output_parser = StrOutputParser()

In [51]:
# The retriever takes a question, compares it with the numeric vectors stored in
# the db and returns the most similar text.
retriever = db.as_retriever()

In [None]:
!pip install langchain-core

In [57]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# System instruction: rewrite the latest user question so that it can be
# understood without the chat history; the model is told not to answer it.
instruction_to_system = """
Given a chat history and the latest user question
which might reference context in the chat history, formulate a standalone question
which can be understood without the chat history. Do NOT answer the question,
just reformulate it if needed and otherwise return it as is.
"""

# Message layout: system instruction, then the prior turns supplied under
# "chat_history", then the new question supplied under "question".
question_maker_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", instruction_to_system),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{question}"),
    ]
)

# Pipe prompt -> chat model -> string parser. The parser has to be an instance
# (`StrOutputParser()`); piping the class itself would make the chain try to
# construct a parser from the model's message instead of parsing it.
question_chain = question_maker_prompt | llm | StrOutputParser()