# Retrieval Augmented Generation (RAG)

In [None]:
from langchain_community.document_loaders import TextLoader
from langchain_community.chat_models import ChatOllama
from langchain_core.documents import Document
from pypdf import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_openai import OpenAI
import numpy as np

In [None]:
# functions to read documents from different sources (PDF, web page, local text file)

def read_pdf(pdf_file):
    """Extract the text of a PDF and split it into chunks.

    Args:
        pdf_file: The PDF to read, passed unchanged to ``PdfReader``.

    Returns:
        The chunks produced by ``RecursiveCharacterTextSplitter``
        (``chunk_size=1000``, ``chunk_overlap=0``) from one ``Document``
        per page that contains text.
    """
    reader = PdfReader(pdf_file)
    # Extract each page exactly once: extraction is the expensive step and
    # the result is needed both for the emptiness check and for the content.
    raw_texts = (page.extract_text() for page in reader.pages)
    # Strip before the final filter so a whitespace-only page does not turn
    # into an empty Document.
    stripped_texts = (text.strip() for text in raw_texts if text)
    docs = [Document(page_content=text) for text in stripped_texts if text]
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    return text_splitter.split_documents(docs)

def read_weblink(url):
    """Load the page at ``url`` and return it split into chunks.

    The page is loaded with ``WebBaseLoader`` and split with a
    ``RecursiveCharacterTextSplitter`` (``chunk_size=1000``,
    ``chunk_overlap=0``).
    """
    chunker = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    pages = WebBaseLoader(url).load()
    return chunker.split_documents(pages)

def read_local_text_file(filename):
    """Load the text file ``filename`` and return it split into chunks.

    The file is loaded with ``TextLoader`` and split with a
    ``RecursiveCharacterTextSplitter`` (``chunk_size=1000``,
    ``chunk_overlap=0``).
    """
    chunker = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    loaded = TextLoader(filename).load()
    return chunker.split_documents(loaded)

# Embeddings

In [None]:
# Embedding model shared by embedding_documents, embedding_text and
# retrieve_documents. Keep exactly one assignment active; the commented-out
# lines are alternative backends.
# NOTE(review): presumably ./chroma_db must be rebuilt after switching
# backends, since stored and query vectors would come from different models
# - confirm.
# embeddings = OllamaEmbeddings(model="mixtral")
embeddings = GPT4AllEmbeddings()
# embeddings = HuggingFaceEmbeddings()

In [None]:
# embedding and saving to vector database
def embedding_documents(documents):
    """Embed ``documents`` and store them in the Chroma database at ``./chroma_db``.

    Args:
        documents: Document chunks to index, e.g. the output of one of the
            ``read_*`` helpers.

    Returns:
        The ``Chroma`` vector store created from ``documents``. Callers that
        only need the on-disk database can ignore the return value.
    """
    # Return the store instead of dropping it in an unused local, so callers
    # can query it directly without reopening the persisted directory.
    return Chroma.from_documents(
        documents=documents,
        embedding=embeddings,
        persist_directory="./chroma_db",
    )

# embedding only
def embedding_text(text):
    """Return the embedding of ``text`` from the module-level ``embeddings`` model."""
    vector = embeddings.embed_query(text)
    return vector

In [None]:
# retrieve documents from vector database

def retrieve_documents(question):
    """Return the stored documents retrieved for ``question``.

    Opens the Chroma database persisted at ``./chroma_db`` with the
    module-level ``embeddings`` model and queries it through a
    ``similarity`` retriever.
    """
    store = Chroma(embedding_function=embeddings, persist_directory="./chroma_db")
    return store.as_retriever(search_type="similarity").invoke(question)

# Retrieval Augmented Generation functions

In [None]:
def rag(query, retrieved_documents):
    """Answer ``query`` with the Ollama ``mixtral`` chat model.

    Args:
        query: The user's question.
        retrieved_documents: Context for the answer; it is interpolated into
            the user message through its string form.

    Returns:
        The ``content`` of the chat model's reply.
    """
    system_prompt = "You are an assistant. You will be shown the user's question, and the relevant information. Respond according to the provided information"
    user_prompt = f"Question: {query}. \n Information: {retrieved_documents}"
    chat_model = ChatOllama(model="mixtral")
    reply = chat_model.invoke(
        [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ]
    )
    return reply.content


def rag_openai(query, retrieved_documents):
    """Answer ``query`` with an OpenAI model, using the retrieved documents.

    Args:
        query: The user's question.
        retrieved_documents: Context for the answer; it is interpolated into
            the user message through its string form.

    Returns:
        The model's answer as a string.
    """
    import os  # local import: only needed for the API-key lookup below

    messages = [
        {
            "role": "system",
            "content": "You are an assistant. You will be shown the user's question, and the relevant information. Respond according to the provided information"
        },
        {
            "role": "user",
            "content": f"Question: {query}. \n Information: {retrieved_documents}"
        }
    ]
    # Prefer the key from the environment so it does not have to be written
    # into the notebook; fall back to the original placeholder otherwise.
    api_key = os.environ.get("OPENAI_API_KEY") or "API_KEY"
    llm = OpenAI(openai_api_key=api_key)
    response = llm.invoke(messages)
    # `OpenAI` is the text-completion wrapper, whose invoke() returns a plain
    # string; only chat models return a message object with `.content`.
    return response if isinstance(response, str) else response.content

In [None]:
# step 1: save document to vector database
# Pick one source by leaving exactly one loader call uncommented; each loader
# returns the document already split into chunks.
# docs = read_local_text_file('./docs/demo.txt')
# docs = read_pdf('./docs/demo.pdf')
docs = read_weblink('https://docs.scala-lang.org/overviews/scala-book/prelude-taste-of-scala.html')
# Embed the chunks and store them in the Chroma database under ./chroma_db.
embedding_documents(docs)
# Sanity check: number of chunks and the first chunk.
print(len(docs))
print(docs[0])

In [None]:
# step 2: retrieve and call LLM
search_query = 'what is the difference of var and val?'
# Fetch the stored chunks that the similarity retriever returns for the question.
related_documents = retrieve_documents(search_query)
# Ask the Ollama chat model (see rag) to answer using those chunks as context.
output = rag(query=search_query, retrieved_documents=related_documents)
print(search_query)
print(output)

In [None]:
# test cosine similarity of the embeddings of two words

from numpy.linalg import norm

text = 'income'
embedding_result1 = embedding_text(text)
# print(embedding_result1)

embedding_result2 = embedding_text('revenue')
# print(embedding_result2)

# cosine similarity = dot(a, b) / (|a| * |b|)
cosine = np.dot(embedding_result1, embedding_result2) / (norm(embedding_result1) * norm(embedding_result2))
print(cosine)

In [None]:
# test embeddings performance
# Times a single embed_query call on the first chunk of the demo text file.
# Swap the active test_embeddings line to compare backends.

test_embeddings = OllamaEmbeddings(model="mixtral")
# test_embeddings = GPT4AllEmbeddings()
# test_embeddings = HuggingFaceEmbeddings()

import time
docs = read_local_text_file('./docs/demo.txt')
text = docs[0].page_content
# perf_counter is the clock intended for measuring short durations.
start_time = time.perf_counter()
query_result = test_embeddings.embed_query(text)
# Stop the clock before printing so only the embedding call is measured.
end_time = time.perf_counter()
print(len(query_result))
print("Running time: ", end_time - start_time)