# Imports

In [None]:
from langchain.retrievers import ParentDocumentRetriever
from langchain_chroma import Chroma
from langchain.storage import InMemoryStore
from langchain_openai import ChatOpenAI
from langchain_text_splitters import RecursiveCharacterTextSplitter
import pickle
from langchain_huggingface import HuggingFaceEmbeddings
import os
import keys
import pandas as pd
from langchain_community.document_loaders.recursive_url_loader import RecursiveUrlLoader
from bs4 import BeautifulSoup as Soup
from tqdm import tqdm
import pandas as pd
import ast


# Make the OpenAI key from the `keys` module visible to the OpenAI client
# through the process environment.
os.environ.update({"OPENAI_API_KEY": keys.OPENAI_API_KEY})

# Chat model shared by question generation and evaluation; up to 50 retries
# per request.
llm = ChatOpenAI(max_retries=50, model="gpt-3.5-turbo-0125")

# Hugging Face embedding model shared by the vector store, the test-set
# generator and the evaluation metrics.
embeddings = HuggingFaceEmbeddings(
    model_name="avsolatorio/GIST-Embedding-v0",
)

# Corpus Generation

In [None]:
# url = "https://docs.streamlit.io/"
# loader = RecursiveUrlLoader(url=url, max_depth=5, extractor=lambda x: Soup(x, "html.parser").text)
# docs = loader.load()
# for i in docs:
#     if "content_type" in i.metadata:
#         del i.metadata["content_type"]
#     if "language" in i.metadata:
#         del i.metadata["language"]
#     if "description" in i.metadata:
#         del i.metadata["description"]
# pickle.dump(docs, open("docs", "wb"))


In [None]:
# Load the corpus from the "docs" pickle (the file the commented-out
# scraping cell writes with pickle.dump).
# NOTE(security): unpickling can execute arbitrary code; only load a "docs"
# file that you produced yourself.
# The context manager closes the file handle as soon as loading finishes
# instead of leaving it open until garbage collection.
with open("docs", "rb") as docs_file:
    docs = pickle.load(docs_file)

# Work with the last 9 loaded documents only.
docs = docs[-9:]


In [None]:
# text_splitter = RecursiveCharacterTextSplitter(
#     chunk_size=1000, chunk_overlap=200)
# splits = text_splitter.split_documents(docs)
# db = Chroma.from_documents(docs, embeddings, persist_directory="./chroma_db")
# db = Chroma(persist_directory="./chroma_db", embedding_function=embeddings)

# Question Generation

In [None]:
from ragas.run_config import RunConfig
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context, conditional
from tqdm import tqdm
# Retry settings handed to every ragas generation call.
config = RunConfig(max_retries=50, max_wait=30)

# One DataFrame of generated questions per batch of (up to) 10 documents.
testset = []

for i in tqdm(range(0, len(docs), 10)):
    # A new generator is built for each batch.
    # NOTE(review): presumably this keeps batches independent of each other;
    # confirm the generator holds no state before hoisting it out of the loop.
    generator = TestsetGenerator.from_langchain(llm, llm, embeddings)
    batch = generator.generate_with_langchain_docs(
        docs[i:i+10],
        test_size=10,
        # Mix of question types requested from ragas (fractions sum to 1).
        distributions={
            simple: 0.1,
            reasoning: 0.3,
            multi_context: 0.3,
            conditional: 0.3,
        },
        is_async=False,
        raise_exceptions=False,
        with_debugging_logs=True,
        run_config=config,
    )
    batch_df = batch.to_pandas()
    testset.append(batch_df)
    # Checkpoint each batch to its own CSV so partial progress is on disk.
    # NOTE(review): the meaning of the 448795792 offset in the file name is
    # not visible here.
    batch_df.to_csv(f"{i+448795792}.csv", index=None)

# Combine all batches into a single questions file.
pd.concat(testset).to_csv("questions2.csv", index=None)

# RAG Parent Child Retriever

In [None]:
# Splitters for the two chunk levels: larger parent chunks and smaller
# child chunks.
parent_splitter = RecursiveCharacterTextSplitter(chunk_size=1000)
child_splitter = RecursiveCharacterTextSplitter(chunk_size=400)

# Chroma collection used as the retriever's vector store, persisted
# under ./parent.
vectorstore = Chroma(
    collection_name="full_documents",
    embedding_function=embeddings,
    persist_directory="./parent",
)

# The storage layer for the parent documents.
# NOTE(review): this store lives in memory while the vector store is persisted
# to disk, so re-running this cell presumably re-adds chunks to ./parent;
# confirm whether the directory should be cleared between runs.
store = InMemoryStore()

retriever = ParentDocumentRetriever(
    vectorstore=vectorstore,
    docstore=store,
    child_splitter=child_splitter,
    parent_splitter=parent_splitter,
)

# Split and index the corpus.
retriever.add_documents(docs)


# Chain Building

In [None]:
from rag import setup
from langchain.schema import HumanMessage, AIMessage
# Build the two chains via the project's `rag.setup()`:
#   rag_chain - invoked with the raw question to produce the RAG context
#   chain     - invoked with {"rag_context", "chat_history"} to produce the answer
chain, rag_chain = setup()


In [None]:
# Module-level chat history placeholder. generate_answer() below builds its
# own local `messages` list and does not read this one.
messages = []
# Sample question for trying the chain by hand, e.g. generate_answer(prompt).
prompt = "What is the function decorator used to cache functions that return data in Streamlit?"

def generate_answer(prompt):
    """Answer a single question with the RAG pipeline.

    The question is first sent to ``rag_chain`` to obtain the RAG context.
    It is then wrapped as a one-message chat history (a single
    ``HumanMessage`` whose content is the question followed by a newline)
    and passed, together with that context, to ``chain``.

    Args:
        prompt: The user question as a string.

    Returns:
        Whatever ``chain.invoke`` returns for this question.
    """
    rag_context = rag_chain.invoke(prompt)

    # Each call is a fresh single-turn conversation: only the user's
    # question is in the history.
    history = [HumanMessage(content=prompt + "\n")]

    return chain.invoke({"rag_context": rag_context, "chat_history": history})

# Answer Generation

In [None]:
# Load the generated questions to answer.
# NOTE(review): the generation cell above writes "questions2.csv", while this
# reads "questions.csv" - confirm which file is intended.
df = pd.read_csv("questions.csv")
df.head()

In [None]:
# Run every question through the RAG pipeline, in row order, with a
# progress bar.
answer = []
for question in tqdm(df["question"]):
    answer.append(generate_answer(question))

# Store the answers next to their questions and save the result for the
# evaluation step.
df["answer"] = answer
df.to_csv("question_answer.csv", index=None)

# Evaluation

In [None]:
# Reload the answered questions. The "contexts" and "metadata" columns come
# back from CSV as strings, so parse them into Python literals again.
df = pd.read_csv("question_answer.csv")
for column in ("contexts", "metadata"):
    df[column] = df[column].apply(ast.literal_eval)
df.head()

In [None]:
from ragas import evaluate
from ragas.metrics import answer_relevancy, AnswerCorrectness
import datasets
# Metrics to compute:
#   AnswerCorrectness(weights=[1,0]) - scored 100% on factual correctness
#   answer_relevancy                 - semantic similarity
metrics = [AnswerCorrectness(weights=[1,0]), answer_relevancy]

# Evaluate one row at a time and collect one single-row result frame per
# question.
results = []
for i in tqdm(range(len(df))):
    single_row = datasets.Dataset.from_pandas(df[i:i+1])
    scores = evaluate(single_row, metrics=metrics, llm=llm, embeddings=embeddings)
    results.append(scores.to_pandas())


In [81]:
# Stack the per-question results and save them.
results_df = pd.concat(results)
results_df.to_csv("results.csv", index=None)

# Print the mean of every result column that belongs to one of the
# evaluated metrics, in column order.
metric_names = [metric.name for metric in metrics]
metric_columns = [column for column in results_df.columns if column in metric_names]
for column in metric_columns:
    print("Metric:", column, "\tValue:", results_df[column].mean())

Metric: answer_correctness 	Value: 0.4608059358059359
Metric: answer_relevancy 	Value: 0.8482784726386126
