# Graphene Q&A Chatbot

References: 

[1] Sahota, H. (2023). RAG Evaluation Using LangChain and Ragas. Deci. https://deci.ai/blog/evaluating-rag-pipelines-using-langchain-and-ragas/ 

[2] Julien Simon (2023). Retrieval-Augmented Generation chatbot, part 1: LangChain, Hugging Face, FAISS, AWS [Video]. YouTube. URL https://www.youtube.com/watch?v=7kDaMz3Xnkw&t=240s

### Installing libraries

In [None]:
# Install or upgrade the notebook's third-party dependencies via shell pip.
# NOTE(review): arxiv, pymupdf, FlagEmbedding, ninja, flash_attn and the two
# faiss packages are not imported by any cell visible in this notebook —
# confirm they are still required before keeping them in the environment.
!pip install -U langchain
!pip install -U openai
!pip install -U ragas
!pip install -U arxiv
!pip install -U pymupdf
!pip install -U chromadb
!pip install -U tiktoken
!pip install -U accelerate
!pip install -U bitsandbytes
!pip install -U datasets
!pip install -U sentence_transformers
!pip install -U FlagEmbedding
!pip install -U ninja
# flash_attn is installed with build isolation disabled (see the flag below).
!pip install -U flash_attn --no-build-isolation
!pip install -U tqdm
!pip install -U rank_bm25
!pip install -U transformers
!pip install -U faiss
!pip install -U faiss-gpu
# matplotlib is installed without -U, i.e. an existing install is left as-is.
!pip install matplotlib

In [None]:
%pip install --upgrade  boto3 langchain-openai tiktoken python-dotenv

In [None]:
%pip install --upgrade  "amazon-textract-caller>=0.2.0"

In [None]:
!pip install amazon-textract-textractor

In [None]:
import os
import openai
from getpass import getpass

# Read the key from a hidden prompt, then store it both on the openai module
# and in the OPENAI_API_KEY environment variable.
os.environ["OPENAI_API_KEY"] = openai.api_key = getpass(
    "Please provide your OpenAI Key: "
)

### Loading the graphene PDF from an S3 bucket into the notebook

In [None]:
from langchain_community.document_loaders import AmazonTextractPDFLoader
import boto3

# S3 location of the graphene PDF that Textract will read.
file_path = "s3://sagemaker-us-east-1-182943155813/graphene-rag/graphenepdfs_full.pdf"

# Textract client pinned to us-east-1; it is handed to the loader below.
textract_client = boto3.client("textract", region_name="us-east-1")

# Build the Textract-backed loader and load the PDF into LangChain documents.
loader = AmazonTextractPDFLoader(file_path=file_path, client=textract_client)
all_docs = loader.load()

In [None]:
len (all_docs)

In [None]:
# Checking metadata of corpus

for doc in all_docs:
  print(doc.metadata) 

### Adding the embedding model, setting chunk size and overlap, and storing the chunks in Chroma DB

In [None]:
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceBgeEmbeddings

# General-purpose sentence-transformers model (note: this is not a BGE model).
model_name = "sentence-transformers/all-MiniLM-L6-v2"

encode_kwargs = {'normalize_embeddings': True} # set True to compute cosine similarity

hf_bge_embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs={'device': 'cuda'},
    encode_kwargs=encode_kwargs,
    # HuggingFaceBgeEmbeddings prepends a BGE-specific retrieval instruction to
    # every query by default. all-MiniLM-L6-v2 was not trained with that
    # prefix, so disable it to embed queries the same way as the passages.
    query_instruction="",
)

# chunk_size / chunk_overlap are measured in characters (length_function=len).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=256,
                                               chunk_overlap = 10,
                                               length_function=len)

# Split the loaded documents into chunks and index them in Chroma.
docs = text_splitter.split_documents(all_docs)

vectorstore = Chroma.from_documents(docs, hf_bge_embeddings)

In [None]:
len(docs)

In [None]:
# Checking maximum chunk size in vector store

print(max([len(chunk.page_content) for chunk in docs]))

### Adding base retriever, checking if it works in retrieving relevant documents from vector store

In [None]:
# Establishing base retriever. Setting the number of chunks retrieved to be 5 

base_retriever = vectorstore.as_retriever(search_kwargs={"k" : 5}) 

In [None]:
# Retrievers are Runnables (the RAG chain below pipes into one), so query via
# invoke(); get_relevant_documents() is the deprecated spelling of this call.
relevant_docs = base_retriever.invoke("What are the steps to produce graphene?")

In [None]:
len(relevant_docs)

In [None]:
for doc in relevant_docs:
  print(doc.page_content)
  print('\n')

### Defining the prompt template

In [None]:
# This teaches the LLM how to generate a response - how to 'talk'

from langchain.prompts import ChatPromptTemplate, PromptTemplate

template = """<human>: You are a materials scientist interested in synthesizing graphene with various methods. Answer the question based only on the following context. If you cannot answer the question with the context, please respond with 'I don't know':

### CONTEXT
{context}

### QUESTION
Question: {question}

\n

<bot>:
"""

# The template already carries its own <human>/<bot> turn markers and is fed to
# a plain text-completion LLM. A ChatPromptTemplate would wrap it in a chat
# message whose string form gains an extra "Human: " prefix, so use a plain
# string prompt to send the text to the model exactly as written above.
prompt = PromptTemplate.from_template(template)

### Instantiating LLM into the chatbot

In [None]:
from operator import itemgetter
import torch
from langchain.llms.huggingface_pipeline import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, GenerationConfig, pipeline
from langchain.chat_models import ChatOpenAI
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnableLambda, RunnablePassthrough

# Load the weights in 4-bit NF4 with double quantization; bfloat16 is the
# compute dtype.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Single source of truth for the checkpoint so model and tokenizer cannot
# drift apart. Replace with "llmware/dragon-mistral-7b-v0" to run the
# Mistral-based variant.
MODEL_ID = "llmware/dragon-deci-7b-v0"

model = AutoModelForCausalLM.from_pretrained(MODEL_ID,
                                             quantization_config=quantization_config,
                                             low_cpu_mem_usage=True,
                                             trust_remote_code=True)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID,
                                          trust_remote_code=True)

# Generation settings, defined once and reused below. Sampling is enabled with
# a very low temperature; EOS doubles as the padding token.
generation_kwargs = dict(
    max_length=4096,
    temperature=1e-3,
    do_sample=True,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,
)

# Not passed to anything in this cell; kept so the name stays available. The
# pipeline below receives the same settings as keyword arguments.
generation_config = GenerationConfig(**generation_kwargs)

# Bind the result to a new name instead of `pipeline`: rebinding `pipeline`
# would shadow the imported transformers factory and break re-running the cell.
text_generation_pipeline = pipeline("text-generation",
                                    model=model,
                                    tokenizer=tokenizer,
                                    **generation_kwargs)

deci_dragon = HuggingFacePipeline(pipeline=text_generation_pipeline) # Replace with mistral_dragon for labelling purposes

### Creating a RAG chain 

In [None]:
retrieval_augmented_qa_chain = (
    # 1) Look up context with the base retriever, keeping the question alongside.
    {
        "context": itemgetter("question") | base_retriever,
        "question": itemgetter("question"),
    }
    # 2) Pass everything through, re-assigning "context" to itself.
    | RunnablePassthrough.assign(context=itemgetter("context"))
    # 3) Generate the answer and return it together with the retrieved context.
    | {
        "response": prompt | deci_dragon,
        "context": itemgetter("context"),
    }
)

### Querying the chatbot with the RAG chain

In [None]:
question = "Provide me a step-by-step process to produce high-quality monolayer graphene. Provide the source."

result = retrieval_augmented_qa_chain.invoke({"question" : question})

print(result['response'])

### Making the synthetic dataset for LLM (OpenAI) evaluation

In [None]:
from langchain.output_parsers import ResponseSchema
from langchain.output_parsers import StructuredOutputParser

# Single-field schema: the structured output carries one "question" key.
question_schema = ResponseSchema(name="question", description="a question about the context.")

question_response_schemas = [question_schema]

In [None]:
question_output_parser = StructuredOutputParser.from_response_schemas(question_response_schemas)

format_instructions = question_output_parser.get_format_instructions()

#### Generating questions using GPT-3.5

In [None]:
from langchain_openai import ChatOpenAI
question_generation_llm = ChatOpenAI(model="gpt-3.5-turbo-1106")

bare_prompt_template = "{content}"

bare_template = ChatPromptTemplate.from_template(template=bare_prompt_template)

In [None]:
from langchain.prompts import ChatPromptTemplate

# The {format_instructions} placeholder is required: without it the parser's
# formatting instructions passed to format_messages() below are never inserted
# into the prompt, and the model is not told which layout the parser expects.
qa_template = """\
You are a materials scientist interested in synthesizing graphene with various methods. For each context, create a question that is specific to the context. Avoid creating generic or general questions.

question: a question about the context.

Format the output as JSON with the following keys:
question

{format_instructions}

context: {context}
"""

prompt_template = ChatPromptTemplate.from_template(template=qa_template)

# Smoke test on the first chunk before generating questions in bulk.
messages = prompt_template.format_messages(
    context=docs[0],
    format_instructions=format_instructions
)

# The already-formatted messages are forwarded verbatim through the bare
# "{content}" template to the question-generation model.
question_generation_chain = bare_template | question_generation_llm

response = question_generation_chain.invoke({"content" : messages})

# Parse the model's reply into a dict with a "question" key.
output_dict = question_output_parser.parse(response.content)

In [None]:
for k, v in output_dict.items():
  print(k)
  print(v)

#### Retrieving context from vector store for the dataset

In [None]:
from tqdm import tqdm
import random

random.seed(42)  # fixed seed so the same chunks are sampled on every run

qac_triples = []

# Randomly select up to 10 chunks from the chunks generated earlier
# (capped at len(docs) so a small corpus does not make random.sample raise).
for text in tqdm(random.sample(docs, min(10, len(docs)))):
    messages = prompt_template.format_messages(
        context=text,
        format_instructions=format_instructions
    )
    response = question_generation_chain.invoke({"content" : messages})
    try:
        output_dict = question_output_parser.parse(response.content)
    except Exception as e:
        # Best effort: an unparsable reply drops this chunk, but report it so a
        # shrinking evaluation set does not go unnoticed.
        print(f"Skipping chunk: could not parse generated question ({e!r})")
        continue
    # Keep the source chunk next to its question; the answer is added later.
    output_dict["context"] = text
    qac_triples.append(output_dict)

In [None]:
qac_triples[5]

#### Generating ground truths using GPT-4

In [None]:
# Ground-truth answers come from GPT-4 with temperature 0.
answer_generation_llm = ChatOpenAI(model="gpt-4-1106-preview", temperature=0)

# Single-field schema: the structured output carries one "answer" key.
answer_schema = ResponseSchema(
    name="answer",
    description="an answer to the question"
)

answer_response_schemas = [
    answer_schema,
]

answer_output_parser = StructuredOutputParser.from_response_schemas(answer_response_schemas)

# NOTE: this rebinds format_instructions (previously the question parser's).
format_instructions = answer_output_parser.get_format_instructions()

# The {format_instructions} placeholder is required: without it the parser's
# formatting instructions passed to format_messages() below are never inserted
# into the prompt, and the model is not told which layout the parser expects.
qa_template = """\
You are a materials scientist interested in synthesizing graphene with various methods. For each question and context, create an answer.

answer: a answer about the context.

Format the output as JSON with the following keys:
answer

{format_instructions}

question: {question}
context: {context}
"""

# NOTE: this rebinds prompt_template (previously the question-generation prompt).
prompt_template = ChatPromptTemplate.from_template(template=qa_template)

# Smoke test on the first question/context pair before answering all of them.
messages = prompt_template.format_messages(
    context=qac_triples[0]["context"],
    question=qac_triples[0]["question"],
    format_instructions=format_instructions
)

answer_generation_chain = bare_template | answer_generation_llm

response = answer_generation_chain.invoke({"content" : messages})

# Parse the model's reply into a dict with an "answer" key.
output_dict = answer_output_parser.parse(response.content)

In [None]:
for k, v in output_dict.items():
  print(k)
  print(v)

In [None]:
# Ask the answer model for a ground-truth answer to every generated question.
for triple in tqdm(qac_triples):
    messages = prompt_template.format_messages(
        context=triple["context"],
        question=triple["question"],
        format_instructions=format_instructions
    )
    response = answer_generation_chain.invoke({"content" : messages})
    try:
        output_dict = answer_output_parser.parse(response.content)
    except Exception as e:
        # Best effort: report the failure instead of hiding it.
        print(f"No ground truth for question {triple['question']!r}: "
              f"could not parse generated answer ({e!r})")
        continue
    triple["answer"] = output_dict["answer"]

# Drop questions that never received an answer; otherwise they would surface
# later as rows with a missing ground truth in the evaluation data frame.
qac_triples[:] = [triple for triple in qac_triples if "answer" in triple]

### Combining questions, context and ground truths into a data frame

In [None]:
import pandas as pd
from datasets import Dataset

# One row per question/context/answer triple.
ground_truth_qac_set = pd.DataFrame(qac_triples)

# Replace each stored document object with its plain page text.
ground_truth_qac_set["context"] = ground_truth_qac_set["context"].apply(
    lambda document: str(document.page_content)
)

# The generated answers act as the reference answers for evaluation.
ground_truth_qac_set = ground_truth_qac_set.rename(
    columns={"answer" : "ground_truth"}
)

# Convert to a Hugging Face Dataset for the evaluation helpers below.
eval_dataset = Dataset.from_pandas(ground_truth_qac_set)

In [None]:
eval_dataset[0]

### Adding RAGAs metrics into the code

In [None]:
from ragas.metrics import (
    answer_relevancy,
    faithfulness,
    context_recall,
    context_precision,
    context_relevancy,
    answer_correctness,
    answer_similarity
)

from ragas.metrics.critique import harmfulness
from ragas import evaluate

def create_ragas_dataset(rag_pipeline, eval_dataset):
    """Run every evaluation question through ``rag_pipeline`` and collect the results.

    Args:
        rag_pipeline: Object whose ``invoke({"question": ...})`` returns a mapping
            with a ``"response"`` entry and a ``"context"`` entry (an iterable of
            items exposing ``page_content``).
        eval_dataset: Iterable of rows providing ``"question"`` and
            ``"ground_truth"``.

    Returns:
        A ``Dataset`` with the columns ``question``, ``answer``, ``contexts``
        and ``ground_truths``.
    """
    # NOTE(review): the reference column is named "ground_truths" (a list);
    # confirm this matches what the installed ragas version expects.
    def _to_record(row):
        output = rag_pipeline.invoke({"question" : row["question"]})
        return {
            "question" : row["question"],
            "answer" : output["response"],
            "contexts" : [document.page_content for document in output["context"]],
            "ground_truths" : [row["ground_truth"]],
        }

    records = [_to_record(row) for row in tqdm(eval_dataset)]
    return Dataset.from_pandas(pd.DataFrame(records))

def evaluate_ragas_dataset(ragas_dataset):
    """Score ``ragas_dataset`` with this notebook's fixed set of RAGAs metrics.

    Args:
        ragas_dataset: Dataset to hand to ``ragas.evaluate``.

    Returns:
        Whatever ``evaluate`` returns for the seven metrics listed below.
    """
    selected_metrics = [
        context_precision,
        faithfulness,
        answer_relevancy,
        context_recall,
        context_relevancy,
        answer_correctness,
        answer_similarity,
    ]
    return evaluate(ragas_dataset, metrics=selected_metrics)

### Running the RAG chain with the LLM to generate answers and inserting them into the data frame

In [None]:
from tqdm import tqdm
import pandas as pd

basic_qa_ragas_dataset = create_ragas_dataset(retrieval_augmented_qa_chain, eval_dataset)

In [None]:
basic_qa_ragas_dataset[0]

### Starting the RAG Evaluation

In [None]:
basic_qa_result = evaluate_ragas_dataset(basic_qa_ragas_dataset)

### Plotting RAGAs performance metrics into a bar graph

In [None]:
# @title
import matplotlib.pyplot as plt

def plot_metrics_with_values(metrics_dict, title='RAG Metrics'):
    """
    Draw a horizontal bar chart of metric scores, labelling each bar with its value.

    Args:
    metrics_dict (dict): Maps metric names to their scores.
    title (str): Title shown above the chart.
    """
    labels, scores = list(metrics_dict.keys()), list(metrics_dict.values())

    _, axis = plt.subplots(figsize=(10, 6))

    # Write each score (4 decimals) just past the end of its bar,
    # vertically centred on the bar.
    for rect in axis.barh(labels, scores, color='skyblue'):
        bar_length = rect.get_width()
        axis.text(bar_length + 0.01,
                  rect.get_y() + rect.get_height() / 2,
                  f'{bar_length:.4f}',
                  va='center')

    axis.set_xlabel('Score')
    axis.set_title(title)
    axis.set_xlim(0, 1)  # x-axis fixed to the range 0..1
    plt.show()

In [None]:
plot_metrics_with_values(basic_qa_result, "Base Retriever ragas Metrics") # Add LLM name in title

### Experimenting with advanced retrievers

In [None]:
def create_qa_chain(retriever, primary_qa_llm):
    """Assemble a question-answering chain around ``retriever`` and ``primary_qa_llm``.

    The chain takes ``{"question": ...}`` and yields a mapping with the model
    output under ``"response"`` and the retrieved items under ``"context"``.
    It uses the notebook-level ``prompt`` to format the model input.
    """
    # Retrieve context for the question and keep the question alongside it.
    gather_inputs = {
        "context": itemgetter("question") | retriever,
        "question": itemgetter("question"),
    }
    # Pass everything through, re-assigning "context" to itself.
    keep_context = RunnablePassthrough.assign(context=itemgetter("context"))
    # Generate the answer and return it together with the retrieved context.
    answer_with_context = {
        "response": prompt | primary_qa_llm,
        "context": itemgetter("context"),
    }
    return gather_inputs | keep_context | answer_with_context

### Parent Document Retriever

In [None]:
from langchain.retrievers import ParentDocumentRetriever
from langchain.storage import InMemoryStore

parent_splitter = RecursiveCharacterTextSplitter(chunk_size=1536)

child_splitter = RecursiveCharacterTextSplitter(chunk_size=256)

vectorstore = Chroma(collection_name="split_parents", embedding_function=hf_bge_embeddings)

store = InMemoryStore()

In [None]:
parent_document_retriever = ParentDocumentRetriever(
    vectorstore=vectorstore,
    docstore=store,
    child_splitter=child_splitter,
    parent_splitter=parent_splitter,
)

In [None]:
# Index the full loaded documents rather than the 256-character `docs` chunks:
# the retriever applies its own parent (1536) and child (256) splitters, and
# feeding it pre-split chunks would cap every "parent" at the child size.
parent_document_retriever.add_documents(all_docs)

In [None]:
parent_document_retriever_qa_chain = create_qa_chain(parent_document_retriever, deci_dragon)

#### Testing parent document retriever with query

In [None]:
parent_document_retriever_qa_chain.invoke({"question" : "Provide me a step-by-step process to produce high-quality monolayer graphene. Provide the source."})["response"]

##### Making synthetic dataset with parent document retriever in RAG chain

In [None]:
pdr_qa_ragas_dataset = create_ragas_dataset(parent_document_retriever_qa_chain, eval_dataset)

In [None]:
pdr_qa_result = evaluate_ragas_dataset(pdr_qa_ragas_dataset)

In [None]:
plot_metrics_with_values(pdr_qa_result, "Parent Document Retriever ragas Metrics")

### Ensemble Retriever

In [None]:
from langchain.retrievers import BM25Retriever, EnsembleRetriever

# Same chunking as the base retriever: 256-character chunks, 10 overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=256, chunk_overlap=10)

docs = text_splitter.split_documents(all_docs)

# Sparse (keyword) retriever over the chunks, returning 5 hits per query.
bm25_retriever = BM25Retriever.from_documents(docs)

bm25_retriever.k = 5

# Use a dedicated collection for the dense index. The default collection was
# already populated with these chunks when the base vector store was built,
# and in-memory Chroma collections with the same name are shared within the
# process, so reusing it would store every chunk twice and make the dense
# retriever return duplicates.
# NOTE: re-running this cell in the same kernel adds the chunks to
# "ensemble_chunks" again; restart the kernel (or rename) before re-running.
vectorstore = Chroma.from_documents(docs, hf_bge_embeddings, collection_name="ensemble_chunks")

# Dense retriever, also returning 5 hits per query.
chroma_retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

# Combine both rankings; BM25 / dense weights are chosen to sum to 1.
ensemble_retriever = EnsembleRetriever(retrievers=[bm25_retriever, chroma_retriever], weights=[0.42, 0.58])

In [None]:
ensemble_retriever_qa_chain = create_qa_chain(ensemble_retriever, deci_dragon)

#### Testing ensemble retriever with query

In [None]:
ensemble_retriever_qa_chain.invoke({"question" : "Provide me a step-by-step process to produce high-quality monolayer graphene. Provide the source."})["response"]

##### Making synthetic dataset with ensemble retriever in RAG chain

In [None]:
ensemble_qa_ragas_dataset = create_ragas_dataset(ensemble_retriever_qa_chain, eval_dataset)

In [None]:
ensemble_qa_result = evaluate_ragas_dataset(ensemble_qa_ragas_dataset)

In [None]:
plot_metrics_with_values(ensemble_qa_result, "Ensemble Retriever ragas Metrics")