## 0) Packages

In [None]:
!pip install -q torch transformers
!pip install -q accelerate bitsandbytes
!pip install -q sentence-transformers
!pip install -q faiss-gpu
# !pip install -q faiss
# !pip install -q faiss-cpu

!pip install --upgrade --quiet  cohere
!pip install pypdf datasets
!pip install -q langchain
!pip install ragas==0.1.4
# !pip install torch torchvision -U

In [1]:
import getpass
import os
from getpass import getpass
from utils.utils_print import pretty_print_docs
from utils.utils_dset import get_knet_all_urls, get_knet_urls_recursive
import pandas as pd

# for Google Colab users:
# import locale
# locale.getpreferredencoding = lambda: "UTF-8"

# import torch
# device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# print('Running on', device)

In [None]:
# # If required:
# ACCESS_TOKEN_GITHUB = getpass("Github Personal Token: ")
# ACCESS_TOKEN_HUGGINGFACE = getpass("HuggingFace Token: ")

In [None]:
# Ask for the Cohere API key only when it is not already present in the
# environment, so a non-interactive "Restart & Run All" works when the key
# has been exported beforehand. The key is never written into the notebook.
if not os.environ.get("COHERE_API_KEY"):
    os.environ["COHERE_API_KEY"] = getpass("Cohere API Key:")

## 1) Dataset

In [None]:
# PDF Text Loader

# # from langchain.document_loaders import PyPDFLoader, DirectoryLoader
# # # loader = TextLoader('single_text_file.pdf')
# # loader = DirectoryLoader('./pdf_sources/',
# #                          glob="./*.pdf",
# #                          loader_cls=PyPDFLoader
# #                          )

# # documents = loader.load()
# # len(documents)

In [None]:
### Load Dataset
from datasets import load_dataset
from langchain.document_loaders.csv_loader import CSVLoader

DSET_SOURCE = "XXXXX"

# File the context column is exported to and then read back from as documents.
CONTEXTS_CSV_PATH = './df_contexts.csv'

qa_docs = load_dataset(DSET_SOURCE)

# create Q&A in format of CSV with DF
# NOTE(review): no split is passed to load_dataset above; confirm that the
# returned object converts to one DataFrame row per Q&A example.
df_docs = pd.DataFrame.from_dict(qa_docs)
print("DataFrame length:",len(df_docs))
df_docs.to_csv('df_qa.csv', index=False)


### Get context and load as docs
# Everything except the answer / ground-truth / question columns is kept.
df_contexts = df_docs.drop(['answer',"ground_truth", "question"],axis=1)
df_contexts.to_csv(CONTEXTS_CSV_PATH, index=False)

# Read back the file written just above. The previous path pointed to
# './df_answers.csv', which this notebook never creates.
loader_contexts = CSVLoader(file_path=CONTEXTS_CSV_PATH)
docs = loader_contexts.load()
print("Docs length:",len(docs))
docs[:5]


In [None]:
# Inputs for the evaluation, taken from the Q&A DataFrame.

# 1) Questions
# The column is named "question" (singular): the dataset cell drops it under
# that name, so indexing with "questions" raises KeyError.
questions_dset = df_docs["question"]

# 2) Labels
ground_truths_dset = df_docs["ground_truth"]

# 3) LLM Answers
# 4) Retriever Model's Contexts
# Both start empty in this cell.
answers_rag = []
contexts_rag = []

## 2) Parameter Test

In [10]:
import pandas as pd
import tqdm
from langchain_core.runnables import RunnablePassthrough
from datasets import Dataset

from ragas.metrics import (
    answer_relevancy,
    context_relevancy,
    context_recall,
    context_precision,
    faithfulness,
)

# Metrics used for the evaluation, in this order; the list is passed to
# evaluate_answers in the parameter sweep.
# Imported above but currently left out: context_relevancy, faithfulness.
METRICS_RAGAS = [context_precision, context_recall, answer_relevancy]



In [None]:
# import torch
import pandas as pd
from itertools import product

# from optimizer_funcs import get_chunked_docs, get_emb_model, get_db_retriever, get_model_tokenizer, create_hf_pipeline
# from optimizer_funcs import create_comp_retriever, create_llm_chain_with_prompt, evaluate_answers

# --- Search space of the sweep: every combination of the lists below is run ---
CHUNK_SIZES = [512, 1024]
SEARCH_KWARGS = [2, 4, 8, 16]
LLM_TEMPERATURES = [0.01, 0.2, 0.5]
RERANK_MODES = [True]

device = "cuda"
emb_model_name = "BAAI/bge-base-en"          # "BAAI/bge-large-en", "thenlper/gte-small"
lm_model_name = "HuggingFaceH4/zephyr-7b-alpha"

# One dict of parameters + scores per evaluated combination.
eval_scores = []

# NOTE(review): the helper functions used below (get_chunked_docs, get_emb_model,
# get_db_retriever, get_model_tokenizer, create_hf_pipeline, create_comp_retriever,
# create_llm_chain_with_prompt, evaluate_answers) must already be in scope; their
# import from optimizer_funcs is commented out at the top of this cell.

# Materialise the grid so tqdm knows the total and can show real progress;
# a bare itertools.product has no length.
param_grid = list(product(CHUNK_SIZES,
                          SEARCH_KWARGS,
                          LLM_TEMPERATURES,
                          RERANK_MODES))

# The embedding model and the language model take no swept parameter as input,
# so they are created once instead of once per combination.
# # create embedding model
embeddings = get_emb_model(emb_model_name, device)

# # create LM and tokenizer
model, tokenizer = get_model_tokenizer(lm_model_name)

for CHUNK_SIZE, SEARCH_KWARG, LM_TEMPERATURE, RERANK_MODE in tqdm.tqdm(param_grid):

    print(f"CHUNK:{CHUNK_SIZE}  SEARCH:{SEARCH_KWARG}  TEMP:{LM_TEMPERATURE} RERANK_MODE:{RERANK_MODE}")

    # # 1- text splitter & chunk creator
    chunked_docs = get_chunked_docs(CHUNK_SIZE, docs)

    # # 2- create DB & retriever
    db_faiss, retriever = get_db_retriever(chunked_docs, embeddings, SEARCH_KWARG)

    # # 3- create HuggingFace pipeline
    llm = create_hf_pipeline(model, tokenizer, LM_TEMPERATURE)

    # # 4- create compression retriever with Cohere Rerank
    comp_retriever = create_comp_retriever(os.environ["COHERE_API_KEY"], retriever)

    # # 5- create LLM chain
    llm_chain = create_llm_chain_with_prompt(llm)

    # Pick the retriever once and use the same one for the chain and for the
    # evaluation call. Previously evaluate_answers always received
    # comp_retriever, even when the chain was built on the plain retriever.
    # NOTE(review): confirm evaluate_answers expects the chain's own retriever.
    active_retriever = comp_retriever if RERANK_MODE else retriever
    rag_chain = {"context": active_retriever, "question": RunnablePassthrough()} | llm_chain

    # # 6- get answers from RAG
    result_testset = evaluate_answers(questions_dset,
                                      ground_truths_dset,
                                      rag_chain,
                                      active_retriever,
                                      llm,
                                      embeddings,
                                      METRICS_RAGAS)

    # # 7- add to list to create DF
    # # change desired metrics
    # Only metrics enabled in METRICS_RAGAS are read here. context_relevancy
    # is disabled there, so it is commented out just like faithfulness.
    eval_scores.append({
            'Chunk Size': CHUNK_SIZE,
            'Search Kwargs': SEARCH_KWARG,
            'Temperature': LM_TEMPERATURE,
            'Rerank Mode':RERANK_MODE,
            # 'Context Relevancy': result_testset["context_relevancy"],
            'Context Precision': result_testset["context_precision"],
            'Context Recall': result_testset["context_recall"],
            # 'Faithfulness': result_testset["faithfulness"],
            'Answer Relevancy': result_testset["answer_relevancy"],
        })


    # torch.cuda.empty_cache()


In [41]:
# Columns of the results table: the four swept parameters followed by the
# metric scores. 'Context Relevancy' and 'Faithfulness' are left out.
report_columns = [
    'Chunk Size',
    'Search Kwargs',
    'Temperature',
    'Rerank Mode',
    'Context Precision',
    'Context Recall',
    'Answer Relevancy',
]

# One row per evaluated parameter combination, in run order.
eval_results_df = pd.DataFrame(eval_scores, columns=report_columns)
eval_results_df.to_csv('eval_results.csv')

# Same table ordered from highest to lowest answer relevancy.
eval_scores_df_sorted = eval_results_df.sort_values(by="Answer Relevancy", ascending=False)
eval_scores_df_sorted.to_csv('eval_results_sorted.csv')


In [42]:
# Display the unsorted results table (one row per parameter combination).
eval_results_df

Unnamed: 0,Chunk Size,Search Kwargs,Temperature,Rerank Mode,Context Precision,Context Recall,Answer Relevancy
0,512,2,0.01,True,0.5,1.0,0.9979
1,512,2,0.2,True,0.5,1.0,0.9979
2,512,2,0.5,True,0.5,1.0,0.9979
3,512,4,0.01,True,0.5,1.0,0.9979
4,512,4,0.2,True,0.5,1.0,0.9979
5,512,4,0.5,True,0.5,1.0,0.9979
6,512,8,0.01,True,0.5,1.0,0.9979
7,512,8,0.2,True,0.5,1.0,0.9979
8,512,8,0.5,True,0.5,1.0,0.9979
9,512,16,0.01,True,0.5,1.0,0.9979


## Additional

In [None]:
!nvidia-smi
torch.cuda.empty_cache()
!nvidia-smi