# **Welcome to OpenLLM Day 1.0**

## Presented by Petavue, sponsored by MongoDB & Boldcap. Partnered with Bright Academy and AI Brains

#### Retriever-Augmented Generation (RAG) with LLMs

# **Dependencies**

This section installs all the necessary dependencies required for our notebook.

In [2]:
# Install necessary packages
%%capture
!pip install pymongo
!pip install datasets
!pip install transformers
!pip install faiss-cpu
!pip install ragas
!pip install cohere

!pip install openai
!pip install llama_index
!pip install llama-index-vector-stores-mongodb

# **Imports**

This section imports all the necessary libraries and modules required for the functionalities we will implement in this notebook.

In [3]:
#import modules which are needed
import os
import pandas as pd
import json
import time
from datetime import datetime, timezone
import asyncio
from openai import AsyncOpenAI
from ast import literal_eval

#mongo connection imports
import pymongo
from typing import List
from llama_index.core.schema import NodeWithScore
from tqdm.asyncio import tqdm
import cohere
import certifi

import nest_asyncio
nest_asyncio.apply()

#llamaIndex package imports
from llama_index.core import Document
from llama_index.core.schema import MetadataMode
from llama_index.core.settings import Settings
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core import Document, VectorStoreIndex
from transformers import AutoModelForSequenceClassification, AutoTokenizer

#Ragas package imports
from ragas.metrics import (
    answer_relevancy,
    faithfulness,
    context_recall,
    context_precision,
    answer_correctness
)
from datasets import Dataset
from ragas import evaluate

# **Setting-up the Environment**

In [56]:
#Read env variables
OPENAI_KEY = "**************"    #openai api key
MONGO_CONNECTION_STRING = 'mongodb+srv://**************@cluster0.k4om2il.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0'   #mongodb connection string(personal)
MONGO_DB_NAME = 'RAG'
MONGO_COLLECTION_NAME = 'Basic_Rag_implementation'
MONGO_INDEX_NAME = 'vector_index'
cohere_api = '******************'    #cohere api key

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# **Dataset**

In this section, we load the dataset that will be used for our experiments.


In [5]:
csv_file = './Data_set/RAG_event_dataset.csv'   #csv file that contains dataset for rag technique

# **No RAG**

In [6]:
#output column list and output dataframe
columns=['RAG_technique','Embedding_technique','Chunk_size','Top_k', 'Transcript', 'Question_type', 'Question', 'Expected_answer', 'Top_k_context', 'llm_response','Top_k_context_list']
os.environ["OPENAI_API_KEY"] = OPENAI_KEY
rag_technique='no_rag'
execution_file_location=f'./{rag_technique}_execution-log.jsonl'
max_tokens_op = 250

In [8]:
#10 transcript Q and A dataset
qa_df = pd.read_csv(csv_file)
qa_df=qa_df.head(3)
qa_df

Unnamed: 0.1,Unnamed: 0,date,q,ticker,transcript,longName,industry,sector,single_hop_question,single_hop_answer,multiple_hop_question,multiple_hop_answer,fact_checking_question,fact_checking_answer,mcq_question,mcq_answer
0,0,"Jan 17, 2024, 4:30 p.m. ET",Q4 2023 Earnings Call,KMI,Prepared Remarks:\n\nOperator\nWelcome to the ...,"Kinder Morgan, Inc.",Oil & Gas Midstream,Energy,What was the dividend declared by Kinder Morga...,The company declared a dividend of $0.2825 per...,What opportunities does Kinder Morgan see for ...,Kinder Morgan sees extensive opportunities to ...,What was the total amount spent on share repur...,"According to the context, Kinder Morgan spent ...",What was the leverage ratio (net debt to adjus...,B) 4.2 times
1,1,"Feb 28, 2024, 11:00 a.m. ET",Q4 2023 Earnings Call,DVN,"Prepared Remarks:\n\nOperator\nHello, everyone...",Devon Energy Corporation,Oil & Gas E&P,Energy,What was one of the standout performance areas...,"According to Clay Gaspar, a standout performan...",What are some of the strategic priorities that...,"According to Rick Muncrief, Devon Energy's str...","According to the context, what was Devon Energ...","According to the context, Devon Energy deliver...","According to Clay Gaspar, which of the followi...",(Option D) All of the above


In [9]:
#Function to save log to jsonl file
def log(data, log_file_path):
    with open(log_file_path, "a") as json_file:
        json.dump(data, json_file)
        json_file.write("\n")

In [10]:
#Function to call gpt-4 for response generation
async def llm_response_generation(query):
    prompt= '''
    answer the following query with your prior knowledge,
    Query: {query_str}
    Answer: '''.replace('{query_str}', query)
    model_name = 'gpt-4o'
    data_to_log = {
                "model": model_name,
            }

    client = AsyncOpenAI()
    req = [
                {
                    "role": "system",
                    "content": prompt
                },
            ]
    response_time_start = datetime.now(timezone.utc)
    open_ai_response = await client.chat.completions.create(
                model=model_name,
                messages=req,
                max_tokens=max_tokens_op
            )
    response_time_end = datetime.now(timezone.utc)

    llm_response_content = open_ai_response.choices[0].message.content

    llm_response_tokens = open_ai_response.usage.completion_tokens
    llm_prompt_tokens = open_ai_response.usage.prompt_tokens

    data_to_log['response'] = str(open_ai_response)
    data_to_log['input_token'] = llm_prompt_tokens
    data_to_log['output_token'] = llm_response_tokens
    data_to_log['response_time_start'] = response_time_start.strftime("%Y-%m-%d %H:%M:%S")
    data_to_log['response_time_end'] = response_time_end.strftime("%Y-%m-%d %H:%M:%S")
    data_to_log['llm_response_content'] = llm_response_content

    return data_to_log , llm_response_content

In [11]:
#Fuction to run through all the questions for the dataframe
async def main():
    question_list=['single_hop_question', 'multiple_hop_question', 'fact_checking_question', 'mcq_question']
    final_df=pd.DataFrame(columns=columns)

    print(f"statring to run the scripts for {rag_technique}")
    for indx, row in qa_df.iterrows():
        print(f"Running Transcript {indx}")
        Question=[]
        Expected_answer=[]
        llm_response=[]
        Question_type=[]

        for question in question_list:
            print(f"    Running Question: {question}")
            query=row[question]
            (data_to_log , llm_response_content)= await llm_response_generation(query)

            Question.append(query)
            Expected_answer.append(row[question.replace('question','answer')])
            llm_response.append(llm_response_content)
            Question_type.append(question.replace('_question',''))

            log(data_to_log, execution_file_location)

        RAG_technique=[rag_technique]*4
        Transcript=[row['transcript']]*4
        temp_df=pd.DataFrame(columns=columns)
        temp_df['Question']=Question
        temp_df['Expected_answer']=Expected_answer
        temp_df['llm_response']=llm_response
        temp_df['Question_type']=Question_type
        temp_df['RAG_technique']=RAG_technique
        temp_df['Transcript']=Transcript
        temp_df['Top_k_context']=""
        temp_df['Top_k_context_list'] = [""]*4

        final_df=pd.concat([final_df,temp_df], axis=0)

    final_df.to_csv(f'./{rag_technique}_results.csv',index=False)

asyncio.run(main())

statring to run the scripts for no_rag
Running Transcript 0
    Running Question: single_hop_question
    Running Question: multiple_hop_question
    Running Question: fact_checking_question
    Running Question: mcq_question
Running Transcript 1
    Running Question: single_hop_question
    Running Question: multiple_hop_question
    Running Question: fact_checking_question
    Running Question: mcq_question


In [12]:
test_df=pd.read_csv(f'./no_rag_results.csv')
no_rag_df=test_df[['Question', 'Expected_answer', 'llm_response']]
no_rag_df

Unnamed: 0,Question,Expected_answer,llm_response
0,What was the dividend declared by Kinder Morga...,The company declared a dividend of $0.2825 per...,"I'm sorry, but I don't have the specific infor..."
1,What opportunities does Kinder Morgan see for ...,Kinder Morgan sees extensive opportunities to ...,"Kinder Morgan, a prominent energy infrastructu..."
2,What was the total amount spent on share repur...,"According to the context, Kinder Morgan spent ...","As of my last update in October 2023, I do not..."
3,What was the leverage ratio (net debt to adjus...,B) 4.2 times,"I'm sorry, but I don't have access to real-tim..."
4,What was one of the standout performance areas...,"According to Clay Gaspar, a standout performan...",One of the standout performance areas for Devo...
5,What are some of the strategic priorities that...,"According to Rick Muncrief, Devon Energy's str...","Devon Energy, as a major player in the oil and..."
6,"According to the context, what was Devon Energ...","According to the context, Devon Energy deliver...","According to the information, Devon Energy's p..."
7,"According to Clay Gaspar, which of the followi...",(Option D) All of the above,D. All of the above


# **Connecting and pushing data to MongoDB**

In [13]:
#function to create a object to connect with mongoDB
def get_mongo_client(mongo_uri):
    try:
        client = pymongo.MongoClient(mongo_uri, tlsCAFile=certifi.where())
        print("Connection to MongoDB successful")
        return client
    except pymongo.errors.ConnectionFailure as e:
        print(f"Connection failed: {e}")
        return None

In [18]:
#api keys, connection strings and other constants
os.environ["OPENAI_API_KEY"] = OPENAI_KEY
connection_string = MONGO_CONNECTION_STRING
#DB name and collection name
DB_NAME=MONGO_DB_NAME
COLLECTION_NAME=MONGO_COLLECTION_NAME
chunk_size = 512
indexName = MONGO_INDEX_NAME
dimension = 3072

In [18]:
#Setting all embedding model and llm for inference
embed_model = OpenAIEmbedding(model="text-embedding-3-large", dimensions=dimension)
llm = OpenAI()
Settings.llm = llm
Settings.embed_model = embed_model

In [19]:
#MongoDB connection
mongo_client = get_mongo_client(connection_string)
db = mongo_client[DB_NAME]
collection = db[COLLECTION_NAME]

Connection to MongoDB successful


In [43]:
dataset_df = pd.read_csv(csv_file)
dataset_df=dataset_df[['transcript']]

# Convert the DataFrame to a JSON string representation
documents_json = dataset_df.to_json(orient='records')
# Load the JSON string into a Python list of dictionaries
documents_list = json.loads(documents_json)
llama_documents = []

In [44]:
for document in documents_list:
    document["transcript"] = json.dumps(document["transcript"])
    llama_document = Document(text=document["transcript"])
    llama_documents.append(llama_document)

In [49]:
sample_transcript=llama_documents[0].text
sample_transcript_part=sample_transcript[:5000]
sample_transcript_part

'"Prepared Remarks:\\r\\n\\r\\nOperator\\r\\nWelcome to the quarterly earnings conference call. At this time, all participants are in a listen-only mode until the question-and-answer session of today\'s conference. [Operator instructions] I would like to inform all parties that today\'s conference is being recorded. If you have any objections, you may disconnect at this time.\\r\\nI would now like to turn the conference over to Mr. Rich Kinder, executive chairman of Kinder Morgan. Thank you. You may begin.\\r\\nRich Kinder -- Executive Chairman\\r\\nThank you, Sheila. Before we begin, as usual, I\'d like to remind you that KMI\'s earnings release today and this call includes forward-looking statements within the meaning of the Private Securities Litigation Reform Act of 1995 and the Securities and Exchange Act of 1934 as well as certain non-GAAP financial measures. Before making any investment decisions, we strongly encourage you to read our full disclosures on forward-looking statemen

In [50]:
#nodes what has to be pushed to the MongoDB server
parser = SentenceSplitter(chunk_size=chunk_size)
nodes = parser.get_nodes_from_documents(llama_documents)

In [52]:
nodes[0].text

'"Prepared Remarks:\\r\\n\\r\\nOperator\\r\\nWelcome to the quarterly earnings conference call. At this time, all participants are in a listen-only mode until the question-and-answer session of today\'s conference. [Operator instructions] I would like to inform all parties that today\'s conference is being recorded. If you have any objections, you may disconnect at this time.\\r\\nI would now like to turn the conference over to Mr. Rich Kinder, executive chairman of Kinder Morgan. Thank you. You may begin.\\r\\nRich Kinder -- Executive Chairman\\r\\nThank you, Sheila. Before we begin, as usual, I\'d like to remind you that KMI\'s earnings release today and this call includes forward-looking statements within the meaning of the Private Securities Litigation Reform Act of 1995 and the Securities and Exchange Act of 1934 as well as certain non-GAAP financial measures. Before making any investment decisions, we strongly encourage you to read our full disclosures on forward-looking statemen

In [53]:
nodes[1].text

"Kim and the management team will be taking you through our '24 budget in great detail at the investor conference next week. In my remarks on these calls over the last few quarters, I've tried to outline the tremendous growth that we and most energy experts expect in natural gas production and demand over the coming years, driven primarily by LNG exports and exports to Mexico. To the obvious relief of all of you on this call, I won't be repeating the details supporting our outlook, but that growth is leading to extensive opportunities to grow our system, which already delivers about 40% of the nation's natural gas throughput. Through selective expansion and extension of our enormous system, we can benefit from this expansion.\\r\\n\\r\\nShould you invest $1,000 in Kinder Morgan right now?\\r\\nBefore you buy stock in Kinder Morgan, consider this:\\r\\nThe Motley Fool Stock Advisor analyst team just identified what they believe are the 10 best stocks for investors to buy now... and Kind

In [None]:
for node in tqdm(nodes):
    node_embedding = embed_model.get_text_embedding(
        node.get_content(metadata_mode="all")
    )
    node.embedding = node_embedding

100%|██████████| 437/437 [02:00<00:00,  3.62it/s]


In [None]:
#pushing contents to the MongoDB server
vector_store = MongoDBAtlasVectorSearch(mongo_client, db_name=DB_NAME, collection_name=COLLECTION_NAME, index_name=indexName)
vector_store.add(nodes)

['cd8f34c8-d08b-4164-ae89-ffe689362907',
 '69ade516-d07c-45a5-afd9-a0dcd53ceb80',
 '3933e2ab-e8dc-4faa-90aa-bc4f6edd7f86',
 '1f9ebd7e-4b7a-4dc6-8e09-972c83abb5f9',
 'd2eb6adc-f028-449e-85ff-68da1e498e61',
 '3a8e3393-c787-4865-b545-a0b53dfade07',
 'e1697d80-0fe9-4372-84db-ef993647d8c4',
 'd3f06ec4-ab1a-4311-802d-372a90da7633',
 '77239c7a-bda5-434e-8114-b981615f7770',
 '955afe30-dd53-4ac7-b05a-5774c5990f0e',
 '6562e6e0-3ada-4673-b2ea-d1f1673e5d0f',
 '4f1d47e8-ba24-43fa-84ac-2bfe84400807',
 'fbbbb2a7-2111-4c20-a366-a48ed7c988df',
 '5ca72151-7990-404e-809e-a214a5788ff0',
 '037414e4-3848-40b4-bf02-bfada1b486cc',
 '781672d5-c892-47a6-a73b-93391fcceeff',
 'd2cc6ea0-93fa-4cc3-8535-4cf53aedeed5',
 'cce1dc9f-857d-4e64-8576-8b6ee12c3f4a',
 '30c0d96d-2672-485d-a327-179b25b57d29',
 'd33c2662-9742-424a-b78f-9939cf4ec1a0',
 '2026f8b0-040e-4ff4-aaf5-5d407fbf57bd',
 'aae97805-45d8-4776-9355-1961524b22f3',
 'a21e2b59-4843-48af-8f90-e7d2ac408cb4',
 '3639dc45-1bfa-4583-8d3f-376343428719',
 '3316b8be-b332-

# **Basic RAG**

This section implements the basic version of RAG, where we use FAISS for retrieval and BERT for generation.


In [14]:
rag_technique='basic_rag'
execution_file_location=f'./{rag_technique}_execution-log.jsonl'
chunk_size=512 #tokens
top_k=4
embedding_techique="text-embedding-3-large"
max_tokens_op = 250

In [15]:
#Read the dataset
qa_df = pd.read_csv(csv_file)
qa_df = qa_df.head(3)

In [19]:
#Setting all embedding model and llm for inference
embed_model = OpenAIEmbedding(model=embedding_techique, dimensions=3072)
llm = OpenAI()
Settings.llm = llm
Settings.embed_model = embed_model

In [20]:
#MongoDB connection
mongo_client = get_mongo_client(connection_string)
db = mongo_client[DB_NAME]
collection = db[COLLECTION_NAME]

vector_store = MongoDBAtlasVectorSearch(mongo_client, db_name=DB_NAME, collection_name=COLLECTION_NAME, index_name=indexName)
index = VectorStoreIndex.from_vector_store(vector_store)

Connection to MongoDB successful


In [21]:
#Function to retrive top_k relevent chunks from mongoDB
def get_topk_context(query):
    topk_context =""
    topk_context_list = []
    similarity_score= []

    retriever = VectorIndexRetriever(
        index=index,
        similarity_top_k=top_k,
    )

    contexts = retriever.retrieve(query)
    # similarity_score=[context.get_score() for context in contexts]

    for context in contexts:
        topk_context+=context.text
        topk_context_list.append(context.text)
    return topk_context, topk_context_list

#Function to save log to jsonl file
def log(data, log_file_path):
    with open(log_file_path, "a") as json_file:
        json.dump(data, json_file)
        json_file.write("\n")


In [22]:
#Function to call gpt-4 for response generation
async def llm_response_generation(query, top_k_context):
    prompt= '''Context information is below.
    ---------------------
    {context_str}
    ---------------------
    Given the context information and not prior knowledge,
    answer the query.
    Query: {query_str}
    Answer: '''.replace('{context_str}', top_k_context).replace('{query_str}', query)
    model_name = 'gpt-4o'
    data_to_log = {
                "model": model_name,
            }


    client = AsyncOpenAI()
    req = [
                {
                    "role": "system",
                    "content": prompt
                },
            ]
    response_time_start = datetime.now(timezone.utc)
    open_ai_response = await client.chat.completions.create(
                model=model_name,
                messages=req,
                max_tokens=max_tokens_op
            )
    response_time_end = datetime.now(timezone.utc)

    llm_response_content = open_ai_response.choices[0].message.content

    llm_response_tokens = open_ai_response.usage.completion_tokens
    llm_prompt_tokens = open_ai_response.usage.prompt_tokens

    data_to_log['response'] = str(open_ai_response)
    data_to_log['input_token'] = llm_prompt_tokens
    data_to_log['output_token'] = llm_response_tokens
    data_to_log['response_time_start'] = response_time_start.strftime("%Y-%m-%d %H:%M:%S")
    data_to_log['response_time_end'] = response_time_end.strftime("%Y-%m-%d %H:%M:%S")
    data_to_log['llm_response_content'] = llm_response_content

    return data_to_log , llm_response_content

In [23]:
#Fuction to run through all the questions for the dataframe
async def main():
    question_list=['single_hop_question', 'multiple_hop_question', 'fact_checking_question', 'mcq_question']
    final_df=pd.DataFrame(columns=columns)
    print(f"statring to run the scripts for {rag_technique} with top_k: {top_k}")
    for indx, row in qa_df.iterrows():
        print(f"Running Transcript {indx}")
        Question=[]
        Expected_answer=[]
        Top_k_context=[]
        llm_response=[]
        Question_type=[]
        Top_k_context_list = []

        for question in question_list:
            print(f"    Running Question: {question}")
            query=row[question]
            top_k_context,top_k_context_list=get_topk_context(query)
            (data_to_log , llm_response_content)= await llm_response_generation(query, top_k_context)

            Question.append(query)
            Expected_answer.append(row[question.replace('question','answer')])
            Top_k_context.append(top_k_context)
            Top_k_context_list.append(top_k_context_list)
            llm_response.append(llm_response_content)
            Question_type.append(question.replace('_question',''))

            log(data_to_log, execution_file_location)

        RAG_technique=[rag_technique]*4
        Chunk_size=[chunk_size]*4
        Top_k=[top_k]*4
        Embedding_technique=[embedding_techique]*4
        Transcript=[row['transcript']]*4
        temp_df=pd.DataFrame(columns=columns)
        temp_df['Question']=Question
        temp_df['Expected_answer']=Expected_answer
        temp_df['Top_k_context']=Top_k_context
        temp_df['llm_response']=llm_response
        temp_df['Question_type']=Question_type
        temp_df['RAG_technique']=RAG_technique
        temp_df['Chunk_size']=Chunk_size
        temp_df['Top_k']=Top_k
        temp_df['Embedding_technique']=Embedding_technique
        temp_df['Transcript']=Transcript
        temp_df['Top_k_context_list'] = Top_k_context_list

        final_df=pd.concat([final_df,temp_df], axis=0)

    final_df.to_csv(f'./{rag_technique}_results.csv',index=False)

asyncio.run(main())

statring to run the scripts for basic_rag with top_k: 4
Running Transcript 0
    Running Question: single_hop_question
    Running Question: multiple_hop_question
    Running Question: fact_checking_question
    Running Question: mcq_question
Running Transcript 1
    Running Question: single_hop_question
    Running Question: multiple_hop_question
    Running Question: fact_checking_question
    Running Question: mcq_question


In [24]:
basic_rag_df=pd.read_csv('./basic_rag_results.csv')
basic_rag_df=basic_rag_df[['llm_response']]
basic_rag_df=basic_rag_df.rename(columns={'llm_response': 'basic_rag_llm_response'})

comp_noRAG_vs_basicRAG=pd.concat([no_rag_df,basic_rag_df],axis=1)
comp_noRAG_vs_basicRAG

Unnamed: 0,Question,Expected_answer,llm_response,basic_rag_llm_response
0,What was the dividend declared by Kinder Morga...,The company declared a dividend of $0.2825 per...,"I'm sorry, but I don't have the specific infor...",The dividend declared by Kinder Morgan for the...
1,What opportunities does Kinder Morgan see for ...,Kinder Morgan sees extensive opportunities to ...,"Kinder Morgan, a prominent energy infrastructu...",Kinder Morgan sees several opportunities for e...
2,What was the total amount spent on share repur...,"According to the context, Kinder Morgan spent ...","As of my last update in October 2023, I do not...",Kinder Morgan spent a total of $522 million on...
3,What was the leverage ratio (net debt to adjus...,B) 4.2 times,"I'm sorry, but I don't have access to real-tim...",B) 4.2 times
4,What was one of the standout performance areas...,"According to Clay Gaspar, a standout performan...",One of the standout performance areas for Devo...,One of the standout performance areas for Devo...
5,What are some of the strategic priorities that...,"According to Rick Muncrief, Devon Energy's str...","Devon Energy, as a major player in the oil and...",Some of the strategic priorities that will dri...
6,"According to the context, what was Devon Energ...","According to the context, Devon Energy deliver...","According to the information, Devon Energy's p...",Devon Energy's production growth rate in 2023 ...
7,"According to Clay Gaspar, which of the followi...",(Option D) All of the above,D. All of the above,D. All of the above


# **RAG Fusion**

This section implements the RAG Fusion model for retrieval-augmented generation.


In [46]:
rag_technique='RAG_Fusion'
execution_file_location=f'./{rag_technique}_execution-log.jsonl'
chunk_size=512 #tokens
top_k=50
top_c=6
embedding_techique="text-embedding-3-large"

In [47]:
qa_df = pd.read_csv(csv_file)
qa_df = qa_df.head(3)

In [48]:
#Setting all embedding model and llm for inference
embed_model = OpenAIEmbedding(model=embedding_techique, dimensions=3072)
llm = OpenAI()
Settings.llm = llm
Settings.embed_model = embed_model

In [49]:
#MongoDB connection
mongo_client = get_mongo_client(connection_string)
db = mongo_client[DB_NAME]
collection = db[COLLECTION_NAME]

vector_store = MongoDBAtlasVectorSearch(mongo_client, db_name=DB_NAME, collection_name=COLLECTION_NAME, index_name=indexName)
index = VectorStoreIndex.from_vector_store(vector_store)

Connection to MongoDB successful


In [50]:
# Reciprocal Rank Fusion algorithm
def fuse_results(results_dict, similarity_top_c=top_c):
    """Fuse results."""
    k = 60.0  # `k` is a parameter used to control the impact of outlier rankings.
    fused_scores = {}
    text_to_node = {}

    # compute reciprocal rank scores
    for nodes_with_scores in results_dict.values():
        for rank, node_with_score in enumerate(
            sorted(nodes_with_scores, key=lambda x: x.score or 0.0, reverse=True)):
            text = node_with_score.node.get_content()
            text_to_node[text] = node_with_score
            if text not in fused_scores:
                fused_scores[text] = 0.0
            fused_scores[text] += 1.0 / (rank + k)

    # sort results
    reranked_results = dict(sorted(fused_scores.items(), key=lambda x: x[1], reverse=True))

    # adjust node scores
    reranked_nodes: List[NodeWithScore] = []
    for text, score in reranked_results.items():
        reranked_nodes.append(text_to_node[text])
        reranked_nodes[-1].score = score

    return reranked_nodes[:similarity_top_c]

In [51]:
#Function to retrive top_k relevent chunks from mongoDB
async def get_topk_context(query,mcq=False):
    topk_context =""
    topk_context_list = []
    async def generate_queries(original_query):
        client=AsyncOpenAI()
        response =await client.chat.completions.create(
            model="gpt-4o",
            messages=[
            {"role": "system", "content": "You are a helpful assistant that creates multiple queries with the same meaning as a provided input query."},
            {"role": "user", "content": f"Generate multiple queries with same meaning as the following query: {original_query}."},
            {"role": "user", "content": "OUTPUT (3 queries):"}            ]
        )

        generated_queries = response.choices[0].message.content.strip().split("\n")
        generated_queries = [s for s in generated_queries if s.strip()]
        return generated_queries

    if mcq:
        qn=query.strip().split('\n')[0]
        op=query.strip().split('\n')[1:]
        options="\n".join(op)
        queries = await generate_queries(qn)
        for i in range(len(queries)):
            queries[i] += options
    else:
        queries =await generate_queries(query)

    async def run_queries(queries, retrievers):
        """Run queries against retrievers."""
        tasks = []
        for query in queries:
            for i, retriever in enumerate(retrievers):
                tasks.append(retriever.aretrieve(query))
        task_results = await tqdm.gather(*tasks)

        results_dict = {}
        for i, (query, query_result) in enumerate(zip(queries, task_results)):
            results_dict[(query, i)] = query_result

        return results_dict

    vector_retriever = VectorIndexRetriever(index=index,similarity_top_k=top_k,)

    results_dict = await run_queries(queries, [vector_retriever])

    final_results = fuse_results(results_dict)

    for context in final_results:
        topk_context+=context.text
        topk_context_list.append(context.text)

    return topk_context, topk_context_list,queries

In [52]:
#Function to call gpt-4 for response generation
async def llm_response_generation(query, top_k_context):
    prompt= '''Context information is below.
    ---------------------
    {context_str}
    ---------------------
    Given the context information and not prior knowledge,
    answer the query.
    Query: {query_str}
    Answer: '''.replace('{context_str}', top_k_context).replace('{query_str}', query)
    model_name = 'gpt-4o'
    data_to_log = {
                "model": model_name,
            }

    client = AsyncOpenAI()
    req = [
                {
                    "role": "system",
                    "content": prompt
                },
            ]
    response_time_start = datetime.now(timezone.utc)
    open_ai_response = await client.chat.completions.create(
                model=model_name,
                messages=req,
                max_tokens=250
            )
    response_time_end = datetime.now(timezone.utc)

    llm_response_content = open_ai_response.choices[0].message.content

    llm_response_tokens = open_ai_response.usage.completion_tokens
    llm_prompt_tokens = open_ai_response.usage.prompt_tokens

    data_to_log['response'] = str(open_ai_response)
    data_to_log['input_token'] = llm_prompt_tokens
    data_to_log['output_token'] = llm_response_tokens
    data_to_log['response_time_start'] = response_time_start.strftime("%Y-%m-%d %H:%M:%S")
    data_to_log['response_time_end'] = response_time_end.strftime("%Y-%m-%d %H:%M:%S")
    data_to_log['llm_response_content'] = llm_response_content

    return data_to_log , llm_response_content

In [53]:
#Fuction to run through all the questions for the dataframe

async def main():
    #output column list and output dataframe
    columns=['RAG_technique','Embedding_technique','Chunk_size','Top_k', 'Transcript', 'Question_type', 'Question', 'Expected_answer', 'Top_k_context', 'llm_response','Top_k_context_list']
    final_df=pd.DataFrame(columns=columns)

    question_list=['single_hop_question', 'multiple_hop_question', 'fact_checking_question', 'mcq_question']
    for indx, row in qa_df.iterrows():
        print(f"Running Transcript {indx}")
        Question=[]
        Expected_answer=[]
        Top_k_context=[]
        llm_response=[]
        Question_type=[]
        Top_k_context_list = []

        generated_query=[]

        for question in question_list:
            print(f"    Running Question: {question}")
            query=row[question]
            if question=='mcq_question':
                top_k_context,top_k_context_list,queries=await get_topk_context(query,True)
            else:
                top_k_context,top_k_context_list,queries=await get_topk_context(query,False)

            generated_query.append(queries)

            (data_to_log , llm_response_content)=await llm_response_generation(query, top_k_context)

            Question.append(query)
            Expected_answer.append(row[question.replace('question','answer')])
            Top_k_context.append(top_k_context)
            Top_k_context_list.append(top_k_context_list)
            llm_response.append(llm_response_content)
            Question_type.append(question.replace('_question',''))

            log(data_to_log, execution_file_location)

        RAG_technique=[rag_technique]*4
        Chunk_size=[chunk_size]*4
        Top_k=[top_k]*4
        Embedding_technique=[embedding_techique]*4
        Transcript=[row['transcript']]*4
        temp_df=pd.DataFrame(columns=columns)
        temp_df['Question']=Question
        temp_df['Expected_answer']=Expected_answer
        temp_df['Top_k_context']=Top_k_context
        temp_df['llm_response']=llm_response
        temp_df['Question_type']=Question_type
        temp_df['RAG_technique']=RAG_technique
        temp_df['Chunk_size']=Chunk_size
        temp_df['Top_k']=Top_k
        temp_df['Embedding_technique']=Embedding_technique
        temp_df['Transcript']=Transcript
        temp_df['Top_k_context_list'] = Top_k_context_list

        final_df=pd.concat([final_df,temp_df], axis=0)
    final_df.to_csv(f'./{rag_technique}_results.csv',index=False)

asyncio.run(main())

Running Transcript 0
    Running Question: single_hop_question


100%|██████████| 3/3 [00:04<00:00,  1.53s/it]


    Running Question: multiple_hop_question


100%|██████████| 3/3 [00:03<00:00,  1.17s/it]


    Running Question: fact_checking_question


100%|██████████| 3/3 [00:03<00:00,  1.23s/it]


    Running Question: mcq_question


100%|██████████| 3/3 [00:03<00:00,  1.17s/it]


Running Transcript 1
    Running Question: single_hop_question


100%|██████████| 3/3 [00:03<00:00,  1.15s/it]


    Running Question: multiple_hop_question


100%|██████████| 3/3 [00:03<00:00,  1.17s/it]


    Running Question: fact_checking_question


100%|██████████| 3/3 [00:03<00:00,  1.16s/it]


    Running Question: mcq_question


100%|██████████| 3/3 [00:03<00:00,  1.14s/it]


In [54]:
rag_fusion_df=pd.read_csv('./RAG_Fusion_results.csv')
rag_fusion_df=rag_fusion_df[['llm_response']]
rag_fusion_df=rag_fusion_df.rename(columns={'llm_response': 'rag_fusion_llm_response'})

comp_noRAG_vs_basicRAG_vs_RAGfusion=pd.concat([comp_noRAG_vs_basicRAG,rag_fusion_df],axis=1)
comp_noRAG_vs_basicRAG_vs_RAGfusion

Unnamed: 0,Question,Expected_answer,llm_response,basic_rag_llm_response,rag_fusion_llm_response
0,What was the dividend declared by Kinder Morga...,The company declared a dividend of $0.2825 per...,"I'm sorry, but I don't have the specific infor...",The dividend declared by Kinder Morgan for the...,"For the fourth quarter of 2023, Kinder Morgan ..."
1,What opportunities does Kinder Morgan see for ...,Kinder Morgan sees extensive opportunities to ...,"Kinder Morgan, a prominent energy infrastructu...",Kinder Morgan sees several opportunities for e...,Kinder Morgan sees multiple opportunities for ...
2,What was the total amount spent on share repur...,"According to the context, Kinder Morgan spent ...","As of my last update in October 2023, I do not...",Kinder Morgan spent a total of $522 million on...,"In 2023, Kinder Morgan's total amount spent on..."
3,What was the leverage ratio (net debt to adjus...,B) 4.2 times,"I'm sorry, but I don't have access to real-tim...",B) 4.2 times,B) 4.2 times
4,What was one of the standout performance areas...,"According to Clay Gaspar, a standout performan...",One of the standout performance areas for Devo...,One of the standout performance areas for Devo...,One of the standout performance areas for Devo...
5,What are some of the strategic priorities that...,"According to Rick Muncrief, Devon Energy's str...","Devon Energy, as a major player in the oil and...",Some of the strategic priorities that will dri...,The strategic priorities that will drive Devon...
6,"According to the context, what was Devon Energ...","According to the context, Devon Energy deliver...","According to the information, Devon Energy's p...",Devon Energy's production growth rate in 2023 ...,"According to the context, Devon Energy's produ..."
7,"According to Clay Gaspar, which of the followi...",(Option D) All of the above,D. All of the above,D. All of the above,D. All of the above


# **Cohere Re-ranker**

This section demonstrates how to use Cohere's re-ranker to re-rank the retrieved documents for better relevance.


In [34]:
rag_technique='cohere-rerank'
execution_file_location=f'./{rag_technique}_execution-log.jsonl'
chunk_size=512 #tokens
top_k=50
embedding_techique="text-embedding-3-large"
reranked_topk = 6
max_tokens_op = 250

In [35]:
#55 transcript Q and A dataset
qa_df = pd.read_csv(csv_file)
qa_df = qa_df.head(3)

In [36]:
#Setting all embedding model and llm for inference
embed_model = OpenAIEmbedding(model=embedding_techique, dimensions=3072)
llm = OpenAI()
Settings.llm = llm
Settings.embed_model = embed_model
co = cohere.Client(cohere_api)

In [37]:
#MongoDB connection
mongo_client = get_mongo_client(connection_string)
db = mongo_client[DB_NAME]
collection = db[COLLECTION_NAME]

vector_store = MongoDBAtlasVectorSearch(mongo_client, db_name=DB_NAME, collection_name=COLLECTION_NAME, index_name=MONGO_INDEX_NAME)
index = VectorStoreIndex.from_vector_store(vector_store)

Connection to MongoDB successful


In [38]:
#Function to retrive top_k relevent chunks from mongoDB
def get_topk_context(query):
    topk_context =""
    topk_context_list = []
    reranked_topk_list = []
    reranked_topk_context = ""
    retriever = VectorIndexRetriever(
        index=index,
        similarity_top_k=top_k,
    )


    contexts = retriever.retrieve(query)

    for context in contexts:
        topk_context+=context.text
        topk_context_list.append(context.text)

    response = co.rerank(
        model="rerank-english-v3.0",
        query=query,
        documents=topk_context_list,
        top_n=reranked_topk,
    )

    for ele in json.loads(response.json())['results'][:reranked_topk]:
        reranked_topk_list.append(topk_context_list[ele['index']])

    for contxt in reranked_topk_list:
        reranked_topk_context+=contxt


    return reranked_topk_context, reranked_topk_list

In [39]:
#Function to call gpt-4 for response generation
async def llm_response_generation(query, top_k_context):
    prompt= '''Context information is below.
    ---------------------
    {context_str}
    ---------------------
    Given the context information and not prior knowledge,
    answer the query.
    Query: {query_str}
    Answer: '''.replace('{context_str}', top_k_context).replace('{query_str}', query)
    model_name = 'gpt-4o'
    data_to_log = {
                "model": model_name,
            }


    client = AsyncOpenAI()
    req = [
                {
                    "role": "system",
                    "content": prompt
                },
            ]
    response_time_start = datetime.now(timezone.utc)
    open_ai_response = await client.chat.completions.create(
                model=model_name,
                messages=req,
                max_tokens=max_tokens_op
            )
    response_time_end = datetime.now(timezone.utc)

    llm_response_content = open_ai_response.choices[0].message.content

    llm_response_tokens = open_ai_response.usage.completion_tokens
    llm_prompt_tokens = open_ai_response.usage.prompt_tokens

    data_to_log['response'] = str(open_ai_response)
    data_to_log['input_token'] = llm_prompt_tokens
    data_to_log['output_token'] = llm_response_tokens
    data_to_log['response_time_start'] = response_time_start.strftime("%Y-%m-%d %H:%M:%S")
    data_to_log['response_time_end'] = response_time_end.strftime("%Y-%m-%d %H:%M:%S")
    data_to_log['llm_response_content'] = llm_response_content

    return data_to_log , llm_response_content

In [40]:
#Fuction to run through all the questions for the dataframe
async def main():
    question_list=['single_hop_question', 'multiple_hop_question', 'fact_checking_question', 'mcq_question']
    final_df=pd.DataFrame(columns=columns)
    print(f"statring to run the scripts for {rag_technique} with top_k: {top_k}")
    for indx, row in qa_df.iterrows():
        print(f"Running Transcript {indx}")
        Question=[]
        Expected_answer=[]
        Top_k_context=[]
        llm_response=[]
        Question_type=[]
        Top_k_context_list = []

        for question in question_list:
            print(f"    Running Question: {question}")
            query=row[question]
            (top_k_context, top_k_context_list)=get_topk_context(query)
            time.sleep(6)
            (data_to_log , llm_response_content)=await llm_response_generation(query, top_k_context)

            Question.append(query)
            Expected_answer.append(row[question.replace('question','answer')])
            Top_k_context.append(top_k_context)
            Top_k_context_list.append(top_k_context_list)
            llm_response.append(llm_response_content)
            Question_type.append(question.replace('_question',''))

            log(data_to_log, execution_file_location)

        RAG_technique=[rag_technique]*4
        Chunk_size=[chunk_size]*4
        Top_k=[top_k]*4
        Embedding_technique=[embedding_techique]*4
        Transcript=[row['transcript']]*4
        temp_df=pd.DataFrame(columns=columns)
        temp_df['Question']=Question
        temp_df['Expected_answer']=Expected_answer
        temp_df['Top_k_context']=Top_k_context
        temp_df['llm_response']=llm_response
        temp_df['Question_type']=Question_type
        temp_df['RAG_technique']=RAG_technique
        temp_df['Chunk_size']=Chunk_size
        temp_df['Top_k']=Top_k
        temp_df['Embedding_technique']=Embedding_technique
        temp_df['Transcript']=Transcript
        temp_df['Top_k_context_list'] = Top_k_context_list

        final_df=pd.concat([final_df,temp_df], axis=0)

    final_df.to_csv(f'./{rag_technique}_results.csv')

asyncio.run(main())

statring to run the scripts for cohere-rerank with top_k: 50
Running Transcript 0
    Running Question: single_hop_question
    Running Question: multiple_hop_question
    Running Question: fact_checking_question
    Running Question: mcq_question
Running Transcript 1
    Running Question: single_hop_question
    Running Question: multiple_hop_question
    Running Question: fact_checking_question
    Running Question: mcq_question


In [41]:
cohere_df=pd.read_csv('./cohere-rerank_results.csv')
cohere_df=cohere_df[['llm_response']]
cohere_df=cohere_df.rename(columns={'llm_response': 'cohere_llm_response'})

all_comp=pd.concat([comp_noRAG_vs_basicRAG_vs_RAGfusion, cohere_df],axis=1)
all_comp

Unnamed: 0,Question,Expected_answer,llm_response,basic_rag_llm_response,rag_fusion_llm_response,cohere_llm_response
0,What was the dividend declared by Kinder Morga...,The company declared a dividend of $0.2825 per...,"I'm sorry, but I don't have the specific infor...",The dividend declared by Kinder Morgan for the...,"For the fourth quarter of 2023, Kinder Morgan ...",Kinder Morgan declared a dividend of $0.2825 p...
1,What opportunities does Kinder Morgan see for ...,Kinder Morgan sees extensive opportunities to ...,"Kinder Morgan, a prominent energy infrastructu...",Kinder Morgan sees several opportunities for e...,Kinder Morgan sees multiple opportunities for ...,Kinder Morgan sees several opportunities for e...
2,What was the total amount spent on share repur...,"According to the context, Kinder Morgan spent ...","As of my last update in October 2023, I do not...",Kinder Morgan spent a total of $522 million on...,Kinder Morgan spent a total of $500 million on...,"In 2023, Kinder Morgan spent $2.5 billion on s..."
3,What was the leverage ratio (net debt to adjus...,B) 4.2 times,"I'm sorry, but I don't have access to real-tim...",B) 4.2 times,B) 4.2 times,B) 4.2 times
4,What was one of the standout performance areas...,"According to Clay Gaspar, a standout performan...",One of the standout performance areas for Devo...,One of the standout performance areas for Devo...,One of the standout performance areas for Devo...,One of the standout performance areas for Devo...
5,What are some of the strategic priorities that...,"According to Rick Muncrief, Devon Energy's str...","Devon Energy, as a major player in the oil and...",Some of the strategic priorities that will dri...,Devon Energy's strategic priorities for drivin...,Devon Energy's strategic priorities for drivin...
6,"According to the context, what was Devon Energ...","According to the context, Devon Energy deliver...","According to the information, Devon Energy's p...",Devon Energy's production growth rate in 2023 ...,"According to the context, Devon Energy's produ...",Devon Energy's production growth rate in 2023 ...
7,"According to Clay Gaspar, which of the followi...",(Option D) All of the above,D. All of the above,D. All of the above,D. All of the above,D. All of the above





# **Evaluation using RAGAS Metric**

This section evaluates the performance of our models using the RAGAS metric.

In [42]:
Intermediate_results_file_list=['./no_rag_results.csv', './basic_rag_results.csv', './RAG_Fusion_results.csv', './cohere-rerank_results.csv']

In [43]:
def RAGAS_Evaluation(filepath):
  dataset_df = pd.read_csv(filepath)
  dataset_df = dataset_df.head(4)

  #output column list and output dataframe
  columns = ['question','answer','contexts','ground_truth','context_precision','context_recall','faithfulness','answer_relevancy','answer_correctness']
  call_metrics=pd.DataFrame(columns=columns)
  rag_technique=filepath.replace('_results.csv','').replace('./','')

  #Evaluation calculation
  for indx,rows in dataset_df.iterrows():
      print(indx)

      if rag_technique=='no_rag':
        data = {
            'question': [rows['Question']],
            'answer': [rows['llm_response']],
            'contexts' : [[' ']],
            'ground_truth': [rows['Expected_answer']]
        }
      else:
        data = {
            'question': [rows['Question']],
            'answer': [rows['llm_response']],
            'contexts' : [literal_eval(rows['Top_k_context_list'])],
            'ground_truth': [rows['Expected_answer']]
        }

      dataset = pd.DataFrame.from_dict(data)
      dataset = Dataset.from_pandas(dataset)

      if rag_technique == 'no_rag':
        result = evaluate(
            dataset,
            metrics=[
                answer_relevancy,
                answer_correctness
            ],
        )

      else:
        result = evaluate(
          dataset,
          metrics=[
              context_precision,
              context_recall,
              faithfulness,
              answer_relevancy,
              answer_correctness
          ],
      )

      result_df = result.to_pandas()

      call_metrics=pd.concat([call_metrics,result_df])
      modified_data=dataset_df.head(indx+1)
      dataset_df_with_metrics = pd.concat([modified_data,call_metrics.drop(['question','answer','contexts','ground_truth'],axis=1).reset_index(drop=True)],axis=1)
      dataset_df_with_metrics.to_csv(f'Final_Results/{rag_technique}_metrics_file.csv')

In [44]:
for filename in Intermediate_results_file_list:
  RAGAS_Evaluation(filename)

0


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

1


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

2


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

3


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

0


Evaluating:   0%|          | 0/5 [00:00<?, ?it/s]

1


Evaluating:   0%|          | 0/5 [00:00<?, ?it/s]

2


Evaluating:   0%|          | 0/5 [00:00<?, ?it/s]

3


Evaluating:   0%|          | 0/5 [00:00<?, ?it/s]

0


Evaluating:   0%|          | 0/5 [00:00<?, ?it/s]

1


Evaluating:   0%|          | 0/5 [00:00<?, ?it/s]

2


Evaluating:   0%|          | 0/5 [00:00<?, ?it/s]

3


Evaluating:   0%|          | 0/5 [00:00<?, ?it/s]

0


Evaluating:   0%|          | 0/5 [00:00<?, ?it/s]

1


Evaluating:   0%|          | 0/5 [00:00<?, ?it/s]

2


Evaluating:   0%|          | 0/5 [00:00<?, ?it/s]

3


Evaluating:   0%|          | 0/5 [00:00<?, ?it/s]

In [45]:
Final_results_file_list=['./Final_Results/no_rag_metrics_file.csv', './Final_Results/basic_rag_metrics_file.csv', './Final_Results/RAG_Fusion_metrics_file.csv', './Final_Results/cohere-rerank_metrics_file.csv']
columns=['RAG_techniques', 'Avg_context_precision', 'Avg_context_recall', 'Avg_faithfulness', 'Avg_answer_relevancy', 'Avg_answer_correctness']
final_result_df=pd.DataFrame(columns=columns)

results_list = []
def results(filepath):
  final=pd.read_csv(filepath)
  final = final.fillna(0)
  rag_technique=filepath.replace('_metrics_file.csv','').replace('./','').replace('Final_Results/','')
  basic_rag_context_precision=final['context_precision'].mean()
  basic_rag_context_recall=final['context_recall'].mean()
  basic_rag_faithfulness=final['faithfulness'].mean()
  basic_rag_answer_relevancy=final['answer_relevancy'].mean()
  basic_rag_answer_correctness=final['answer_correctness'].mean()

  row_dict={'RAG_techniques': rag_technique, 'Avg_context_precision': basic_rag_context_precision, 'Avg_context_recall': basic_rag_context_recall, 'Avg_faithfulness': basic_rag_faithfulness, 'Avg_answer_relevancy': basic_rag_answer_relevancy, 'Avg_answer_correctness':basic_rag_answer_correctness}
  return row_dict

for final_result_file in Final_results_file_list:
  result=results(final_result_file)
  results_list.append(result)

final_result_df = pd.DataFrame(results_list)
final_result_df

Unnamed: 0,RAG_techniques,Avg_context_precision,Avg_context_recall,Avg_faithfulness,Avg_answer_relevancy,Avg_answer_correctness
0,no_rag,0.0,0.0,0.0,0.248901,0.529041
1,basic_rag,1.0,1.0,0.569444,0.931811,0.866764
2,RAG_Fusion,0.963542,1.0,0.75,0.934886,0.736549
3,cohere-rerank,0.914583,1.0,0.5,0.966204,0.741454


# **Result Analysis**

In this section, we analyze the results obtained from our evaluations and draw conclusions.