In [2]:
# import models
import openai
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding

from llama_index.llms.anthropic import Anthropic

from llama_index.llms.cohere import Cohere
from llama_index.embeddings.cohere import CohereEmbedding

from llama_index.llms.mistralai import MistralAI
from llama_index.embeddings.mistralai import MistralAIEmbedding

from llama_index.llms.gemini import Gemini
from llama_index.embeddings.gemini import GeminiEmbedding

import pandas as pd

from llama_index.core import VectorStoreIndex, PromptTemplate, ServiceContext

from llama_index.core.indices.query.query_transform import HyDEQueryTransform
from llama_index.core.query_engine import TransformQueryEngine, RetrieverQueryEngine
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core.postprocessor import LLMRerank
from llama_index.core.retrievers import QueryFusionRetriever

from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
)

In [3]:
import os
from getpass import getpass


def _read_secret(env_var):
    """Return the secret stored in the environment variable ``env_var``.

    If the variable is unset or empty, the value is asked for with a hidden
    prompt, so that no key has to be written into the notebook source.
    """
    value = os.environ.get(env_var)
    if not value:
        value = getpass(f"{env_var}: ")
    return value


# OpenAI -- the OpenAI classes in this notebook are constructed without an
# explicit key, so the key is handed over through the environment.
os.environ["OPENAI_API_KEY"] = _read_secret("OPENAI_API_KEY")

# Cohere
API_KEY_COHERE = _read_secret("API_KEY_COHERE")
# Anthropic (the variable keeps its original spelling; other cells reference it)
API_KEY_ANTROPHIC = _read_secret("API_KEY_ANTROPHIC")
# Google
API_KEY_GOOGLE = _read_secret("API_KEY_GOOGLE")


In [None]:
# Folder that holds the PDF files to be indexed.
input_dir_path = '../context_files'

# Only .pdf files are loaded; recursive=True includes sub-directories.
loader = SimpleDirectoryReader(input_dir=input_dir_path, required_exts=[".pdf"], recursive=True)
documents = loader.load_data()

In [5]:
# Model selection (global llama_index Settings)
from llama_index.core import Settings

# Provider/model pair used for this run. Change this value and re-run from
# here to evaluate a different model. The default reproduces the configuration
# that was effective before: the Gemini assignments came last in this cell and
# therefore overrode every other provider.
ACTIVE_MODEL = "gemini-1.5-pro"

# Each entry builds its (llm, embed_model) pair lazily, so only the clients of
# the selected provider are created.
_MODEL_BUILDERS = {
    # OpenAI - GPT 4
    "gpt-4-turbo": lambda: (
        OpenAI(model="gpt-4-turbo"),
        OpenAIEmbedding(model="text-embedding-3-large"),
    ),
    # OpenAI - GPT 4o
    "gpt-4o": lambda: (
        OpenAI(model="gpt-4o"),
        OpenAIEmbedding(model="text-embedding-3-large"),
    ),
    # Anthropic - Claude 3 opus (paired with OpenAI embeddings)
    "claude-3-opus": lambda: (
        Anthropic(model="claude-3-opus-20240229", api_key=API_KEY_ANTROPHIC),
        OpenAIEmbedding(model="text-embedding-3-large"),
    ),
    # Cohere - Command R+
    # NOTE(review): this embed model is also the one used when the index is
    # built, so documents are embedded with input_type="search_query" too --
    # confirm whether "search_document" is intended for indexing.
    "command-r-plus": lambda: (
        Cohere(api_key=API_KEY_COHERE, model="command-r-plus"),
        CohereEmbedding(
            cohere_api_key=API_KEY_COHERE,
            model_name="embed-english-v3.0",
            input_type="search_query",
        ),
    ),
    # Google - Gemini 1.5
    # The embedding client now receives the same key as the LLM instead of
    # depending on a key being configured elsewhere.
    "gemini-1.5-pro": lambda: (
        Gemini(api_key=API_KEY_GOOGLE, model="models/gemini-1.5-pro-latest"),
        GeminiEmbedding(model_name="models/text-embedding-004", api_key=API_KEY_GOOGLE),
    ),
}

if ACTIVE_MODEL not in _MODEL_BUILDERS:
    raise ValueError(
        f"Unknown ACTIVE_MODEL {ACTIVE_MODEL!r}; choose one of {sorted(_MODEL_BUILDERS)}"
    )

llm, embed_model = _MODEL_BUILDERS[ACTIVE_MODEL]()

Settings.llm = llm
Settings.embed_model = embed_model

In [None]:
# Build the vector index from the loaded documents (progress display enabled).
index = VectorStoreIndex.from_documents(
    documents,
    show_progress=True,
)

# Prompt Template

In [None]:
# QA prompt handed to the query engine; {context_str} and {query_str} are the
# two placeholders of the template.
# Two wording fixes compared with the earlier version:
#   * "Havard" -> "Harvard" (citation style name).
#   * The closing instruction no longer says "and not prior knowledge", which
#     contradicted the explicit permission above to fall back on pretrained
#     knowledge when the context is not helpful.
# NOTE: results produced with the earlier wording are not strictly comparable.
text_qa_template_str = (
    """  
    You are an expert in Nuclear Medicine that is trusted around the world for your factual accuracy.
    If the context isn't helpful, you can also answer the question on your pretrained knowledge

    Some rules to follow:
    1. Directly reference the given context according to Harvard citation style in your answer. Do not include the path where the document is saved.
    2. Avoid statements like 'Based on the context, ...' or 'The context information ...' or anything along those lines.
    3. Focus on succinct answers that provide only the facts necessary, do not be verbose.
    4. Answer the question in the language of the question (if the question is in German, answer in German. If the question is in English answer in English)
    ---------------------
    {context_str}
    ---------------------
    Given the context information, answer the query.
    Query: {query_str}
    Answer: """)
text_qa_template = PromptTemplate(text_qa_template_str)

# Naive RAG


In [8]:
# Naive RAG: query engine taken straight from the index, using the selected
# llm and the custom QA prompt.
query_engine_naive = index.as_query_engine(
    llm=llm,
    text_qa_template=text_qa_template,
)

In [10]:
def get_answers(dataframe, answer_list, context_list, llm, query_engine=None):
    """Run every question in ``dataframe`` through a query engine.

    For each row the question is queried, the response object is appended to
    ``answer_list`` and the texts of its source nodes are appended (as one
    list) to ``context_list``. The frame is modified in place: the ``LLM``
    column is set to ``llm`` and ``' naive RAG'`` is appended to the row's
    ``QuestionId``.

    A failing query no longer aborts the run or reuses the previous row's
    response: the error is printed, ``None`` is recorded as the answer and an
    empty list as the context, so both lists stay aligned with the rows.

    Args:
        dataframe: Frame with ``Question`` and ``QuestionId`` columns.
        answer_list: List that receives one entry per row.
        context_list: List that receives one list of source texts per row.
        llm: Label written to the ``LLM`` column. It does not select a model;
            the query engine determines which model answers.
        query_engine: Object exposing ``query(prompt)``. Defaults to the
            module-level ``query_engine_naive``.
    """
    engine = query_engine_naive if query_engine is None else query_engine

    # QuestionId receives string values below; make the column object-typed up
    # front so that a numeric column is not upcast implicitly on first write.
    dataframe['QuestionId'] = dataframe['QuestionId'].astype(object)

    for i in dataframe.index:
        user_prompt = dataframe.loc[i, 'Question']
        print(user_prompt)
        try:
            response = engine.query(user_prompt)
        except Exception as exc:
            print(f"Query failed for row {i}: {exc!r}")
            response = None
            context = []
        else:
            # Texts of the nodes the engine retrieved for this question.
            context = [node.text for node in response.source_nodes]

        answer_list.append(response)
        context_list.append(context)

        dataframe.loc[i, 'LLM'] = llm

        # Tag the question id with the RAG variant.
        dataframe.loc[i, 'QuestionId'] = str(dataframe.loc[i, 'QuestionId']) + ' naive RAG'

        print(response)
        print(i)

## GPT 4 - Naive RAG


In [61]:
# Fresh copy of the evaluation questions for the GPT-4 run.
gpt_4_context_naiveRAG_question_df = pd.read_csv(
    filepath_or_buffer="../Evaluation-Questions.csv",
)

In [62]:
# Accumulators filled by get_answers: one response / one context list per question.
gpt_answers, gpt_context = [], []

In [63]:
# Create the 'context' column as an all-NA placeholder. get_answers does not
# fill it (its per-row context write is commented out); the real contexts are
# assigned in the "update dataframe" step.
gpt_4_context_naiveRAG_question_df['context'] = pd.NA

In [None]:
# The last argument is only the label stored in the 'LLM' column; the model
# that answers is whatever the module-level query_engine_naive was built with.
get_answers(
    dataframe=gpt_4_context_naiveRAG_question_df,
    answer_list=gpt_answers,
    context_list=gpt_context,
    llm='gpt-4-turbo naive RAG',
)

In [20]:
# Attach the collected responses and contexts as columns (aligned on the row index).
gpt_4_context_naiveRAG_question_df['answer'] = pd.Series(gpt_answers)
gpt_4_context_naiveRAG_question_df['context'] = pd.Series(gpt_context)

In [21]:
# Persist the GPT-4 naive-RAG results. The target folder is created first so
# that a missing directory cannot make to_csv fail after the query run.
filename = './csv-naiveRAG/GPT_4_context_naiveRAG_EvaluationQuestions.csv'
os.makedirs(os.path.dirname(filename), exist_ok=True)
gpt_4_context_naiveRAG_question_df.to_csv(filename, sep=',', index=False, encoding='utf-8')

## GPT-4o - Naive RAG

In [11]:
# Fresh copy of the evaluation questions for the GPT-4o run.
gpt_4o_context_naiveRAG_question_df = pd.read_csv(
    filepath_or_buffer="../Evaluation-Questions.csv",
)

In [None]:
# Accumulators for the GPT-4o run, filled in place by get_answers.
gpto_answers, gpto_context = [], []
# The last argument is only the label stored in the 'LLM' column; the model
# that answers is whatever the module-level query_engine_naive was built with.
get_answers(
    dataframe=gpt_4o_context_naiveRAG_question_df,
    answer_list=gpto_answers,
    context_list=gpto_context,
    llm='gpt-4o naive RAG',
)

In [13]:
# Attach the collected responses and contexts as columns (aligned on the row index).
gpt_4o_context_naiveRAG_question_df['answer'] = pd.Series(gpto_answers)
gpt_4o_context_naiveRAG_question_df['context'] = pd.Series(gpto_context)

# Persist the GPT-4o naive-RAG results. The target folder is created first so
# that a missing directory cannot make to_csv fail after the query run.
filename = './csv-naiveRAG/GPT_4o_context_naiveRAG_EvaluationQuestions.csv'
os.makedirs(os.path.dirname(filename), exist_ok=True)
gpt_4o_context_naiveRAG_question_df.to_csv(filename, sep=',', index=False, encoding='utf-8')

## Claude 3 opus - Naive RAG


In [51]:
# Fresh copy of the evaluation questions for the Claude 3 Opus run.
claude_opus_context_naiveRAG_question_df = pd.read_csv(
    filepath_or_buffer="../Evaluation-Questions.csv",
)

In [52]:
# Accumulators filled by get_answers: one response / one context list per question.
claude_answers, claude_context = [], []

In [None]:
# The last argument is only the label stored in the 'LLM' column; the model
# that answers is whatever the module-level query_engine_naive was built with.
get_answers(
    dataframe=claude_opus_context_naiveRAG_question_df,
    answer_list=claude_answers,
    context_list=claude_context,
    llm='claude-3-opus-20240229 naive RAG',
)

In [54]:
# Create the 'context' column as an all-NA placeholder. get_answers does not
# fill it (its per-row context write is commented out); the real contexts are
# assigned in the "update dataframe" step.
claude_opus_context_naiveRAG_question_df['context'] = pd.NA

In [55]:
# Attach the collected responses and contexts as columns (aligned on the row index).
claude_opus_context_naiveRAG_question_df['answer'] = pd.Series(claude_answers)
claude_opus_context_naiveRAG_question_df['context'] = pd.Series(claude_context)

In [56]:
# Persist the Claude 3 Opus naive-RAG results. The target folder is created
# first so that a missing directory cannot make to_csv fail after the query run.
filename = './csv-naiveRAG/Claude-3-opus_context_naiveRAG_EvaluationQuestions.csv'
os.makedirs(os.path.dirname(filename), exist_ok=True)
claude_opus_context_naiveRAG_question_df.to_csv(filename, sep=',', index=False, encoding='utf-8')

## Command R+ - Naive RAG


In [9]:
# Fresh copy of the evaluation questions for the Command R+ run.
command_r_plus_context_naiveRAG_question_df = pd.read_csv(
    filepath_or_buffer="../Evaluation-Questions.csv",
)

In [10]:
# Accumulators filled by get_answers: one response / one context list per question.
command_answers, command_context = [], []

In [None]:
# The last argument is only the label stored in the 'LLM' column; the model
# that answers is whatever the module-level query_engine_naive was built with.
get_answers(
    dataframe=command_r_plus_context_naiveRAG_question_df,
    answer_list=command_answers,
    context_list=command_context,
    llm='command-r-plus naive RAG',
)

In [12]:
# Create the 'context' column as an all-NA placeholder. get_answers does not
# fill it (its per-row context write is commented out); the real contexts are
# assigned in the "update dataframe" step.
command_r_plus_context_naiveRAG_question_df['context'] = pd.NA

In [13]:
# Attach the collected responses and contexts as columns (aligned on the row index).
command_r_plus_context_naiveRAG_question_df['answer'] = pd.Series(command_answers)
command_r_plus_context_naiveRAG_question_df['context'] = pd.Series(command_context)

In [14]:
# Persist the Command R+ naive-RAG results. The target folder is created first
# so that a missing directory cannot make to_csv fail after the query run.
filename = './csv-naiveRAG/Command_r_plus_context_naiveRAG_EvaluationQuestions.csv'
os.makedirs(os.path.dirname(filename), exist_ok=True)
command_r_plus_context_naiveRAG_question_df.to_csv(filename, sep=',', index=False, encoding='utf-8')

## Gemini - Naive RAG


In [31]:
# Fresh copy of the evaluation questions for the Gemini 1.5 run.
gemini_1_5_context_naiveRAG_question_df = pd.read_csv(
    filepath_or_buffer="../Evaluation-Questions.csv",
)

In [150]:
# NOTE(review): not referenced by any of the cells visible here (the Gemini run
# uses gemini1_5_answers) -- confirm whether this cell can be dropped.
gemini_answers = list()

In [32]:
# Accumulators filled by get_answers: one response / one context list per question.
gemini1_5_answers, gemini1_5_context = [], []

In [None]:
# The last argument is only the label stored in the 'LLM' column; the model
# that answers is whatever the module-level query_engine_naive was built with.
# NOTE(review): unlike the other runs in this notebook, this label carries no
# ' naive RAG' suffix -- confirm whether that is intended.
get_answers(
    dataframe=gemini_1_5_context_naiveRAG_question_df,
    answer_list=gemini1_5_answers,
    context_list=gemini1_5_context,
    llm='gemini-1.5-pro-latest',
)

In [34]:
# Create the 'context' column as an all-NA placeholder. get_answers does not
# fill it (its per-row context write is commented out); the real contexts are
# assigned in the "update dataframe" step.
gemini_1_5_context_naiveRAG_question_df['context'] = pd.NA

In [35]:
# Attach the collected responses and contexts as columns (aligned on the row index).
gemini_1_5_context_naiveRAG_question_df['answer'] = pd.Series(gemini1_5_answers)
gemini_1_5_context_naiveRAG_question_df['context'] = pd.Series(gemini1_5_context)

In [36]:
# Persist the Gemini 1.5 naive-RAG results. The target folder is created first
# so that a missing directory cannot make to_csv fail after the query run.
filename = './csv-naiveRAG/Gemini-1-5_context_naiveRAG_EvaluationQuestions.csv'
os.makedirs(os.path.dirname(filename), exist_ok=True)
gemini_1_5_context_naiveRAG_question_df.to_csv(filename, sep=',', index=False, encoding='utf-8')