# Comparison between RAG solutions

In this notebook, I run the same testset against different RAG solutions and compare their results.

# Setup

Install additional libraries

In [1]:
%pip install giskard[llm]==2.12.0
%pip install mistralai


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


Import libraries

In [2]:
import giskard
import pandas as pd
import importlib


Load the API keys

In [3]:
# Modules needed to read the API keys from the environment
import os
from dotenv import load_dotenv, find_dotenv

# Locate the .env file and load its variables into the process environment.
# raise_error_if_not_found=True makes a missing .env file fail right here
# instead of surfacing later as a missing API key.
load_dotenv(find_dotenv(raise_error_if_not_found=True))

True

In [4]:
# Read the OpenAI key from the process environment (None when it is not set)
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# Fail fast when the key is missing or empty, with a message naming the variable
if OPENAI_API_KEY is None or OPENAI_API_KEY == "":
    raise EnvironmentError("OPENAI_API_KEY not defined in the .env file")

# Reaching this line means the key is available
print("OPENAI_API_KEY successfully loaded.")

OPENAI_API_KEY successfully loaded.


In [5]:
# Read the Mistral key from the process environment (None when it is not set)
MISTRAL_API_KEY = os.environ.get("MISTRAL_API_KEY")

# Fail fast when the key is missing or empty, with a message naming the variable
if MISTRAL_API_KEY is None or MISTRAL_API_KEY == "":
    raise EnvironmentError("MISTRAL_API_KEY not defined in the .env file")

# Reaching this line means the key is available
print("MISTRAL_API_KEY successfully loaded.")

MISTRAL_API_KEY successfully loaded.


Set the Mistral client as the default Giskard LLM client

In [6]:
import os
from giskard.llm.client.mistral import MistralClient

# Create Giskard's Mistral client and register it as the default LLM client
# used by Giskard.
# NOTE(review): presumably the client reads MISTRAL_API_KEY from the environment — confirm.
mc = MistralClient()
giskard.llm.set_default_client(mc)

Import the RAG implementations to test

In [7]:
import sys
# Show which Python interpreter the kernel is running on
print(sys.executable)
# Make the RAG implementations importable. These are relative paths, so they
# are resolved against the kernel's current working directory.
sys.path.append('../RAG_Basic')
sys.path.append('../RAG_Guillaume')

# Entry points of the RAG pipelines under test
from Call_RAG import ask,ask_Llama3
import Call_RAG_advanced


/home/guillaume/.cache/pypoetry/virtualenvs/forestbot-QnTDFiWY-py3.10/bin/python


# Test basic RAG

## Wrap RAG model and create giskard model

In [8]:
def model_predict_basic(df: pd.DataFrame):
    """Answer every question in ``df`` with the basic RAG pipeline.

    Giskard calls this function with a pandas.DataFrame holding the model's
    input features. The answers are returned as a list, one entry per row,
    in row order.
    """
    answers = []
    for query in df["question"]:
        answers.append(ask(query))
    return answers


# Wrap the basic RAG prediction function in a Giskard model. The model is
# declared as text generation and takes a single "question" feature.
giskard_model_basic = giskard.Model(
    model=model_predict_basic,
    model_type="text_generation",
    name="Question Answering on Forest Protection and Conservation Areas",
    description="This model provides comprehensive answers to questions related to forest protection and conservation areas in Central Africa.",
    feature_names=["question"],
)

2024-06-11 15:31:09,427 pid:239260 MainThread giskard.models.automodel INFO     Your 'prediction_function' is successfully wrapped by Giskard's 'PredictionFunctionModel' wrapper class.


In [9]:
# Just to test if model works. Useless in normal test mode
# examples = [
#     "Combien de pays son couvert par l'OFAC ? ",
#     "Le Cameroun est-il a jour de ses engagements ?",
# ]
# giskard_dataset = giskard.Dataset(pd.DataFrame({"question": examples}), target=None)
# print(giskard_model_basic.predict(giskard_dataset).prediction)

## General LLM test

In [None]:
# Run the Giskard scan on the basic RAG model.
scan_results_basic = giskard.scan(giskard_model_basic)
# Approximate cost with Mistral: ~0.5 USD, ~100,000 tokens

In [None]:
# Write the scan report to an HTML file
scan_results_basic.to_html("model_scan_results_basic.html")
# Generate a test suite from the scan results
test_suite = scan_results_basic.generate_test_suite("My first test suite")
# Render the scan results in the notebook
display(scan_results_basic)

## Test RAG Testset

### Import Knowledge base

In [10]:
from giskard.rag import KnowledgeBase

# Load the knowledge base and convert every value of the "text" column to a string
knowledge_base_df = pd.read_csv('knowledge_base.csv').assign(
    text=lambda frame: frame['text'].astype(str)
)

# Build the KnowledgeBase object from the prepared frame
knowledge_base = KnowledgeBase(knowledge_base_df)

### Test Reduced TestSet

In [None]:
# Smoke test with a reduced testset to check that the evaluation pipeline works.
# The load and evaluate calls below are currently disabled (commented out), so
# this cell only performs the imports.
from giskard.rag import QATestset
#loaded_reduce_testset = QATestset.load("reduce_testset.jsonl")

from giskard.rag import evaluate, RAGReport
from giskard.rag.metrics.ragas_metrics import ragas_context_recall, ragas_context_precision

#reportBasicRAG_reduceset = evaluate(ask,
#                testset=loaded_reduce_testset,
#                knowledge_base=knowledge_base,
#                metrics=[ragas_context_precision])

In [20]:
# The reduced-testset evaluation is optional and may not have been run in this
# kernel session. Only render the report when it exists, so that a fresh
# "Restart & Run All" does not stop here with a NameError.
if "reportBasicRAG_reduceset" in globals():
    display(reportBasicRAG_reduceset.to_html(embed=True))
else:
    print("reportBasicRAG_reduceset is not defined: reduced-testset evaluation was skipped.")

2024-06-04 11:35:40,752 pid:30861 MainThread giskard.rag  INFO     Finding topics in the knowledge base.
2024-06-04 11:35:53,135 pid:30861 MainThread giskard.rag  INFO     Found 3 topics in the knowledge base.






### Test normal testset (40 questions)

In [12]:
from giskard.rag import QATestset
# Load the question/answer testset from the local JSONL file
loaded_testset = QATestset.load("testset.jsonl")

In [16]:
from giskard.rag import evaluate, RAGReport
from giskard.rag.metrics.ragas_metrics import ragas_context_recall, ragas_context_precision

# Evaluate the basic RAG (`ask`) on the loaded testset against the knowledge
# base, with RAGAS context precision as the requested metric.
reportBasicRAG = evaluate(ask,
                testset=loaded_testset,
                knowledge_base=knowledge_base,
                metrics=[ragas_context_precision])

Asking questions to the agent:   0%|          | 0/40 [00:00<?, ?it/s]

correctness evaluation:   0%|          | 0/40 [00:00<?, ?it/s]

RAGAS Context Precision evaluation:   0%|          | 0/40 [00:00<?, ?it/s]

In [17]:
# Render the basic RAG evaluation report inline in the notebook
display(reportBasicRAG.to_html(embed=True))

2024-06-11 17:20:36,908 pid:239260 MainThread giskard.rag  INFO     Finding topics in the knowledge base.
2024-06-11 17:20:47,256 pid:239260 MainThread giskard.rag  INFO     Found 3 topics in the knowledge base.




# Test advanced RAG

## Wrap RAG model and create giskard model

In [18]:
# Reload the module so that edits made to Call_RAG_advanced since its first
# import are picked up, then bind ask_advanced from the reloaded module.
importlib.reload(Call_RAG_advanced)
from Call_RAG_advanced import ask_advanced

def model_predict_advanced(df: pd.DataFrame):
    """Answer every question in ``df`` with the advanced RAG pipeline.

    Giskard calls this function with a pandas.DataFrame holding the model's
    input features. The answers are returned as a list, one entry per row,
    in row order.
    """
    answers = []
    for query in df["question"]:
        answers.append(ask_advanced(query))
    return answers


# Wrap the advanced RAG prediction function in a Giskard model. The model is
# declared as text generation and takes a single "question" feature.
giskard_model_advanced = giskard.Model(
    model=model_predict_advanced,
    model_type="text_generation",
    name="Question Answering on Forest Protection and Conservation Areas",
    description="This model provides comprehensive answers to questions related to forest protection and conservation areas in Central Africa.",
    feature_names=["question"],
)

2024-06-11 17:21:00,188 pid:239260 MainThread giskard.models.automodel INFO     Your 'prediction_function' is successfully wrapped by Giskard's 'PredictionFunctionModel' wrapper class.


In [13]:
# Quick sanity check that the wrapped advanced model returns answers.
# Not needed for a regular test run.
examples = [
     "Combien de pays son couvert par l'OFAC ? ",
     "Le Cameroun est-il a jour de ses engagements ?",
]
# Wrap the example questions in a Giskard dataset (no target column) and print the predictions
giskard_dataset = giskard.Dataset(pd.DataFrame({"question": examples}), target=None)
print(giskard_model_advanced.predict(giskard_dataset).prediction)

2024-06-11 13:07:30,266 pid:141230 MainThread giskard.datasets.base INFO     Your 'pandas.DataFrame' is successfully wrapped by Giskard's 'Dataset' wrapper class.
2024-06-11 13:07:30,278 pid:141230 MainThread giskard.datasets.base INFO     Casting dataframe columns from {'question': 'object'} to {'question': 'object'}
V1
V1
2024-06-11 13:07:45,209 pid:141230 MainThread giskard.utils.logging_utils INFO     Predicted dataset with shape (2, 1) executed in 0:00:14.940609
["L'OFAC (Observatoire des Forêts d'Afrique Centrale) couvre 10 pays de l'Afrique centrale, à savoir : le Cameroun, la République centrafricaine, le Congo, le Gabon, la Guinée équatoriale, le Rwanda, Sao Tomé-et-Principe, le Tchad, et la République démocratique du Congo."
 'According to the context, it seems that the Cameroun has made some efforts to fulfill its international commitments, but it still needs to make more progress. The text mentions that the Cameroun has produced a National Action Plan for Combating Desertif

## General LLM test

In [17]:
# Run the Giskard scan on the advanced RAG model
scan_results_advanced = giskard.scan(giskard_model_advanced)

2024-06-11 13:37:41,229 pid:141230 MainThread httpx        INFO     HTTP Request: POST https://api.mistral.ai/v1/chat/completions "HTTP/1.1 200 OK"
🔎 Running scan…
Estimated calls to your model: ~365
Estimated LLM calls for evaluation: 148

2024-06-11 13:37:43,118 pid:141230 MainThread giskard.scanner.logger INFO     Running detectors: ['LLMBasicSycophancyDetector', 'LLMCharsInjectionDetector', 'LLMHarmfulContentDetector', 'LLMImplausibleOutputDetector', 'LLMInformationDisclosureDetector', 'LLMOutputFormattingDetector', 'LLMPromptInjectionDetector', 'LLMStereotypesDetector', 'LLMFaithfulnessDetector']
Running detector LLMBasicSycophancyDetector…
2024-06-11 13:37:55,889 pid:141230 MainThread httpx        INFO     HTTP Request: POST https://api.mistral.ai/v1/chat/completions "HTTP/1.1 200 OK"
2024-06-11 13:37:55,912 pid:141230 MainThread giskard.datasets.base INFO     Casting dataframe columns from {'question': 'object'} to {'question': 'object'}
V1
Model used: gpt-3.5-turbo
2024-06-11 1



In [19]:
# Write the scan report to an HTML file
scan_results_advanced.to_html("model_scan_results_advanced.html")
# Generate a test suite from the scan results
test_suite_advanced = scan_results_advanced.generate_test_suite("My second test suite, for advance")
# Render the scan results in the notebook
display(scan_results_advanced)



## Test normal testset (40 questions)

In [19]:
from giskard.rag import QATestset
# Load the question/answer testset from the local JSONL file
# (same file as used for the basic RAG evaluation)
loaded_testset = QATestset.load("testset.jsonl")

In [20]:
from giskard.rag import evaluate, RAGReport
from giskard.rag.metrics.ragas_metrics import ragas_context_recall, ragas_context_precision

# Evaluate the advanced RAG (`ask_advanced`) on the loaded testset against the
# knowledge base, with RAGAS context precision as the requested metric.
reportAdvancedRAG = evaluate(ask_advanced,
                testset=loaded_testset,
                knowledge_base=knowledge_base,
                metrics=[ragas_context_precision])

Asking questions to the agent:   0%|          | 0/40 [00:00<?, ?it/s]

V1
Model used: gpt-3.5-turbo
V1
Model used: gpt-3.5-turbo
V1
Model used: gpt-3.5-turbo
V1
Model used: gpt-3.5-turbo
V1
Model used: gpt-3.5-turbo
V1
Model used: gpt-3.5-turbo
V1
Model used: gpt-3.5-turbo
V1
Model used: gpt-3.5-turbo
V1
Model used: gpt-3.5-turbo
V1
Model used: gpt-3.5-turbo
V1
Model used: gpt-3.5-turbo
V1
Model used: gpt-3.5-turbo
V1
Model used: gpt-3.5-turbo
V1
Model used: gpt-3.5-turbo
V1
Model used: gpt-3.5-turbo
V1
Model used: gpt-3.5-turbo
V1
Model used: gpt-3.5-turbo
V1
Model used: gpt-3.5-turbo
V1
Model used: gpt-3.5-turbo
V1
Model used: gpt-3.5-turbo
V1
Model used: gpt-3.5-turbo
V1
Model used: gpt-3.5-turbo
V1
Model used: gpt-3.5-turbo
V1
Model used: gpt-3.5-turbo
V1
Model used: gpt-3.5-turbo
V1
Model used: gpt-3.5-turbo
V1
Model used: gpt-3.5-turbo
V1
Model used: gpt-3.5-turbo
V1
Model used: gpt-3.5-turbo
V1
Model used: gpt-3.5-turbo
V1
Model used: gpt-3.5-turbo
V1
Model used: gpt-3.5-turbo
V1
Model used: gpt-3.5-turbo
V1
Model used: gpt-3.5-turbo
V1
Model used:

correctness evaluation:   0%|          | 0/40 [00:00<?, ?it/s]

RAGAS Context Precision evaluation:   0%|          | 0/40 [00:00<?, ?it/s]

In [21]:
# Render the advanced RAG evaluation report inline in the notebook
display(reportAdvancedRAG.to_html(embed=True))

