## RAG

In [1]:
from vertexai.preview import rag
from vertexai.preview.generative_models import GenerativeModel, Tool
import vertexai
from sklearn.metrics import precision_score, recall_score, f1_score
import time

In [2]:
# Google Cloud project ID (found in the console's project selector).
PROJECT_ID = "projet-ia-448520"

# Display name used when the RAG corpus is created.
display_name = "test_corpus"

# Sources to import into the corpus: here, a single Cloud Storage bucket.
# Google Cloud Storage and Google Drive links are both supported, e.g.
#   paths = ["https://drive.google.com/file/d/123", "gs://my_bucket/my_files_dir"]
paths = ["gs://cours5bucket"]

In [3]:
# One-time Vertex AI SDK initialization for this session.
# The Vertex AI RAG Engine only works in us-central1 or europe-west3.
vertexai.init(
    project=PROJECT_ID,
    location="europe-west3",
)

In [4]:
# Embedding model the corpus is configured with (here "text-embedding-004").
embedding_model_config = rag.EmbeddingModelConfig(publisher_model="publishers/google/models/text-embedding-004")

# Create the RagCorpus under the configured display name.
rag_corpus = rag.create_corpus(display_name=display_name, embedding_model_config=embedding_model_config)

In [5]:
# Import the files referenced by `paths` into the RagCorpus.
# Every keyword argument below is optional.
response = rag.import_files(
    rag_corpus.name, paths,
    chunk_size=512, chunk_overlap=100,
    max_embedding_requests_per_min=900,
)


In [6]:
# Direct context retrieval: query the corpus and print the raw response.
response = rag.retrieval_query(
    text="What IA models are the most efficient ?",
    # Optionally restrict the search to given files by also passing
    # rag_file_ids=["rag-file-1", "rag-file-2", ...] (IDs from `rag.list_files()`).
    rag_resources=[rag.RagResource(rag_corpus=rag_corpus.name)],
    similarity_top_k=5,  # optional
    vector_distance_threshold=0.5,  # optional
)
print(response)

contexts {
  contexts {
    source_uri: "gs://cours5bucket/deepseek-r1 paper.txt"
    text: "Since accessing the OpenAI-o1-1217 API is challenging in mainland China, we report its perfor-\nmance based on official reports. For distilled models, we also compare the open-source model\nQwQ-32B-Preview (Qwen, 2024a).\nEvaluation Setup We set the maximum generation length to 32,768 tokens for the models.\nWe found that using greedy decoding to evaluate long-output reasoning models results in\nhigher repetition rates and significant variability across different checkpoints. Therefore, we\ndefault to pass@ \360\235\221\230evaluation (Chen et al., 2021) and report pass@1 using a non-zero temperature.\nSpecifically, we use a sampling temperature of 0.6and a top- \360\235\221\235value of 0.95 to generate \360\235\221\230\nresponses (typically between 4and 64, depending on the test set size) for each question. Pass@1\nis then calculated as\npass@1 =1\n\360\235\221\230\360\235\221\230\342\210\221\3

In [7]:
# Create the RAG retrieval tool used to enhance generation.
rag_retrieval_tool = Tool.from_retrieval(
    retrieval=rag.Retrieval(
        source=rag.VertexRagStore(
            # Currently only 1 corpus is allowed.
            # Optionally add rag_file_ids=["rag-file-1", "rag-file-2", ...]
            # (IDs from `rag.list_files()`) to the resource.
            rag_resources=[rag.RagResource(rag_corpus=rag_corpus.name)],
            similarity_top_k=3,  # optional
            vector_distance_threshold=0.5,  # optional
        )
    )
)

In [8]:
# Instantiate the Gemini model ("gemini-1.5-flash-001") with the RAG retrieval tool attached.
rag_model = GenerativeModel(
    model_name="gemini-1.5-flash-001",
    tools=[rag_retrieval_tool],
)

In [9]:
# Ask the tool-equipped model a question and print the text of its answer.
response = rag_model.generate_content(
    "What IA models are the most efficient ?"
)
print(response.text)

The paper "DeepSeek-R1" highlights the efficiency of distilled models, particularly DeepSeek-R1-7B. This model outperforms non-reasoning models like GPT-4o-0513. Further distillation of larger models, such as DeepSeek-R1-14B, DeepSeek-R1-32B, and DeepSeek-R1-70B, leads to significant improvements on various benchmarks. The study emphasizes that while reinforcement learning can be applied to distilled models to improve their performance, even simple SFT-distilled models show promising results. 



## EVALUATION

### Evaluation retriever

In [10]:
!pip install ragas langchain_google_vertexai



In [11]:
import ragas
from ragas.metrics import (
    answer_relevancy,
    faithfulness,
    context_recall,
    context_precision,
    answer_similarity,
    answer_correctness
)

from ragas.evaluation import evaluate
from datasets import Dataset
from langchain_google_vertexai import ChatVertexAI, VertexAIEmbeddings
# from ragas.llms import LangchainLLMWrapper

In [12]:
# Ragas metrics passed to `evaluate`.
metrics_eval = [
    answer_relevancy,
    faithfulness,
    context_recall,
    context_precision,
    answer_correctness,
]

#### Ragas a besoin d'un dataset avec quatre champs : "question", "ground_truth", "answer", "contexts"

In [13]:
# Evaluation set with the four fields Ragas needs.
# "question" and "ground_truth" are hand-written and aligned by position;
# "answer" and "contexts" start empty and are filled by `filling_datatset`.
dict_dataset_eval = dict(
    question=[
        "What are the models launched by Deepseek ?",
        "How many Core Contributors ?",
        "Concerning only MMLU (Pass@1), what are the performance of OpenAI-o1-1217?",
    ],
    ground_truth=[
        "Deepseek-r1-Zero, Deepseek-r1",
        "18",
        "The performance of OpenAI-o1-1217 on MMLU (Pass@1) is 91.8.",
    ],
    answer=[],
    contexts=[],
)

In [14]:
def filling_datatset(dict_dataset_eval, similarity_top_k=5, vector_distance_threshold=0.5):
    """Fill the "contexts" and "answer" fields of a Ragas evaluation dict.

    For every entry of ``dict_dataset_eval["question"]`` the corpus is queried
    with ``rag.retrieval_query`` and an answer is generated with ``rag_model``.
    Results are collected locally and written back only after every question
    has been processed. Previous contents of the two fields are replaced, so
    the function can be re-run without the lists growing out of sync with
    "question", and a failure midway leaves the dict untouched.

    Args:
        dict_dataset_eval: Dict holding a "question" list; its "contexts" and
            "answer" entries are overwritten.
        similarity_top_k: Forwarded to ``rag.retrieval_query``.
        vector_distance_threshold: Forwarded to ``rag.retrieval_query``.

    Returns:
        None. ``dict_dataset_eval`` is updated in place.
    """
    all_contexts = []
    all_answers = []

    for question in dict_dataset_eval["question"]:
        response_retriever = rag.retrieval_query(
            rag_resources=[rag.RagResource(rag_corpus=rag_corpus.name)],
            text=question,
            similarity_top_k=similarity_top_k,
            vector_distance_threshold=vector_distance_threshold,
        )
        # The retrieved chunks are nested under `contexts.contexts` in the response.
        all_contexts.append(
            [context.text for context in response_retriever.contexts.contexts]
        )

        # NOTE(review): `rag_model` performs its own retrieval through its tool;
        # the contexts stored above come from the separate query just made and
        # may differ from what the model actually used -- confirm settings match.
        response_generator = rag_model.generate_content(question)
        all_answers.append(response_generator.text)

    # Single write-back at the end: replaces (never extends) earlier results.
    dict_dataset_eval["contexts"] = all_contexts
    dict_dataset_eval["answer"] = all_answers
        

In [15]:
# Run retrieval and generation for every question; the "contexts" and "answer"
# entries of dict_dataset_eval are filled in place.
filling_datatset(dict_dataset_eval)

In [16]:
# Convert the evaluation dict into a `datasets.Dataset`, the object later passed to Ragas' `evaluate`.
dataset_eval = Dataset.from_dict(dict_dataset_eval)

In [17]:
# LangChain Vertex AI chat model passed to Ragas' `evaluate` as its LLM.
vertextai_llm = ChatVertexAI(model_name="gemini-1.5-flash-001")

In [18]:
# LangChain Vertex AI embeddings passed to Ragas' `evaluate`.
vertextai_embeddings = VertexAIEmbeddings(model_name="textembedding-gecko")

In [19]:
# Score the evaluation dataset on the selected metrics, using the Vertex AI
# LLM and embeddings configured for Ragas.
ragas_eval_with_gt = evaluate(
    dataset_eval,
    metrics=metrics_eval,
    llm=vertextai_llm,
    embeddings=vertextai_embeddings,
)

Evaluating:   0%|          | 0/15 [00:00<?, ?it/s]

Retrying langchain_google_vertexai.chat_models._acompletion_with_retry.<locals>._completion_with_retry_inner in 4.0 seconds as it raised ResourceExhausted: 429 Quota exceeded for aiplatform.googleapis.com/generate_content_requests_per_minute_per_project_per_base_model with base model: gemini-1.5-flash. Please submit a quota increase request. https://cloud.google.com/vertex-ai/docs/generative-ai/quotas-genai..
Retrying langchain_google_vertexai.chat_models._acompletion_with_retry.<locals>._completion_with_retry_inner in 4.0 seconds as it raised ResourceExhausted: 429 Quota exceeded for aiplatform.googleapis.com/generate_content_requests_per_minute_per_project_per_base_model with base model: gemini-1.5-flash. Please submit a quota increase request. https://cloud.google.com/vertex-ai/docs/generative-ai/quotas-genai..
Retrying langchain_google_vertexai.chat_models._acompletion_with_retry.<locals>._completion_with_retry_inner in 4.0 seconds as it raised ResourceExhausted: 429 Quota exceeded

In [20]:
# Display the evaluation result (one score per metric).
ragas_eval_with_gt

{'answer_relevancy': 0.9117, 'faithfulness': 1.0000, 'context_recall': 1.0000, 'context_precision': 0.6222, 'answer_correctness': 0.5135}