In [None]:
from helper import load_mistral_api_key, get_text_embedding
import numpy as np

# Load the Mistral API key through the project helper and keep it in `api_key`.
# NOTE(review): `api_key` is not referenced again in the visible cells; the chat
# client created further down reads MISTRAL_API_KEY from the environment instead.
# Presumably the helper also populates that variable — confirm against helper.py.
api_key = load_mistral_api_key(ret_key=True)

In [None]:
# scrape the content of a page
from bs4 import BeautifulSoup
import requests
import re

# Fetch the article. The timeout keeps the cell from hanging forever on a stalled
# connection, and raise_for_status() prevents parsing an HTTP error page as if
# it were the article.
response = requests.get(
    "https://www.deeplearning.ai/the-batch/a-roadmap-explores-how-ai-can-detect-and-mitigate-greenhouse-gases/",
    timeout=30,
)
response.raise_for_status()
html_doc = response.text
soup = BeautifulSoup(html_doc, "html.parser")
# Select the first <div> whose CSS class starts with "prose--styled"
# (presumably the article body on this site).
tag = soup.find("div", re.compile("^prose--styled"))
if tag is None:
    # find() returns None when nothing matches (e.g. the page layout changed);
    # fail with an explicit message instead of an AttributeError on `.text`.
    raise RuntimeError(
        "Could not find a <div> with a class starting with 'prose--styled' in the page"
    )
text = tag.text
print(text)

In [None]:
# (optional) save the content in a text file
file_name = "AI_greenhouse_gas.txt"
# Pin the encoding: scraped web text can contain non-ASCII characters, and the
# platform default encoding (e.g. cp1252 on Windows) may fail to encode them.
with open(file_name, "w", encoding="utf-8") as file:
    file.write(text)

In [None]:
# Step 1 : chunk the text into fixed-size character chunks
chunk_size = 512
# Slice every `chunk_size` characters; the final chunk may be shorter.
chunks = [text[i: i + chunk_size] for i in range(0, len(text), chunk_size)]
# Report the configured size (not a hard-coded number) so the message stays
# correct when chunk_size is changed.
print(f"we have {len(chunks)} chunks of {chunk_size} characters")

In [None]:
# Step 2 : embed the chunks using mistral embedding API
# One embedding call per chunk, in chunk order; np.array stacks the results so
# that row i corresponds to chunks[i].
text_embeddings = np.array(list(map(get_text_embedding, chunks)))

In [None]:
# Length of the first embedding vector, i.e. the embedding dimensionality.
embedding_dim = len(text_embeddings[0])
# Bare expression so the notebook displays the value.
embedding_dim

In [None]:
# Step 3 : store the embeddings in a vector database (using Faiss)
!pip install faiss-cpu
import faiss

embedding_dim = text_embeddings.shape[1]
index = faiss.IndexFlatL2(embedding_dim)  # build the index
index.add(text_embeddings)  # add the embeddings to the index

In [None]:
# Step 3 : Embed the query using the same embedding model (mistral embedding)
question = "What are the ways that AI can reduce emissions in Agriculture?"
# Wrap the single embedding in a list so the array has one row per query.
question_embeddings = np.array([get_text_embedding(question)])
# Bare expression so the notebook displays the array.
question_embeddings

In [None]:
# Step 4 : Search the most similar chunks to the query
# D : distance , I : index of the k most similar chunks to the query vector
# (IndexFlatL2 reports squared Euclidean distances.)
k = 2  # number of nearest chunks to retrieve; also used in the messages below
# Faiss expects a contiguous float32 query matrix; cast explicitly instead of
# relying on the Python wrapper to convert NumPy's default float64.
D, I = index.search(np.ascontiguousarray(question_embeddings, dtype="float32"), k=k)
print(f"The {k} most similar chunks to the query are :", I)
print(f"The distances between the query and the {k} most similar chunks are : ", D)

In [None]:
# Step 5 : Retrieve the most similar chunks using their index
# Faiss pads the result with -1 when the index holds fewer than k vectors; skip
# those labels, otherwise chunks[-1] would silently return the last chunk.
retrieved_chunk = [chunks[i] for i in I.tolist()[0] if i >= 0]
print(retrieved_chunk)

In [None]:
# Step 6 : Pass the retrieved chunks in the prompt to get the answer to the question
from helper import mistral

# Join the retrieved chunks into plain text. Interpolating the list itself would
# put its Python repr (brackets, quotes, escaped newlines) into the prompt.
context_text = "\n".join(retrieved_chunk)

prompt = f"""
Context information is below.
---------------------
{context_text}
---------------------
Given the context information and not prior knowledge, answer the query.
Query: {question}
Answer:
"""

# Send the assembled prompt to the model through the project helper.
response = mistral(prompt)
print(response)

# RAG with function calling 

In [None]:
# RAG complete basic method (split into chunk, embed, load into vector database, retrieve similar chunks, generate response)
def qa_with_context(text, question, chunk_size=512, k=2):
    """Answer `question` using the most relevant chunks of `text` as context.

    Pipeline: split `text` into fixed-size character chunks, embed every chunk,
    load the embeddings into an in-memory Faiss L2 index, retrieve the chunks
    closest to the embedded question, and ask the model to answer from them.
    Nothing is cached: every call re-embeds the whole text.

    Relies on `np`, `faiss`, `get_text_embedding` and `mistral` having been
    imported by earlier cells.

    Args:
        text: Source document as a string.
        question: User question as a string.
        chunk_size: Number of characters per chunk (the last chunk may be shorter).
        k: Maximum number of chunks to retrieve as context.

    Returns:
        Whatever `mistral(prompt)` returns (presumably the answer text —
        confirm against the helper).

    Raises:
        IndexError: If `text` is empty, because there are no chunks and the
            embedding matrix therefore has no second dimension.
    """
    # split document into chunks
    chunks = [text[i: i + chunk_size] for i in range(0, len(text), chunk_size)]
    # load into a vector database; Faiss works on float32 matrices, so fix the
    # dtype up front instead of relying on NumPy's float64 default
    text_embeddings = np.array(
        [get_text_embedding(chunk) for chunk in chunks], dtype="float32"
    )
    d = text_embeddings.shape[1]
    index = faiss.IndexFlatL2(d)
    index.add(text_embeddings)
    # create embeddings for a question (one row per query)
    question_embeddings = np.array([get_text_embedding(question)], dtype="float32")
    # retrieve similar chunks from the vector database
    _distances, neighbor_ids = index.search(question_embeddings, k=k)
    # Faiss pads with -1 when the index holds fewer than k vectors; skip those
    # labels, otherwise chunks[-1] would silently return the last chunk
    retrieved_chunk = [chunks[i] for i in neighbor_ids.tolist()[0] if i >= 0]
    # generate response based on the retrieved relevant text chunks; join them so
    # the prompt carries plain text rather than the repr of a Python list
    context_text = "\n".join(retrieved_chunk)

    prompt = f"""
    Context information is below.
    ---------------------
    {context_text}
    ---------------------
    Given the context information and not prior knowledge, answer the query.
    Query: {question}
    Answer:
    """
    response = mistral(prompt)
    return response

In [None]:
# Define QA with context as a tool
import functools

# Dispatch table from tool name to the Python callable that implements it.
# The document text is pre-bound, so a tool call only has to supply `question`.
names_to_functions = dict(
    qa_with_context=functools.partial(qa_with_context, text=text),
)

# Tool schema passed to the chat API: one required string argument, `question`.
qa_with_context_tool = {
    "type": "function",
    "function": {
        "name": "qa_with_context",
        "description": "Answer user question by retrieving relevant context",
        "parameters": {
            "type": "object",
            "properties": {
                "question": {"type": "string", "description": "user question"},
            },
            "required": ["question"],
        },
    },
}

tools = [qa_with_context_tool]

In [None]:
# Initialize the chat with the new tool

from mistralai.client import MistralClient
from mistralai.models.chat_completion import ChatMessage
import os

# The leading and trailing newlines are part of the question text.
question = "\nWhat are the ways AI can mitigate climate change in transportation?\n"

# The API key is read from the MISTRAL_API_KEY environment variable.
client = MistralClient(api_key=os.getenv("MISTRAL_API_KEY"))
print(client)


# Single user message; the tool schema list is offered to the model.
# NOTE(review): tool_choice="any" presumably forces a tool call — confirm
# against the Mistral function-calling documentation.
response = client.chat(
    messages=[ChatMessage(content=question, role="user")],
    model="mistral-small-latest",
    tool_choice="any",
    tools=tools,
)

response

In [None]:
# Get the tool function called and the arguments to call the function with
import json

# Take the first tool call of the first choice; it carries the function name
# and its arguments.
tool_function = response.choices[0].message.tool_calls[0].function
print("tool function object: ", tool_function)
print()  # blank separator line (a bare `print` without parentheses does nothing)
tool_function_name = tool_function.name
# `arguments` is decoded with json.loads into the keyword arguments for the call.
tool_function_arguments = json.loads(tool_function.arguments)
print(f"Tool function called: {tool_function_name}")
print(f"Tool function arguments : {tool_function_arguments}")

In [None]:
# Look up the callable registered under the tool name and invoke it with the
# decoded arguments passed as keyword arguments.
function_result = names_to_functions[tool_function_name](**tool_function_arguments)
function_result