# Multilingual RAG with Azure AI Search

In [15]:
# Import required libraries
import os
import json
from dotenv import load_dotenv

from tenacity import retry, wait_random_exponential, stop_after_attempt
from openai import AzureOpenAI
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
    SimpleField,
    SearchFieldDataType,
    SearchableField,
    SearchField,
    VectorSearch,
    HnswAlgorithmConfiguration,
    VectorSearchProfile,
    SemanticConfiguration,
    SemanticPrioritizedFields,
    SemanticField,
    SemanticSearch,
    SearchIndex,
    AzureOpenAIVectorizer,
    AzureOpenAIParameters
)


from azure.identity import DefaultAzureCredential, get_bearer_token_provider
import json

load_dotenv()

True

In [16]:
# Configure environment variables
# Azure AI Search connection settings (loaded from the environment / .env file).
service_endpoint = os.getenv("AZURE_SEARCH_SERVICE_ENDPOINT")
key = os.getenv("AZURE_SEARCH_ADMIN_KEY")

# The index used by this notebook is pinned to a literal. A value read from
# AZURE_SEARCH_INDEX_NAME was always overwritten by this assignment, so the
# dead environment lookup has been removed to avoid suggesting it has any effect.
index_name = "product_data_csv"

# Azure OpenAI settings: embedding / chat deployment names, endpoint, key, API version.
AZURE_OPENAI_EMBEDDINGS_ADA_DEPLOYMENT_NAME = os.getenv("AZURE_OPENAI_EMBEDDINGS_ADA_DEPLOYMENT_NAME")
OPENAI_DEPLOYMENT_ENDPOINT = os.getenv("OPENAI_DEPLOYMENT_ENDPOINT")
AZURE_OPENAI_GPT4_DEPLOYMENT_NAME = os.getenv("AZURE_OPENAI_GPT4_DEPLOYMENT_NAME")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
AZURE_OPENAI_API_VERSION = os.getenv("AZURE_OPENAI_API_VERSION")
azure_openai_embedding_dimensions = 1536

# Key-based credential for the search service, built from the admin key above.
credential = AzureKeyCredential(key)

In [17]:
# Configure OpenAI API
# Azure OpenAI client shared by the embedding and chat-completion helpers below.
# (The search `credential` is already created in the configuration cell, so it
# is not re-created here.)
aoai_client = AzureOpenAI(
    azure_endpoint=OPENAI_DEPLOYMENT_ENDPOINT,
    api_key=OPENAI_API_KEY,
    api_version=AZURE_OPENAI_API_VERSION,
)

In [81]:
from azure.search.documents.models import VectorizedQuery

# Search client bound to the configured endpoint and index; do_search() issues its queries through it.
search_client = SearchClient(
    endpoint=service_endpoint,
    index_name=index_name,
    credential=credential,
)

# Generate Document Embeddings using OpenAI Ada Model
@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
def calc_embeddings(text):
    """Return the embedding vector for ``text`` from the Azure OpenAI embeddings deployment.

    Used to embed title and content fields, and also to embed query text.
    The call is wrapped in a tenacity retry: random exponential wait
    (min=1, max=20) and at most 6 attempts.
    """
    # `model` takes the Azure deployment name.
    embedding_response = aoai_client.embeddings.create(
        input=[text],
        model=AZURE_OPENAI_EMBEDDINGS_ADA_DEPLOYMENT_NAME,
    )
    first_item = embedding_response.data[0]
    return first_item.embedding

def do_search(query, fields, k=3):
    """Run a vector-only search for ``query`` and return the names of the hits.

    The query text is embedded with ``calc_embeddings`` and submitted as a
    ``VectorizedQuery`` over ``fields``; ``search_text`` is ``None``, so no
    keyword query is issued. As a side effect, the score, name and
    description of every hit are printed.

    Args:
        query: Text to embed and search for.
        fields: Name(s) of the vector field(s) to search, as a comma-separated string.
        k: Number of nearest neighbours requested by the vector query.
            Defaults to 3, the value that was previously hard-coded.

    Returns:
        A string containing one line per hit: a leading space, the hit's
        ``name``, and a newline. Empty string when there are no hits.
    """
    embedding = calc_embeddings(query)
    vector_query = VectorizedQuery(vector=embedding, k_nearest_neighbors=k, fields=fields)

    results = search_client.search(
        search_text=None,
        vector_queries=[vector_query],
        select=["name", "description"],
    )

    name_lines = []
    for result in results:
        print(f"Score: {result['@search.score']}")
        print(f"Name: {result['name']}")
        print(f"Description: {result['description']}\n")

        # Terminate each name with a real newline. The previous code appended
        # the two literal characters "/n", which ran all names together.
        name_lines.append(f" {result['name']}\n")

    return "".join(name_lines)

In [93]:
def call_openAI(question, answers):
    """Ask the chat deployment to answer ``question`` using only ``answers``.

    Both values are substituted into a grounding prompt that is sent as a
    single user message.

    Args:
        question: The user's query text.
        answers: Text listing the candidate answers the model may draw from.

    Returns:
        The message content of the first completion choice.
    """
    prompt_template="""
    You are a friendly assistant answering users questions.
    Answer the query using only the answers provided below in the friendly and concise bulleted manner.
    Answer ONLY with the facts listed in the list of answers below.
    If there isn't enough information below, say "you don't know" .

    Query: {question}
    answers:\n{answers}
    """
    # Single-turn conversation: the whole grounded prompt goes in one user message.
    user_message = {
        "role": "user",
        "content": prompt_template.format(question=question, answers=answers),
    }
    completion = aoai_client.chat.completions.create(
        model=AZURE_OPENAI_GPT4_DEPLOYMENT_NAME,
        messages=[user_message],
        temperature=0.7,
        max_tokens=800,
        top_p=0.95,
        frequency_penalty=0,
        presence_penalty=0,
        stop=None,
    )

    first_choice = completion.choices[0]
    return first_choice.message.content

In [94]:
def detect_and_translate(text):
    """Ask the chat deployment to detect the language of ``text`` and translate it to English.

    The request uses the JSON-object response format with temperature 0.

    Args:
        text: Input text in any language.

    Returns:
        The raw message content of the first choice (an unparsed string). Per
        the system prompt it is expected to be a JSON object with
        ``language`` and ``translation`` keys.
    """
    system_message = """You are a helpful assistant that translates text from any language to english. 
        If the text is already in English, return a json object as follows:
        {"language": "english", "translation": "text"}
        Answer in a clear and concise manner only translating the text in a JSON object as follows:
        {"language": "language_detected", "translation": "translated_text"}
    """

    conversation = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": text},
    ]
    completion = aoai_client.chat.completions.create(
        model=AZURE_OPENAI_GPT4_DEPLOYMENT_NAME,
        messages=conversation,
        response_format={"type": "json_object"},
        temperature=0,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [95]:
def translate(text, target_language):
    """Translate ``text`` into ``target_language`` using the chat deployment.

    Args:
        text: The text to translate, sent as the user message.
        target_language: Name of the language to translate into; it is
            concatenated into the system prompt, so it must be a string.

    Returns:
        The message content of the first completion choice.
    """
    # Plain concatenation (not an f-string) so a non-str target still raises TypeError.
    instructions = (
        """You are a helpful assistant that translates text into """
        + target_language
        + """.
    Answer in a clear and concise manner only translating the text.
    Text:
    """
    )

    conversation = [
        {"role": "system", "content": instructions},
        {"role": "user", "content": text},
    ]
    completion = aoai_client.chat.completions.create(
        model=AZURE_OPENAI_GPT4_DEPLOYMENT_NAME,
        messages=conversation,
        temperature=0,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [101]:
# End-to-end run for an English query:
# detect language / translate to English -> vector search -> grounded answer -> translate back.
question = "nonstick grills"
response = detect_and_translate(question)  # JSON text returned by the model
json_response = json.loads(response)

answers = do_search(
    query=json_response["translation"],
    fields="name_embedding, description_embedding",
)
result = call_openAI(question=json_response["translation"], answers=answers)
# Reply in the language that was detected for the original question.
response = translate(text=result, target_language=json_response["language"])
display(response)

Score: 0.03333333507180214
Name: Cuisinart Countertop Griddler - GR4
Description: Cuisinart Countertop Griddler - GR4/ Nonstick Grill/ Knob Selector/ Light Indicator/ Temperature Controls/ Cleaning/Scraping Tool Included

Score: 0.032786883413791656
Name: Weber Cast Iron Griddle - 7531
Description: Weber Cast Iron Griddle - 7531/ Heavy-Duty Cast Iron Griddle/ Fits Weber Genesis Silver A & Spirit 500 Gas Grills

Score: 0.032258063554763794
Name: Weber Cast Iron Griddle - 7542
Description: Weber Cast Iron Griddle - 7542/ Heavy-Duty Cast Iron Griddle/ Two-Sided For Cooking A Variety Of Foods/ Fits Several Weber Grills



'- Cuisinart Countertop Griddler - GR4\n- Weber Cast Iron Griddle - 7531\n- Weber Cast Iron Griddle - 7542'

In [102]:
# End-to-end run for a Spanish query:
# detect language / translate to English -> vector search -> grounded answer -> translate back.
question = "Encuentrame todas los parrillas antiadherentes"
response = detect_and_translate(question)  # JSON text returned by the model
json_response = json.loads(response)

answers = do_search(
    query=json_response["translation"],
    fields="name_embedding, description_embedding",
)
result = call_openAI(question=json_response["translation"], answers=answers)
# Reply in the language that was detected for the original question.
response = translate(text=result, target_language=json_response["language"])
display(response)

Score: 0.03333333507180214
Name: Cuisinart Countertop Griddler - GR4
Description: Cuisinart Countertop Griddler - GR4/ Nonstick Grill/ Knob Selector/ Light Indicator/ Temperature Controls/ Cleaning/Scraping Tool Included

Score: 0.032786883413791656
Name: Weber Cast Iron Griddle - 7531
Description: Weber Cast Iron Griddle - 7531/ Heavy-Duty Cast Iron Griddle/ Fits Weber Genesis Silver A & Spirit 500 Gas Grills

Score: 0.032258063554763794
Name: Weber Cast Iron Griddle - 7542
Description: Weber Cast Iron Griddle - 7542/ Heavy-Duty Cast Iron Griddle/ Two-Sided For Cooking A Variety Of Foods/ Fits Several Weber Grills



'- Plancha de Encimera Cuisinart - GR4'