# Retrieving Documents and Answering Questions

## Setup Environment and Azure API Keys

In [12]:
from notebookutils.mssparkutils.credentials import getSecret

# Key Vault endpoint that every getSecret() call below reads from.
KEYVAULT_ENDPOINT = "https://rag-demo-east-us-kv.vault.azure.net/"
# Azure AI Search: service name and API key come from Key Vault; the index name is fixed here.
AI_SEARCH_NAME = getSecret(KEYVAULT_ENDPOINT, "AI-SEARCH-NAME")
AI_SEARCH_API_KEY = getSecret(KEYVAULT_ENDPOINT, "AI-SEARCH-API-KEY")
AI_SEARCH_INDEX_NAME = "rag-demo-index"
# Azure AI Services (NOTE(review): not referenced by the other cells visible in this notebook).
AI_SERVICES_NAME = getSecret(KEYVAULT_ENDPOINT, "AI-SERVICES-NAME")
AI_SERVICES_API_KEY = getSecret(KEYVAULT_ENDPOINT, "AI-SERVICES-API-KEY")
AI_SERVICES_LOCATION = "eastus"
# Azure OpenAI (needed if the F64 SKU is not used): resource name, API key and deployment names.
OPEN_AI_NAME = getSecret(KEYVAULT_ENDPOINT, "OPEN-AI-NAME")
OPEN_AI_API_KEY = getSecret(KEYVAULT_ENDPOINT, "OPEN-AI-API-KEY")
OPEN_AI_EMBEDDING_DEPLOYMENT_NAME = "text-embedding-ada-002" # embedding size: 1536
OPEN_AI_GPT_DEPLOYMENT_NAME = "gpt-35-turbo-16k" # deploymentName could be one of {gpt-35-turbo, gpt-35-turbo-16k}

StatementMeta(, 37a401f2-c62d-4c5b-b871-7e4a20236bf9, 14, Finished, Available)

## Generate Embeddings for User Questions
[Embedding Text with Azure OpenAI](https://microsoft.github.io/SynapseML/docs/Explore%20Algorithms/OpenAI/Quickstart%20-%20OpenAI%20Embedding/)

In [13]:
def gen_question_embedding(user_question):
    """Generate the embedding vector for a user question using SynapseML.

    Builds a one-row Spark DataFrame holding the question, runs it through
    ``OpenAIEmbedding`` with the configured deployment, service name and key,
    and returns the value of the ``embeddings`` output column as a list.

    Args:
        user_question: The question text to embed.

    Returns:
        The embedding converted with ``.tolist()``.

    Raises:
        RuntimeError: If the transformer reported an error in the ``errorQ``
            column or produced no embedding for the question.
    """
    from synapse.ml.services import OpenAIEmbedding

    df_ques = spark.createDataFrame([(user_question, 1)], ["questions", "dummy"])
    embedding = (
        OpenAIEmbedding()
        .setDeploymentName(OPEN_AI_EMBEDDING_DEPLOYMENT_NAME)
        .setCustomServiceName(OPEN_AI_NAME)
        .setSubscriptionKey(OPEN_AI_API_KEY)
        .setTextCol("questions")
        .setErrorCol("errorQ")
        .setOutputCol("embeddings")
    )
    # Only the output and error columns are needed on the driver.
    row = embedding.transform(df_ques).select("embeddings", "errorQ").first()

    # Failures are reported through the error column rather than raised, so
    # check it here instead of crashing later on a null embedding.
    if row is None:
        raise RuntimeError("Embedding request returned no rows.")
    if row["errorQ"] is not None:
        raise RuntimeError(f"Embedding request failed: {row['errorQ']}")
    if row["embeddings"] is None:
        raise RuntimeError("Embedding request returned no embedding for the question.")

    return row["embeddings"].tolist()


StatementMeta(, 37a401f2-c62d-4c5b-b871-7e4a20236bf9, 15, Finished, Available)

## Retrieving Related Documents

In [14]:
import json 
import requests

def retrieve_top_chunks(k, question, question_embedding, timeout=30):
    """Retrieve the top K entries from Azure AI Search using hybrid search.

    Sends one POST to the index's ``docs/search`` endpoint carrying both the
    raw question text (``search``) and a vector query over the
    ``contentVector`` field.

    Args:
        k: Number of results to request, used for both ``top`` and the
            vector query's ``k``.
        question: The question text for the ``search`` field.
        question_embedding: The question's embedding vector (JSON-serializable).
        timeout: Seconds to wait for the service before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The decoded JSON response body.

    Raises:
        requests.HTTPError: If the service answers with a 4xx/5xx status.
        requests.Timeout: If the request exceeds ``timeout``.
    """
    url = f"https://{AI_SEARCH_NAME}.search.windows.net/indexes/{AI_SEARCH_INDEX_NAME}/docs/search?api-version=2023-11-01"

    payload = {
        "search": question,
        "top": k,
        "vectorQueries": [
            {
                "vector": question_embedding,
                "k": k,
                "fields": "contentVector",
                "kind": "vector",
            }
        ],
    }

    headers = {
        "Content-Type": "application/json",
        "api-key": AI_SEARCH_API_KEY,
    }

    response = requests.post(
        url, headers=headers, data=json.dumps(payload), timeout=timeout
    )
    # Fail here on an error status instead of handing an error payload to the
    # caller, which would otherwise break on a missing "value" key.
    response.raise_for_status()
    return response.json()


StatementMeta(, 37a401f2-c62d-4c5b-b871-7e4a20236bf9, 16, Finished, Available)

In [15]:
def get_context(user_question, retrieved_k = 5):
    """Return the content of the top retrieved chunks for a question.

    Embeds the question, runs the search with that embedding, and collects
    the "content" field of every entry under the response's "value" key.
    The result is a list with one item per retrieved chunk, not a single
    concatenated string.
    """
    embedding = gen_question_embedding(user_question)
    search_result = retrieve_top_chunks(retrieved_k, user_question, embedding)

    context = []
    for hit in search_result["value"]:
        context.append(hit["content"])
    return context


StatementMeta(, 37a401f2-c62d-4c5b-b871-7e4a20236bf9, 17, Finished, Available)

## Respond to User Questions

In [16]:
from pyspark.sql import Row
from synapse.ml.services.openai import OpenAIChatCompletion


def make_message(role, content):
    """Build one chat message Row; the ``name`` field is set to the role."""
    fields = {"role": role, "content": content, "name": role}
    return Row(**fields)

def get_response(user_question):
    """Answer a user question from retrieved context via chat completion.

    Retrieves context chunks for the question, places them in a system
    prompt, sends the system and user messages through
    ``OpenAIChatCompletion``, and returns the generated text.

    Args:
        user_question: The question to answer.

    Returns:
        The message contents of all returned choices, joined with spaces.

    Raises:
        RuntimeError: If the transformer reported a failure in the ``error``
            column.
    """
    context = get_context(user_question)
    # Render the chunks as plain text; interpolating the list directly would
    # put its Python repr (brackets, quotes, escaped newlines) into the prompt.
    context_text = "\n\n".join(str(chunk) for chunk in context)

    # System prompt carrying the retrieved context and the answering rules.
    prompt = f"""
    context: {context_text}
    Answer the question based on the context above.
    If the information to answer the question is not present in the given context then reply "I don't know".
    """

    chat_df = spark.createDataFrame(
        [
            (
                [
                    make_message("system", prompt),
                    make_message("user", user_question),
                ],
            ),
        ]
    ).toDF("messages")

    chat_completion = (
        OpenAIChatCompletion()
        .setDeploymentName(OPEN_AI_GPT_DEPLOYMENT_NAME)
        .setCustomServiceName(OPEN_AI_NAME)
        .setSubscriptionKey(OPEN_AI_API_KEY)
        .setMessagesCol("messages")
        .setErrorCol("error")
        .setOutputCol("chat_completions")
    )

    # Keep the error column alongside the content so failures can be detected.
    result_df = chat_completion.transform(chat_df).select(
        "error", "chat_completions.choices.message.content"
    )

    answers = []
    for row in result_df.collect():
        # A failed call leaves the content null; report the service error
        # instead of failing on the join below.
        if row["error"] is not None:
            raise RuntimeError(f"Chat completion request failed: {row['error']}")
        answers.append(" ".join(row["content"] or []))

    # Join the per-row answers into a single string
    return " ".join(answers)

StatementMeta(, 37a401f2-c62d-4c5b-b871-7e4a20236bf9, 18, Finished, Available)

In [17]:
# Sample question: run the full retrieve-then-answer pipeline and print the reply.
user_question = "what are my health benefits?"
response = get_response(user_question)
print(response)


StatementMeta(, 37a401f2-c62d-4c5b-b871-7e4a20236bf9, 19, Submitted, Running)

Failed to fetch cluster details
Traceback (most recent call last):
  File "/home/trusted-service-user/cluster-env/trident_env/lib/python3.10/site-packages/synapse/ml/fabric/token_utils.py", line 188, in _get_openai_mwc_token
    raise Exception(
Exception: get openai mwc token returns 403:b'{"Message":"FT1 SKU Not Supported","Source":"ML","error_code":"PERMISSION_DENIED"}'


In [None]:
# Same question as the previous cell, asked again.
# NOTE(review): duplicate of the cell above — consider removing one of them.
user_question = "what are my health benefits?"
response = get_response(user_question)
print(response)


StatementMeta(, , , Waiting, )

In [None]:
# Second sample question, answered through the same get_response() pipeline.
user_question = "what are Split Copay For Office Visits?"
response = get_response(user_question)
print(response)


StatementMeta(, , , Waiting, )

In [None]:
# NOTE(review): presumably an off-topic question meant to exercise the
# "I don't know" instruction in the system prompt — confirm against the indexed documents.
user_question = "when is the next lunar eclypse"
response = get_response(user_question)
print(response)


StatementMeta(, , , Waiting, )