In [None]:
# Notebook to perform a RAG query


In [1]:
import os 
import re 
import json
import pickle
import math
from datetime import datetime
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential
from azure.core.credentials import AzureKeyCredential  
from azure.search.documents import SearchClient  
from azure.search.documents.indexes import SearchIndexClient, SearchIndexerClient  
from azure.search.documents.models import VectorizableTextQuery, VectorizedQuery
from azure.search.documents.models import QueryType, QueryCaptionType, QueryAnswerType
from openai import AzureOpenAI
import openai
from tenacity import retry, wait_random_exponential, stop_after_attempt 
import time
import pandas as pd  
import concurrent.futures  
import random  
from IPython.display import Markdown, display  



In [None]:
# Sampling temperature; passed as `temperature` to the chat completion request.
openai_temperature = 0.1

# Configuration details for the AI Search service and the Azure OpenAI instance.
# Credentials should be stored securely, for example in Azure Key Vault.
def has_embedding_settings(config):
    """Report whether every embedding setting is present and non-empty.

    Args:
        config: Mapping of setting names to values.

    Returns:
        True when all four ``openai_embedding_*`` keys exist in ``config``
        with truthy values, otherwise False.
    """
    for setting_name in (
        'openai_embedding_api_base',
        'openai_embedding_api_key',
        'openai_embedding_api_version',
        'openai_embedding_model',
    ):
        # A missing key and an empty/None value are treated the same way.
        if setting_name not in config or not config[setting_name]:
            return False
    return True

# Stop this cell when the embedding configuration is missing or incomplete.
# An exception is raised instead of calling exit(): inside a notebook, exit()
# asks the IPython kernel to shut down, which throws away all kernel state.
# Raising halts the cell (and a "Run All") while keeping the kernel alive.
if not has_embedding_settings(config):
    print("Embedding settings not found or incomplete. Skipping embedding operations.")
    raise RuntimeError(
        "Embedding settings not found or incomplete: set openai_embedding_api_base, "
        "openai_embedding_api_key, openai_embedding_api_version and openai_embedding_model."
    )

print("Embedding settings found. Proceeding with embedding operations...")

# --- Azure AI Search settings ---
search_service_name = config["search_service_name"]
search_service_url = f"https://{search_service_name}.search.windows.net/"
search_admin_key = config["search_admin_key"]
index_name = config["search_index_name"]
search_api_version = config["search_api_version"]

# --- Azure OpenAI settings: embeddings deployment ---
openai_embedding_api_base = config["openai_embedding_api_base"]
openai_embedding_api_key = config["openai_embedding_api_key"]
openai_embedding_api_version = config["openai_embedding_api_version"]
openai_embeddings_model = config["openai_embedding_model"]

# --- Azure OpenAI settings: GPT (chat) deployment ---
openai_gpt_api_base = config["openai_gpt_api_base"]
openai_gpt_api_key = config["openai_gpt_api_key"]
openai_gpt_api_version = config["openai_gpt_api_version"]
openai_gpt_model = config["openai_gpt_model"]

# Every AI Search setting must hold a real value: not None/empty, not the
# "[redacted]" placeholder, and not whitespace only.
search_vars = [search_service_name, search_admin_key, index_name, search_api_version]
ai_search_configured = True

for var in search_vars:
    if var and var != "[redacted]" and var.strip() != "":
        continue
    ai_search_configured = False
    break

if ai_search_configured:
    index_client = SearchIndexClient(
        endpoint=search_service_url,
        credential=AzureKeyCredential(search_admin_key),
    )
    search_client = SearchClient(
        endpoint=search_service_url,
        index_name=index_name,
        credential=AzureKeyCredential(search_admin_key),
    )
else:
    print("WARNING: AI Search variables not properly configured (some values are '[redacted]', empty, or None)")
    print("Search operations will not work properly. Please configure search_service_name, search_admin_key, search_index_name, and search_api_version.")
    # Leave both clients unset so later cells can detect the missing configuration.
    index_client = None
    search_client = None

# Both Azure OpenAI clients take their endpoint, API version and API key from
# the values read out of `config` above.
embeddings_client = AzureOpenAI(
    azure_endpoint=openai_embedding_api_base,
    api_key=openai_embedding_api_key,
    api_version=openai_embedding_api_version,
)

gpt_client = AzureOpenAI(
    azure_endpoint=openai_gpt_api_base,
    api_key=openai_gpt_api_key,
    api_version=openai_gpt_api_version,
)

# Echo the non-secret settings so the reader can confirm what the notebook targets.
print('Search Service Name:', search_service_name)
print('Index Name:', index_name)
print('Azure OpenAI Embeddings Base URL:', openai_embedding_api_base)
print('Azure OpenAI Embeddings Model:', openai_embeddings_model)
print('Azure OpenAI GPT Base URL:', openai_gpt_api_base)
print('Azure OpenAI GPT Model:', openai_gpt_model)


In [None]:
# Upper bound on completion tokens; passed as `max_tokens` to the chat
# completion request made in generate_answer.
max_tokens=2048

# Retry settings for generate_embedding (defined below), the function that
# generates vectors for title and content fields and is also used for query vectors.
# max_attempts: limit given to tenacity's stop_after_attempt and cap on the
# function's own retry counter.
max_attempts = 6
# max_backoff: ceiling, in seconds, for the sleep used when the service throttles (429).
max_backoff = 60
@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(max_attempts))
def generate_embedding(text):
    """Return the embedding vector for ``text``, or ``None`` if none can be produced.

    Args:
        text: The text to embed.

    Returns:
        The embedding as returned by the embeddings endpoint (first item of
        ``data``), or ``None`` when ``text`` is ``None`` or shorter than 10
        characters, the embedding settings are unavailable, the request is
        rejected by the content filter, or non-throttling errors occur
        ``max_attempts`` times.
    """
    # Nothing worth embedding: missing or very short input.
    if text is None or len(text) < 10:
        return None

    # Without settings there is no client to call, so retrying would be pointless.
    if not has_embedding_settings(config):
        print("Embedding settings not available. Cannot initialize OpenAI client for embeddings.")
        return None

    client = openai.AzureOpenAI(
        azure_endpoint=config["openai_embedding_api_base"],
        api_key=config["openai_embedding_api_key"],
        api_version=config["openai_embedding_api_version"]
    )

    counter = 0
    incremental_backoff = 1   # seconds to wait on throttling - grows on every 429
    while counter < max_attempts:
        try:
            # text-embedding-3-small == 1536 dims
            response = client.embeddings.create(
                input=text,
                model=openai_embeddings_model
            )
            return json.loads(response.model_dump_json())["data"][0]['embedding']
        except openai.APIError as ex:
            error_code = str(ex.code)
            if error_code == "429":
                # Throttled: back off and try again. Throttling does not count
                # against max_attempts.
                incremental_backoff = min(max_backoff, incremental_backoff * 1.5)
                print ('Waiting to retry after', incremental_backoff, 'seconds...')
                time.sleep(incremental_backoff)
            elif error_code == "content_filter":
                # Retrying a filtered input cannot succeed.
                print ('API Error', ex.code)
                return None
            else:
                # Any other API error must count as a failed attempt; otherwise
                # the loop would spin forever without sleeping.
                counter += 1
                print ('Error - Retry count:', counter, ex)
        except Exception as ex:
            counter += 1
            print ('Error - Retry count:', counter, ex)
    return None

@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
def generate_answer(question, content):
    """Ask the GPT deployment to answer ``question`` using only ``content``.

    Args:
        question: The user's question text.
        content: Source passages; appended to the question after "Sources:".

    Returns:
        The answer text from the first choice, or "" when the request is
        rejected by the content filter, the response carries no text, or
        non-throttling errors occur ``max_attempts`` times.
    """
    max_attempts = 6
    max_backoff = 60
    system_prompt = """
    You are an intelligent assistant. 
    Use 'you' to refer to the individual asking the questions even if they ask with 'I'. 
    Sometimes the answer may be in a table.
    Focus the response on the intent of the users question. For example, if they ask "Who is", aim to respond with information about "Who" as opposed to "How to".
    Each source has a name followed by colon and the actual information. 
    Use square brackets to reference the source, for example [info1.txt]. 
    List each source separately, for example [info1.txt][info2.pdf].
    For every fact, always include a reference to the source of that fact, even if you used the source to infer the fact.
    Aim to be succint, but include any relevent information you find in the content such as special rules, legalities, restrictions or other relevent notes.
    Only answer the question using the source information below. 
    Do not make up an answer.
    """

    user_prompt = question + "\nSources:\n" + content

    counter = 0
    incremental_backoff = 1   # seconds to wait on throttling - grows on every 429
    while counter < max_attempts:
        try:
            response = gpt_client.chat.completions.create(
                model=openai_gpt_model,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
                temperature=openai_temperature,
                max_tokens=max_tokens,
                top_p=0.95,
                frequency_penalty=0,
                presence_penalty=0,
                stop=None,
                stream=False
            )
            # Normalise a missing message body to "" so callers always get a string.
            return response.choices[0].message.content or ""
        except openai.APIError as ex:
            error_code = str(ex.code)
            if error_code == "429":
                # Throttled: back off, then loop around and try again.
                # Throttling does not count against max_attempts.
                incremental_backoff = min(max_backoff, incremental_backoff * 1.5)
                print ('Waiting to retry after', incremental_backoff, 'seconds...')
                time.sleep(incremental_backoff)
            elif error_code == "content_filter":
                # Retrying a filtered prompt cannot succeed.
                print ('API Error', ex.code)
                return ""
            else:
                # Count every other API error so the loop is guaranteed to end.
                counter += 1
                print ('Error - Retry count:', counter, ex)
        except Exception as ex:
            counter += 1
            print ('Error - Retry count:', counter, ex)

    # Reached only after the retry budget is used up. This return must sit
    # outside the loop, otherwise the first failure ends the function.
    return ""

# Matches a bracketed reference such as [info1.txt] and captures the text inside.
citation_pattern = r'\[([^\]]+)\]'
def extract_citations(text):
    """Return the bracketed citation names found in ``text``.

    Args:
        text: Text to scan, e.g. a generated answer. May be ``None`` or empty.

    Returns:
        A list with the contents of every ``[...]`` group, in order of
        appearance and including duplicates; an empty list when ``text`` is
        ``None`` or empty.
    """
    if not text:
        return []
    # Search the argument itself, not a notebook-level variable.
    return re.findall(citation_pattern, text)

In [5]:
# Pg 10
query = "What was the intelligent cloud revenue in FY24 Q2"

neighbors = 3

# search_client is None when the AI Search settings were not configured;
# fail with a clear message instead of an AttributeError further down.
if search_client is None:
    raise RuntimeError("AI Search is not configured; cannot run the query.")

# generate_embedding returns None when it cannot produce a vector.
emb = generate_embedding(query)
if emb is None:
    raise RuntimeError("Could not generate an embedding for the query.")

# Hybrid query: keyword search text plus a vector query against the "vector" field.
vector_query = VectorizedQuery(vector=emb, k_nearest_neighbors=neighbors, fields="vector")
results = search_client.search(
    search_text=query,
    vector_queries=[vector_query],
    # One list entry per field name.
    select=["doc_id", "page_number", "content"],
    top=neighbors,
    query_type='semantic',
    semantic_configuration_name='vector-semantic-configuration'
)

# Build the "Sources" text: each hit is prefixed with its doc_id so the model can cite it.
content = ''
for result in results:
    content += result['doc_id'] + ': ' + result['content'] + '\n\n'

answer = generate_answer(query, content)
display(Markdown(answer))


The intelligent cloud revenue for FY24 Q2 was approximately $25.88 billion [6ba17e30-a320-49a5-9d44-c0f96a0e2869-10].

In [6]:
# Print out all the citations
# Markdown and display are already imported in the notebook's import cell.
citations = sorted(set(extract_citations(answer)))
for citation in citations:
    # Look up the cited document by its key.
    result = search_client.get_document(key=citation)

    # Use a dedicated name so the `content` string built for the prompt is not overwritten.
    citation_content = result.get('content', 'Content field not found')
    # Escape code fences so the stored markdown is shown rather than rendered as a code block.
    citation_content = citation_content.replace('```', '\\```')
    display(Markdown(citation_content))

    print ('\n==============================================================\n')


\```markdown
# Intelligent Cloud Overview

## Investor Metrics

|                        | FY23 Q3 | FY23 Q4 | FY24 Q1 | FY24 Q2 | FY24 Q3 |
|------------------------|---------|---------|---------|---------|---------|
| Server products and cloud services revenue growth (y/y) | 17% / 21%| 17% / 18%| 21%| 22% / 20%| 24%|

*Growth rates include non-GAAP CC growth (GAAP % / CC %).*

## Total Revenue

- Revenue grew 21% driven by Azure

## Operating Income

- Gross margin dollars grew 20% and gross margin percentage decreased slightly. Excluding the impact of the latest change in accounting estimate for useful lives, gross margin percentage increased slightly primarily driven by improvement in Azure, inclusive of scaling our AI infrastructure, partially offset by sales mix shift to Azure.
- Operating expenses grew 1% driven by investments in Azure
- Operating income grew 32%

## Revenue and Operating Income (in billions)

| Period   | Revenue | Operating Income | 
|----------|---------|------------------| 
| FY23 Q3  | $22.08  | $9.48            |
| FY23 Q4  | $23.99  | $10.53           |
| FY24 Q1  | $24.26  | $11.75           |
| FY24 Q2  | $25.88  | $12.46           |
| FY24 Q3  | $26.71  | $12.51           |

*Note: Numbers are approximate.*

---

*We have recast certain prior period amounts to conform to the way we internally manage and monitor our business. Includes non-GAAP constant currency ("CC") growth. See Appendix for reconciliation of GAAP and non-GAAP measures. Growth rates in GAAP and CC are equivalent unless otherwise noted.*
\```



