In [1]:
"""This program utilizes Azure Cognitive Search and Azure OpenAI to answer questions
from uploaded PDF documents in Azure Blob Storage.
It is developed and run inside a Python virtual environment.
"""
import os
import openai
from azure.identity import DefaultAzureCredential
from azure.search.documents import SearchClient
from azure.search.documents.models import QueryType
from azure.search.documents.models import QueryLanguage
from azure.core.credentials import AzureKeyCredential
from azure.storage.blob import BlobServiceClient

# Replace these with your own values, either in environment variables or directly here.
# Every setting is looked up in the environment variable of the same name; the
# placeholder is used when that variable is unset or empty.
def _env_or(name, fallback):
    """Return the environment variable `name`, or `fallback` if it is unset or empty."""
    return os.environ.get(name) or fallback


AZURE_STORAGE_ACCOUNT = _env_or("AZURE_STORAGE_ACCOUNT", "your-storage-account-name")
AZURE_STORAGE_CONTAINER = _env_or("AZURE_STORAGE_CONTAINER", "your-container-name")
AZURE_SEARCH_SERVICE = _env_or("AZURE_SEARCH_SERVICE", "your-cg-search-service-name")
AZURE_SEARCH_INDEX = _env_or("AZURE_SEARCH_INDEX", "your-cg-search-index-name")
AZURE_OPENAI_SERVICE = _env_or("AZURE_OPENAI_SERVICE", "your-openai-service-name")
AZURE_OPENAI_GPT_DEPLOYMENT = _env_or("AZURE_OPENAI_GPT_DEPLOYMENT", "your-model-name")
AZURE_OPENAI_CHATGPT_DEPLOYMENT = _env_or("AZURE_OPENAI_CHATGPT_DEPLOYMENT", "chat")

# Names of the knowledge-base index fields
KB_FIELDS_CONTENT = _env_or("KB_FIELDS_CONTENT", "content")
KB_FIELDS_CATEGORY = _env_or("KB_FIELDS_CATEGORY", "category")
KB_FIELDS_SOURCEPAGE = _env_or("KB_FIELDS_SOURCEPAGE", "metadata_storage_name")

# Used by the OpenAI SDK: route requests to the Azure-hosted OpenAI resource
openai.api_type = "azure"
openai.api_base = f"https://{AZURE_OPENAI_SERVICE}.openai.azure.com"
openai.api_version = "2022-12-01"

# Secrets are taken from the environment so they never have to be saved in the
# notebook. The placeholder is only a fallback for when the variable is unset or
# empty; assigning it unconditionally would override a key supplied through
# OPENAI_API_KEY.
openai.api_key = os.environ.get("OPENAI_API_KEY") or 'YOUR OPENAI API KEY'
# Set AZURE_SEARCH_KEY to the Cognitive Search admin/query key, obtainable with:
# az search admin-key show --resource-group <myresourcegroup> --service-name <myservice>
search_key = os.environ.get("AZURE_SEARCH_KEY") or "YOUR Cognitive Search Admin/Query KEY"

# Credentials used below:
# - Blob Storage gets DefaultAzureCredential, i.e. the current user identity
#   (no secrets needed: just use 'az login' locally, and managed identity when
#   deployed on Azure).
# - Cognitive Search gets an AzureKeyCredential wrapping `search_key`.
# If you need to use keys, use separate AzureKeyCredential instances with the
# keys for each service.
# If you encounter a blocking error during DefaultAzureCredential resolution, you
# can exclude the problematic credential by using a parameter
# (ex. exclude_shared_token_cache_credential=True).
az_credential = DefaultAzureCredential()
azure_credential = AzureKeyCredential(search_key)

# Client for the Cognitive Search index
search_endpoint = f"https://{AZURE_SEARCH_SERVICE}.search.windows.net"
search_client = SearchClient(
    endpoint=search_endpoint,
    index_name=AZURE_SEARCH_INDEX,
    credential=azure_credential,
)

# Clients for the Blob Storage account and the document container inside it
blob_account_url = f"https://{AZURE_STORAGE_ACCOUNT}.blob.core.windows.net"
blob_client = BlobServiceClient(
    account_url=blob_account_url,
    credential=az_credential,
)
blob_container = blob_client.get_container_client(AZURE_STORAGE_CONTAINER)

In [2]:
# Few-shot prompt template for the completion model.
# Layout: system-style instructions, one worked example (question, sources, answer),
# then the real question. It is filled in with str.format using two placeholders:
#   {q}         - the user's question, inserted verbatim between single quotes
#   {retrieved} - the source snippets, one "name: text" entry per line
# The question line carries no punctuation after the closing quote, so it has the
# same shape as the worked example and a question that already ends in '?' is not
# rendered with a doubled question mark.
template = (
    "You are an intelligent assistant helping users with their Diabetes related questions. "
    "Use 'you' to refer to the individual asking the questions even if they ask with 'I'. "
    "Answer the following question using only the data provided in the sources below. "
    """

###
Question: 'What is Type 1 Diabetes?'

Sources:
info1.txt: Type 1 diabetes, also known as insulin-dependent diabetes or juvenile-onset diabetes, typically develops in childhood or adolescence. 
info2.pdf: Type 1 diabetes occurs when the immune system mistakenly attacks and destroys the insulin-producing beta cells in the pancreas.

Answer:
Type 1 diabetes, also known as insulin-dependent diabetes or juvenile-onset diabetes, typically develops in childhood or adolescence. It occurs when the immune system mistakenly attacks and destroys the insulin-producing beta cells in the pancreas. The exact cause of this autoimmune response is not fully understood, but genetic and environmental factors are thought to play a role.

###
Question: '{q}'

Sources:
{retrieved}

Answer:
"""
)

In [5]:
user_input = "What is type1 Diabetes?"

# Exclude category, to simulate scenarios where there's a set of docs you can't see
exclude_category = None
search = user_input
print("Searching:", search)
print("-------------------")
# Build an OData filter that drops documents of the excluded category.
# Single quotes inside the value are doubled, which is how OData escapes them.
# The variable is deliberately not called `filter`, so the builtin stays usable
# in later cells.
if exclude_category:
    escaped_category = exclude_category.replace("'", "''")
    search_filter = f"{KB_FIELDS_CATEGORY} ne '{escaped_category}'"
else:
    search_filter = None
# Perform the search using Azure Cognitive Search; the filter has to be passed
# explicitly or the exclusion has no effect (None means "no filter").
r = search_client.search(search,
                         filter=search_filter,
                         query_type=QueryType.SIMPLE,
                         top=3)
# Flatten each hit to a single "source name: content" line (line breaks removed)
results = [doc[KB_FIELDS_SOURCEPAGE] + ": " + doc[KB_FIELDS_CONTENT].replace("\n", "").replace("\r", "") for doc in r]
content = "\n".join(results)
print("***********************")
print(content)
# Fill the template with the question and the retrieved source lines
prompt = template.format(q=user_input, retrieved=content)

# Ask the deployed GPT completion model for a single answer; generation stops at
# the first newline.
completion = openai.Completion.create(
    engine=AZURE_OPENAI_GPT_DEPLOYMENT,
    prompt=prompt,
    temperature=0.3,
    max_tokens=1024,
    n=1,
    stop=["\n"],
)

# Show the raw API response first, then a summary with the sources, the answer
# text and an HTML-friendly trace of the question and prompt.
print(completion)
answer_text = completion.choices[0].text
prompt_html = prompt.replace('\n', '<br>')
thoughts = f"Question:<br>{user_input}<br><br>Prompt:<br>" + prompt_html
print({"data_points": results, "answer": answer_text, "thoughts": thoughts})


Searching: What is type1 Diabetes?
-------------------
***********************
DiabetesPathology.pdf: Diabetes mellitus, commonly referred to as diabetes, is a chronic metabolic disorder characterized by high blood sugar levels over a prolonged period. It arises due to either insufficient production or inadequate utilization of insulin, a hormone produced by the pancreas that regulates blood sugar levels. The pathology of diabetes involves several complex mechanisms and can be categorized into two main types: type 1 diabetes and type 2 diabetes.  Type 1 Diabetes: Type 1 diabetes, also known as insulin-dependent diabetes or juvenile-onset diabetes, typically develops in childhood or adolescence. It occurs when the immune system mistakenly attacks and destroys the insulin-producing beta cells in the pancreas. The exact cause of this autoimmune response is not fully understood, but genetic and environmental factors are thought to play a role. Pathophysiology of Type 1 Diabetes:  Autoimmun