# Azure OpenAI Service - Q&A with semantic answering tutorial

In this tutorial, you'll build a simple Q&A system, that can give semantic answers to questions. Three sample documents from the Azure documentation are provided.

In [2]:
import os
import json
import tiktoken
import openai
import numpy as np
from dotenv import load_dotenv
from openai.embeddings_utils import cosine_similarity
from tenacity import retry, wait_random_exponential, stop_after_attempt

# Load environment variables
load_dotenv()

# Configure Azure OpenAI Service API
openai.api_type = "azure"
openai.api_version = "2022-12-01"
openai.api_base = "https://domzisopenai.openai.azure.com/"
openai.api_key = "3673181a896c4997baba06bc82db5e6f"

# Define embedding model and encoding
EMBEDDING_MODEL = 'TextEmbeddingAda002'
EMBEDDING_ENCODING = 'cl100k_base'
EMBEDDING_CHUNK_SIZE = 8000
COMPLETION_MODEL = 'TextDavinci003'

# initialize tiktoken for encoding text
encoding = tiktoken.get_encoding(EMBEDDING_ENCODING)

Next, let's read the documents in `/data/qna/*.txt`, which are our sample documents:

In [3]:
# list all files in the samples directory
samples_dir = os.path.join(os.getcwd(), "../../data/qna/")
sample_files = os.listdir(samples_dir)

# read each file and remove and newlines (better for embeddings later)
documents = []
for file in sample_files:
    with open(os.path.join(samples_dir, file), "r") as f:
        content = f.read()
        content = content.replace("\n", " ")
        content = content.replace("  ", " ")
        documents.append(content)

# print some stats about the documents
print(f"Loaded {len(documents)} documents")
for doc in documents:
    num_tokens = len(encoding.encode(doc))
    print(f"Content: {doc[:80]}... \n---> Tokens: {num_tokens}\n")

Loaded 3 documents
Content:  # What is Azure OpenAI? The Azure OpenAI service provides REST API access to Op... 
---> Tokens: 1891

Content:  # What is conversational language understanding? Conversational language unders... 
---> Tokens: 1341

Content:  # What is Azure Cognitive Services Translator? Translator Service is a cloud-ba... 
---> Tokens: 739



Now that we have all documents loaded, we can embed them using our embedding model:

In [4]:
#test code
def get_embedding(text):
    return openai.Embedding.create(input=text, engine=EMBEDDING_MODEL)["data"][0]["embedding"]

get_embedding("This is a test string")

[-0.007274896837770939,
 0.011575490236282349,
 -0.0030399614479392767,
 -0.010446174070239067,
 -0.017990268766880035,
 0.013413911685347557,
 -0.009257766418159008,
 0.0030268297996371984,
 -0.008830989710986614,
 -0.028364218771457672,
 0.009192108176648617,
 0.017556926235556602,
 -0.015048794448375702,
 0.018003400415182114,
 0.0035225471947342157,
 0.010104753077030182,
 0.0204590056091547,
 3.847143489110749e-06,
 0.019040795043110847,
 -0.00591577822342515,
 -0.0007485002279281616,
 0.006628166418522596,
 -0.011536095291376114,
 0.025751033797860146,
 -0.01711045205593109,
 -0.007938042283058167,
 -0.0012286236742511392,
 -0.02062971703708172,
 0.012626017443835735,
 -0.007517831400036812,
 0.01575789973139763,
 -0.01575789973139763,
 -0.003450323361903429,
 -0.021023664623498917,
 -0.002181484131142497,
 -0.0006569895194843411,
 -0.007294594310224056,
 -0.026670245453715324,
 0.020747901871800423,
 -0.01902766339480877,
 0.010859819129109383,
 0.01213358249515295,
 2.303477231

In [12]:
@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
def get_embedding(text):
    return openai.Embedding.create(input=text, engine=EMBEDDING_MODEL)["data"][0]["embedding"]

# Create embeddings for all docs
embeddings = [get_embedding(doc) for doc in documents]

# print some stats about the embeddings
for e in embeddings:
    print(e)

[-0.0058960202150046825, -0.008995079435408115, -0.0015300121158361435, -0.000143199969897978, -0.0022216150537133217, 0.014622945338487625, -0.006764979101717472, 0.001692941877990961, -0.02661185897886753, -0.03258594870567322, 0.03144543990492821, 0.014690832234919071, -0.03025062382221222, -0.01570914313197136, -0.031499750912189484, 0.017256975173950195, 0.006585077382624149, -0.01187350507825613, 0.01385581772774458, -0.011086011305451393, -0.007257162593305111, -0.004134342540055513, -0.015138888731598854, -0.010291729122400284, -0.002058685291558504, 0.02137095108628273, 0.01568198762834072, -0.020244020968675613, 0.001718399696983397, -0.011547645553946495, 0.02703276090323925, -0.002337023615837097, -0.010217052884399891, 0.0007094232714734972, 0.02069207839667797, 0.01613004505634308, 0.01294612605124712, -0.009477080777287483, 0.008804995566606522, 0.00015242841618601233, 0.01854683645069599, 0.020176133140921593, 0.01319052092730999, -0.00044466243707574904, -0.01360463444

Now that we have our embeddings, we can try to ask some questions and see if it retrieves the correct document. You can try the following questions:

* what is azure openai service?
* can translator be fine tuned?
* what is the difference between luis and clu?
* what is form recognizer? (should yield no result)

In [4]:
# create embedding for question
question = "what is azure openai service?"
qe = get_embedding(question)

# calculate cosine similarity between question and each document
similaries = [cosine_similarity(qe, e) for e in embeddings]

# Get the matching document, in this case we just use argmax of similarities
max_i = np.argmax(similaries)

# print some stats about the similarities
for i, s in enumerate(similaries):
    print(f"Similarity to {sample_files[i]} is {s}")
print(f"Matching document is {sample_files[max_i]}")

Similarity to overview_clu.txt is 0.7739918312624845
Similarity to overview_openai.txt is 0.8674443713390149
Similarity to overview_translator.txt is 0.7915684774211917
Matching document is overview_openai.txt


In [5]:
# Generate a prompt that we use for completion, in this case we put the matched document and the question in the prompt
prompt = f"""
Content:
{documents[max_i]}
Please answer the question below using only the content from above. If you don't know the answer or can't find it, say "I couldn't find the answer".
Question: {question}
Answer:"""

# get response from completion model
response = openai.Completion.create(
    engine=COMPLETION_MODEL,
    prompt=prompt,
    temperature=0.7,
    max_tokens=500,
    top_p=1,
    frequency_penalty=0,
    presence_penalty=0,
    stop=None
)
answer = response['choices'][0]['text']

# print the question and answer
print(f"Question was: {question}\nRetrieved answer was: {answer}")

Question was: what is azure openai service?
Retrieved answer was:  Azure OpenAI Service provides REST API access to OpenAI's powerful language models, including the GPT-3, Codex and Embeddings model series. Users can access the service through REST APIs, Python SDK, or the web-based interface in the Azure OpenAI Studio.


Great, that worked. Now we should have a simple understanding how Q&A can work using OpenAI embeddings and completions. Next step would be:

* Chunking of longer documents (you might run into token limits for embeddings and the answering prompt)
* Usage of a vector database (pinecone, redis, etc.) to scale the search part to a larger amount of documents
* Evaluation of the top k results, instead of just the best matching document
* ...and a few more!