# Queries with and without Azure OpenAI

## Set up variables

In [1]:
import os
import urllib
import requests
from collections import OrderedDict
from IPython.display import display, HTML
from langchain.llms import AzureOpenAI
from langchain.chat_models import AzureChatOpenAI
from langchain.vectorstores import FAISS
from langchain.docstore.document import Document
from langchain.chains.question_answering import load_qa_chain
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.embeddings import HuggingFaceEmbeddings

from app.embeddings import OpenAIEmbeddings
from app.prompts import STUFF_PROMPT, REFINE_PROMPT, REFINE_QUESTION_PROMPT
from app.credentials import (
    DATASOURCE_CONNECTION_STRING,
    AZURE_SEARCH_API_VERSION,
    AZURE_SEARCH_ENDPOINT,
    AZURE_SEARCH_KEY,
    COG_SERVICES_NAME,
    COG_SERVICES_KEY,
    AZURE_OPENAI_ENDPOINT,
    AZURE_OPENAI_KEY,
    AZURE_OPENAI_API_VERSION
)

In [2]:
# HTTP headers and query-string parameters for the Azure Cognitive Search REST calls
headers = {
    'Content-Type': 'application/json',
    'api-key': AZURE_SEARCH_KEY,
}
params = {
    'api-version': AZURE_SEARCH_API_VERSION,
}

## Multi-Index Search queries

In [3]:
# Name of the search index to query (created in Notebooks 01 and 02)
index1_name = "cogsrch-snowflake-index-files"

# Every index in this list is queried in turn; add more names to search several indexes
indexes = [
    index1_name,
]

In [4]:
# Natural-language question used for the search query and, later, as the question passed to the QA chain
QUESTION = "What is Snowflake?" 

In [5]:
# Run the same semantic-search query against every index and keep each raw JSON
# response in agg_search_results (one entry per index, in the order of `indexes`).
agg_search_results = []

for index in indexes:
    url = AZURE_SEARCH_ENDPOINT + '/indexes/' + index + '/docs'

    # Hand the query to requests as a dict so every value is URL-encoded.
    # Concatenating QUESTION straight into the URL corrupts the query as soon as
    # the question contains characters such as '&', '#' or '+'.
    query_params = {
        'api-version': AZURE_SEARCH_API_VERSION,
        'search': QUESTION,
        'select': '*',
        '$top': 10,  # You can change this to anything you need/want
        'queryLanguage': 'en-us',
        'queryType': 'semantic',
        'semanticConfiguration': 'my-semantic-config',
        '$count': 'true',
        'speller': 'lexicon',
        'answers': 'extractive|count-3',
        'captions': 'extractive|highlight-false',
    }

    resp = requests.get(url, headers=headers, params=query_params)
    print(resp.url)  # final, encoded URL that was actually requested
    print(resp.status_code)
    # Stop here with the HTTP error rather than a confusing KeyError on the
    # '@odata.count' / 'value' lookups below when the service rejects the request.
    resp.raise_for_status()

    search_results = resp.json()
    agg_search_results.append(search_results)
    print("Results Found: {}, Results Returned: {}".format(search_results['@odata.count'], len(search_results['value'])))

https://azure-cog-search-g2ysbgn5zpihq.search.windows.net/indexes/cogsrch-snowflake-index-files/docs?api-version=2021-04-30-Preview&search=What is Snowflake?&select=*&$top=10&queryLanguage=en-us&queryType=semantic&semanticConfiguration=my-semantic-config&$count=true&speller=lexicon&answers=extractive|count-3&captions=extractive|highlight-false
200
Results Found: 3, Results Returned: 3


### Display the top answers and results (across all queried indexes), filtered by score

In [6]:
# Score cut-offs used below.
MIN_ANSWER_SCORE = 0.5    # answers: at least 50% of the max possible score=1
MIN_RERANKER_SCORE = 0.4  # results: at least 10% of the max possible score=4

display(HTML('<h4>Top Answers</h4>'))
for search_results in agg_search_results:
    # Tolerate a response without '@search.answers' instead of raising KeyError
    for result in search_results.get('@search.answers') or []:
        if result['score'] > MIN_ANSWER_SCORE:
            display(HTML('<h5>' + 'Answer - score: ' + str(result['score']) + '</h5>'))
            display(HTML(result['text']))

print("\n\n")
display(HTML('<h4>Top Results</h4>'))

# Results that pass the cut-off, keyed by document id, in the order they were returned
file_content = OrderedDict()

for search_results in agg_search_results:
    for result in search_results['value']:
        reranker_score = result['@search.rerankerScore']
        if reranker_score > MIN_RERANKER_SCORE:
            # Use the first caption when there is one; fall back to an empty
            # string so a result without captions does not raise.
            captions = result.get('@search.captions') or []
            caption_text = captions[0]['text'] if captions else ''

            display(HTML('<h5>' + str(result['title']) + '&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;score: '+ str(reranker_score) + '</h5>'))
            display(HTML(caption_text))
            file_content[result['id']] = {
                "title": result['title'],
                "chunks": result['pages'],
                "language": result['language'],
                "caption": caption_text,
                "score": reranker_score,
                "location": result['metadata_storage_path'],
            }






In [7]:
# Langchain needs these ENV variables to connect to Azure OpenAI; each setting is
# published under both its OPENAI_* and its AZURE_OPENAI_* name.
os.environ.update({
    "OPENAI_API_BASE": AZURE_OPENAI_ENDPOINT,
    "AZURE_OPENAI_ENDPOINT": AZURE_OPENAI_ENDPOINT,
    "OPENAI_API_KEY": AZURE_OPENAI_KEY,
    "AZURE_OPENAI_API_KEY": AZURE_OPENAI_KEY,
    "OPENAI_API_VERSION": AZURE_OPENAI_API_VERSION,
    "AZURE_OPENAI_API_VERSION": AZURE_OPENAI_API_VERSION,
})

In [8]:
# Flatten the selected results into one Document per chunk, recording the
# originating file location as the chunk's "source" metadata.
docs = [
    Document(page_content=chunk, metadata={"source": entry["location"]})
    for entry in file_content.values()
    for chunk in entry["chunks"]
]

print("Number of chunks:", len(docs))

Number of chunks: 30


In [9]:
# Select the Embedder model
if len(docs) < 50:
    # OpenAI models are accurate but slower
    embedder = OpenAIEmbeddings(document_model_name="text-embedding-ada-002", query_model_name="text-embedding-ada-002") 
else:
    # Bert based models are faster (3x-10x) but not as great in accuracy as OpenAI models
    # Since this repo supports Multiple languages we need to use a multilingual model. 
    # But if English only is the requirement, use "multi-qa-MiniLM-L6-cos-v1"
    # The fastest english model is "all-MiniLM-L12-v2"

    # Use the English-only model only when every result is English. Checking all
    # results (instead of one randomly sampled result) keeps the choice
    # deterministic and does not depend on the `random` module, which this
    # notebook never imports.
    english_only = all(entry["language"] == "en" for entry in file_content.values())
    if english_only:
        embedder = HuggingFaceEmbeddings(model_name = 'multi-qa-MiniLM-L6-cos-v1')
    else:
        embedder = HuggingFaceEmbeddings(model_name = 'distiluse-base-multilingual-cased-v2')

In [10]:
# Display the embedder chosen by the selection cell (a bare last expression is rendered as the cell output)
embedder

OpenAIEmbeddings(client=<class 'openai.api_resources.embedding.Embedding'>, document_model_name='text-embedding-ada-002', query_model_name='text-embedding-ada-002', openai_api_key=None)

In [11]:
%%time
if(len(docs)>1):
    db = FAISS.from_documents(docs, embedder)
else:
    print("No results Found")

CPU times: user 129 ms, sys: 24.7 ms, total: 154 ms
Wall time: 1.61 s


In [12]:
# Retrieve the k=4 chunks from the vector store that are most similar to the question
docs_db = db.similarity_search(QUESTION, k=4)

In [13]:
import openai

# Requires an Azure OpenAI deployment named "gpt-35-turbo" for the model
# "gpt-35-turbo (0301)". Use "gpt-4" instead if you have it available.
llm = AzureChatOpenAI(
    deployment_name="gpt-35-turbo",
    temperature=0.9,
    max_tokens=500,
)

# return_intermediate_steps=True makes the response also carry 'intermediate_steps'
chain = load_qa_with_sources_chain(
    llm,
    chain_type="map_reduce",
    return_intermediate_steps=True,
)

In [14]:
%%time
response = chain({"input_documents": docs_db, "question": QUESTION}, return_only_outputs=True)

Token indices sequence length is longer than the specified maximum sequence length for this model (2106 > 1024). Running this sequence through the model will result in indexing errors


CPU times: user 674 ms, sys: 95.7 ms, total: 770 ms
Wall time: 23.3 s


In [15]:
answer = response['output_text']
display(HTML('<h4>Azure OpenAI ChatGPT Answer:</h4>'))

# Split the model output at the "SOURCES:" marker. partition() never raises: when
# the model omits the marker, the whole output is the answer and the source list
# is empty (indexing split(...)[1] raised IndexError in that case).
answer_text, _, sources_text = answer.partition("SOURCES:")

# Strip surrounding whitespace from every source and drop empty entries
sources = [source.strip() for source in sources_text.split(",") if source.strip()]

print(answer_text)
print("Sources:")
print(sources)

Snowflake is an enterprise-ready data warehouse as a service that is built for the cloud, highly elastic, and available, providing semi-structured and schema-less data at the speed of relational data on AWS and Azure. It has a multi-cluster, shared-data architecture, and supports self-describing, compact binary serialization, with a huge metadata layer. Snowflake is a game changer for an organization with a low load time and high load frequency. It is built on a SaaS model and has a rapidly growing user base and data volume, of over 1000 active customers serving tens of millions of queries per day over hundreds petabytes of data. It is a patent-pending new architecture that delivers the power of data warehousing, the flexibility of big data platforms, and the elasticity of the cloud, all at a fraction of the cost of traditional solutions. 

Sources:
[' https://depprocureformstorage.blob.core.windows.net/snowflake-data/Snowflake%20Internals.pdf', ' https://depprocureformstorage.blob.cor

In [16]:
# Inspect the intermediate result produced for each of the top similar chunks
# (k=4 in the similarity search). Comment this line out to hide the output.
response['intermediate_steps']

['Snowflake is an enterprise-ready data warehouse as a service, with a novel multi-cluster, shared-data architecture. It is highly elastic and available, and provides semi-structured and schema-less data at the speed of relational data. Snowflake offers a pure SaaS experience and has a rapidly growing user base and data volume.',
 '• The Snowflake Elastic Data Warehouse, or “Snowflake”\n• Built for the cloud\n• Multi-tenant, transactional, secure, highly scalable, elastic\n• Implemented from scratch (no Hadoop, Postgres etc.)\n• Currently runs on AWS and Azure \n• Serves tens of millions of queries per day over hundreds \npetabytes of data\n• 1000+ active customers, growing fast',
 'Snowflake is an enterprise data warehouse as a cloud service that supports self-describing, compact binary serialization and is designed for fast key-value lookup, comparison, and hashing. It is supported by all SQL operators (joins, group by, sort...). Snowflake also introduces automatic type inference and

# Summary
##### This answer is far better than taking just the raw result from Azure Cognitive Search. In summary:
- Azure Cognitive Search gives us the top results (the context)
- Azure OpenAI takes these results, understands their content, and uses it as context to give the best answer
- Best of both worlds!