# Retrieval Augmented Generation

## Setup API clients

In [55]:
import os

import azure.identity
import dotenv
import openai
from azure.search.documents import SearchClient
from azure.search.documents.models import VectorizedQuery

dotenv.load_dotenv()

azure_credential = azure.identity.AzureDeveloperCliCredential(tenant_id=os.getenv("AZURE_TENANT_ID"))

# Initialize Azure OpenAI client
AZURE_OPENAI_SERVICE = os.getenv("AZURE_OPENAI_SERVICE")
AZURE_OPENAI_ADA_DEPLOYMENT = os.getenv("AZURE_OPENAI_ADA_DEPLOYMENT")

token_provider = azure.identity.get_bearer_token_provider(azure_credential, "https://cognitiveservices.azure.com/.default")
openai_client = openai.AzureOpenAI(
    api_version="2023-07-01-preview",
    azure_endpoint=f"https://{AZURE_OPENAI_SERVICE}.openai.azure.com",
    azure_ad_token_provider=token_provider)

def get_embedding(text):
    get_embeddings_response = openai_client.embeddings.create(model=AZURE_OPENAI_ADA_DEPLOYMENT, input=text)
    return get_embeddings_response.data[0].embedding

# Initialize Azure search client
AZURE_SEARCH_SERVICE = os.getenv("AZURE_SEARCH_SERVICE")
AZURE_SEARCH_ENDPOINT = f"https://{AZURE_SEARCH_SERVICE}.search.windows.net"

AZURE_SEARCH_FULL_INDEX = "gptkbindex"
search_client = SearchClient(AZURE_SEARCH_ENDPOINT, AZURE_SEARCH_FULL_INDEX, credential=azure_credential)


## Prepare user question

In [56]:
user_question = "I want to buy iphone 15"
user_question_vector = get_embedding(user_question)

## Retrieve matching documents

The search call below does a **hybrid search**, performing both a full-text search and a vector search in parallel.
It merges those results using Reciprocal Rank Fusion (RRF). 
Finally, it re-ranks the merged results using the AI Search semantic ranker, a re-ranking model.

In [57]:
r = search_client.search(
        user_question,
        top=5, 
        vector_queries=[
                VectorizedQuery(vector=user_question_vector, k_nearest_neighbors=50, fields="embedding")],
        # query_type="semantic",
        semantic_configuration_name="default")

sources = "\n".join([f"{doc['sourcefile']}: {doc['content']}\n" for doc in r])

print(sources)

products.txt: Apple iPhone, $1099.00, Apple iPhones are renowned for their premium build quality and seamless integration with the Apple ecosystem. They feature high-resolution Retina displays, powerful A-series processors, and advanced camera systems with features like Night mode and Deep Fusion. iPhones run on iOS, which is known for its smooth performance, security, and a vast selection of apps.


products.txt: HP, $799.00, HP laptops are versatile and reliable, with a range of options from budget-friendly to high-end models.


products.txt: HP Laptop, $799.00, HP laptops are versatile and reliable, with a range of options from budget-friendly to high-end models. They are equipped with the latest technology, including fast processors, ample storage, and high-quality graphics, making them ideal for both work and play.


products.txt: Samsung Smartphone, $999.00, Samsung smartphones are known for their cutting-edge technology and innovative features. They offer high-resolution display

In [58]:
def search(question):
    r = search_client.search(
            question,
            top=5, 
            vector_queries=[
                    VectorizedQuery(vector=user_question_vector, k_nearest_neighbors=50, fields="embedding")],
            # query_type="semantic",
            semantic_configuration_name="default")

    sources = "\n".join([f"{doc['sourcefile']}: {doc['content']}\n" for doc in r])

    return sources

In [59]:
def generate_query(messages, question):
    clone_messages = list(messages)
    clone_messages.append({"role": "user", "content": f"This is the user question: {question}. Based on the history and current question, generate the query in max 20 words to search in the knowledge base, don't explain more."})
    response = openai_client.chat.completions.create(
        model=os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME"),
        temperature=0.7,
        messages=clone_messages
    )

    return response.choices[0].message.content


## Send question and documents to the model

In [60]:
from time import sleep


SYSTEM_MESSAGE = """
Assistant helps answer customer's questions about the available products. Be brief in your answers.
Answer ONLY with the facts listed in the list of sources below.
If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below.
Each source has a name followed by colon and the actual information, include the source name for each fact you use.
Use square brackets to reference the source, for example [info1.txt].
"""


messages = [{"role": "system", "content": SYSTEM_MESSAGE}]

while(True):
    sleep(1)
    question = input()
    if question == "exit":
        break
    query = generate_query(messages=messages, question=question)
    print(f"\tGenerated query: {query}")

    sources = search(question=query)
    print("Human > " + question)

    USER_MESSAGE = question + "\nSources: " + sources
    messages.append( {"role": "user", "content": USER_MESSAGE})
    # Now we can use the matches to generate a response
    response = openai_client.chat.completions.create(
        model=os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME"),
        temperature=0.7,
        messages=messages
    )

    answer = response.choices[0].message.content
    print("Bot > " + answer)


	Generated query: Customer question about available products and services.
Human > hi
Bot > Hello! How can I assist you today?
	Generated query: Available smartphones and their features along with prices.
Human > 
Bot > Hi! How can I assist you today?
