In [25]:
import json
import os

import openai
from azure.core.credentials import AzureKeyCredential
from azure.identity import DefaultAzureCredential
from azure.search.documents import SearchClient
from azure.search.documents.models import QueryType
from dotenv import load_dotenv

# Load the .env file of the default environment.
# NOTE(review): `config_data` is not defined in this cell; presumably an earlier cell
# loads it — confirm, otherwise this line fails on Restart Kernel -> Run All.
dotenv_path = f'../.azure/{config_data["defaultEnvironment"]}/.env'
load_dotenv(dotenv_path) # Load environment variables from .env file

# Replace these with your own values, either in environment variables or directly here
AZURE_STORAGE_ACCOUNT = os.environ.get("AZURE_STORAGE_ACCOUNT") or "mystorageaccount"
AZURE_STORAGE_CONTAINER = os.environ.get("AZURE_STORAGE_CONTAINER") or "content"
AZURE_SEARCH_SERVICE = os.environ.get("AZURE_SEARCH_SERVICE") or "gptkb"
AZURE_SEARCH_INDEX = os.environ.get("AZURE_SEARCH_INDEX") or "gptkbindex"
AZURE_OPENAI_SERVICE = os.environ.get("AZURE_OPENAI_SERVICE") or "myopenai"
AZURE_OPENAI_GPT_DEPLOYMENT = os.environ.get("AZURE_OPENAI_GPT_DEPLOYMENT") or "davinci"
AZURE_OPENAI_CHATGPT_DEPLOYMENT = os.environ.get("AZURE_OPENAI_CHATGPT_DEPLOYMENT") or "chat"
AZURE_OPENAI_CHATGPT_MODEL = os.environ.get("AZURE_OPENAI_CHATGPT_MODEL") or "gpt-35-turbo"
AZURE_OPENAI_EMB_DEPLOYMENT = os.environ.get("AZURE_OPENAI_EMB_DEPLOYMENT") or "embedding"

# Names of the index fields holding the text, the category and the source page.
KB_FIELDS_CONTENT = os.environ.get("KB_FIELDS_CONTENT") or "content"
KB_FIELDS_CATEGORY = os.environ.get("KB_FIELDS_CATEGORY") or "category"
KB_FIELDS_SOURCEPAGE = os.environ.get("KB_FIELDS_SOURCEPAGE") or "sourcepage"

# Alternative (currently disabled): authenticate with the current user identity
# ('az login' locally, managed identity when deployed on Azure) instead of keys.
# This notebook uses key-based authentication for both OpenAI and Cognitive Search below.
#azure_credential = DefaultAzureCredential(exclude_shared_token_cache_credential = True)

# Used by the OpenAI SDK
openai.api_type = "azure"
openai.api_base = f"https://{AZURE_OPENAI_SERVICE}.openai.azure.com"
openai.api_version = "2023-05-15"
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Fail early with a readable message instead of handing None to the search SDK.
_missing_search_settings = [
    name
    for name in ("AZURE_COGNITIVE_SEARCH_ENDPOINT", "AZURE_COGNITIVE_SEARCH_KEY")
    if not os.environ.get(name)
]
if _missing_search_settings:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_search_settings)
    )

# Set up the Cognitive Search client (key-based credential)
admin_key = os.environ.get("AZURE_COGNITIVE_SEARCH_KEY")

search_client = SearchClient(
    endpoint=os.environ.get("AZURE_COGNITIVE_SEARCH_ENDPOINT"),
    index_name=AZURE_SEARCH_INDEX,
    credential=AzureKeyCredential(admin_key))

In [71]:
# Chat roles used when building the message lists sent to the chat model.
SYSTEM = "system"
USER = "user"
ASSISTANT = "assistant"

# System prompt for the answering step. It contains the placeholders
# {follow_up_questions_prompt} and {injected_prompt}, which must be filled in
# before the prompt is sent (the code doing so is not part of this cell).
system_message_chat_conversation = """You are a customer service assistant for BSH company, helping customers with their home appliance questions, including inquiries about purchasing new products, features, configurations, and troubleshooting.
Start answering thanking the user for their question. Respond in a slightly informal, and helpful tone, with a brief and clear answers. 
Answer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know without referring to the sources. 
Do not generate answers that don't use the sources below and avoid to just cite the source without answering the question. 
If asking a clarifying question to the user would help, ask the question. 
For tabular information, return it as an HTML table. Do not return markdown format. 
If the question is not in English, answer in the language used in the question. 
Each source has a name followed by a colon and the actual information; always include the source name for each fact you use in the response without referring to the sources. 
For example, if the question is 'What is the capacity of this washing machine?' and one of the information sources says 'WGB256090_EN-54.pdf: the capacity is 5kg', then answer with 'The capacity is 5kg [WGB256090_EN-54.pdf]'. 
If there are multiple sources, cite each one in their own square brackets. For example, use '[WGB256090_EN-54.pdf][SMS8YCI03E_EN-24.pdf]' and not in '[WGB256090_EN-54.pdf, SMS8YCI03E_EN-24.pdf]'. 
The name of the source follows a special format: <model_number>_<document_language>-<page_number>.pdf. 
You can Use this information from source name, especially if someone is asking a question about a specific model.
{follow_up_questions_prompt}
{injected_prompt}
"""

# Optional prompt fragment asking the model for follow-up questions in <<...>> markers.
follow_up_questions_prompt_content = """Generate three very brief follow-up questions that the user would likely ask next about the home appliance they are interested in or need help with. 
Use double angle brackets to reference the questions, e.g. <<Is there a warranty on this washing machine?>>. 
Try not to repeat questions that have already been asked. 
Only generate questions and do not generate any text before or after the questions, such as 'Next Questions'
"""

# System prompt for turning the conversation into a knowledge-base search query.
query_prompt_template = """Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base about BSH company's home appliances, including buying guides, features, configurations, and troubleshooting.
Generate a search query based on the conversation and the new question. 
Ensure that the search query is in the same language as the new question.
If the question is not in English, answer in the language used in the question.
Do not include cited source filenames and document names e.g info.txt or doc.pdf in the search query terms.
Do not include any text inside [] or <<>> in the search query terms.
Do not include any special characters like '+'.
If you cannot generate a search query, return just the number 0.
Return the query enclosed in the quotes for e.g., 'washing machine with product id WGB256090'
"""

# Few-shot examples for the search-query prompt.
query_prompt_few_shots = [
    {'role': USER, 'content': 'how to load the washing machine?'},
    {'role': ASSISTANT, 'content': 'Show the procedure to load a washing machine'},
    {'role': USER, 'content': 'Does my washing machine has wifi?'},
    {'role': ASSISTANT, 'content': 'Check for the wifi feature on the specified washing machine'},
]

# Variant 1 of the filter prompt: language detection only (a single answer).
# The final instruction previously said "the two answers", copied from variant 2.
filter_prompt_template_1 = """Below is a history of the conversation so far, and a new question asked by the user. 
Identify the language of the question and return "en-us" if it's in english and "de-de" if it's in german.
If you don't know the language, return "unknown".
Possible answers are: "en-us", "de-de", "unknown".

Return the answer enclosed in the quotes for e.g., 'en-us'
"""

# Few-shot examples for variant 1 (language code only).
filter_prompt_few_shots_1 = [
    {'role': USER, 'content': 'how to load the washing machine?'},
    {'role': ASSISTANT, 'content': 'en-us'},
    {'role': USER, 'content': 'Gibt es Wifi auf meine Waschmachine mit produkt nummer WGB256090?'},
    {'role': ASSISTANT, 'content': 'de-de'},
    {'role': USER, 'content': 'what are the available programms for SMS6TCI00E washing machine?'},
    {'role': ASSISTANT, 'content': 'en-us'},
]

# Variant 2 of the filter prompt: language detection plus product id,
# returned as one comma-separated answer such as 'en-us,SMS6TCI00E'.
filter_prompt_template_2 = """Below is a history of the conversation so far, and a new question asked by the user. 
First step: identify the language of the question and return "en-us" if it's in english and "de-de" if it's in german.
If you don't know the language, return "unknown".
Possible answers are: "en-us", "de-de", "unknown".

Second step: identify the product mentioned in the question and return the product id.
If you don't know the which product the client is talking about because it's not mentioned explicitly in the question, return "unknown".
Product ids are only "SMD6TCX00E", "SMS6TCI00E", "SMS8YCI03E", "WGB256A90", "WGB256090", "WUU28TA8". If it's not one of these ids, return "unknown".
Possible answers are: "SMD6TCX00E", "SMS6TCI00E", "SMS8YCI03E", "WGB256A90", "WGB256090", "WUU28TA8", "unknown".

Return the two answers in the comma separated format: e.g. 'en-us,SMS6TCI00E'
"""

# Few-shot examples for variant 2 ("<language>,<product id>").
filter_prompt_few_shots_2 = [
    {'role': USER, 'content': 'how to load the washing machine?'},
    {'role': ASSISTANT, 'content': 'en-us,unknown'},
    {'role': USER, 'content': 'what are the available programms for SMS6TCI00E whashing machine?'},
    {'role': ASSISTANT, 'content': 'en-us,SMS6TCI00E'},
    {'role': USER, 'content': 'what are the available programms for the whashing machine?'},
    {'role': ASSISTANT, 'content': 'en-us,unknown'},
]

In [72]:
# Sample user question used to drive the cells below.
user_input = "Ich habe eine Frage. Gibt es Wifi auf my Waschmachine mit produkt nummer WGB256090?"

# Conversation so far: a single user turn with no bot reply yet.
history = [{'user': user_input}]

# Per-request options read by the later cells.
overrides = {
    'retrieval_mode': 'hybrid',
    'semantic_ranker': True,
    'semantic_captions': False,
    'top': 5,
    'suggest_followup_questions': False,
}

# Request payload; it references the same `history` and `overrides` objects.
request = {
    'history': history,
    'approach': 'rrr',
    'overrides': overrides,
}

# NOTE(review): `get_token_limit` is not defined or imported in the visible cells —
# confirm where it comes from before running on a fresh kernel.
chatgpt_token_limit = get_token_limit(AZURE_OPENAI_CHATGPT_MODEL)

def get_messages_from_history(
    system_prompt: str,
    model_id: str,
    history,
    user_conv: str,
    few_shots = [],
    max_tokens: int = 4096,
):
    """Build the chat message list for one completion request.

    Args:
        system_prompt: Text passed to ``MessageBuilder`` as the system prompt.
        model_id: Model identifier passed to ``MessageBuilder``.
        history: Sequence of turns; each turn is read via ``.get('user')`` and
            ``.get('bot')``. The last turn is skipped because ``user_conv`` stands in for it.
        user_conv: Content of the current user message.
        few_shots: Example messages, each read via ``.get('role')`` / ``.get('content')``.
        max_tokens: Once the builder's ``token_length`` exceeds this value, no
            further (older) turns are added.

    Returns:
        The ``messages`` attribute of the builder.
    """
    builder = MessageBuilder(system_prompt, model_id)

    # Examples that show the model the expected shape of its responses.
    # NOTE(review): these use append_message's default position; if that is a fixed
    # index the examples may end up in reverse order — confirm against MessageBuilder.
    for example in few_shots:
        builder.append_message(example.get('role'), example.get('content'))

    # Every later message is inserted at this same index.
    insert_at = len(few_shots) + 1
    builder.append_message(USER, user_conv, index=insert_at)

    # Walk the earlier turns from newest to oldest.
    earlier_turns = history[:-1]
    for turn in reversed(earlier_turns):
        bot_reply = turn.get("bot")
        if bot_reply:
            builder.append_message(ASSISTANT, bot_reply, index=insert_at)
        builder.append_message(USER, turn.get('user'), index=insert_at)
        # The check runs after the turn was added, so the turn that crosses the limit is kept.
        if builder.token_length > max_tokens:
            break

    return builder.messages

In [73]:
# Retrieval flags derived from the request overrides; a missing mode enables both.
has_text = overrides.get("retrieval_mode") in ("text", "hybrid", None)
has_vector = overrides.get("retrieval_mode") in ("vectors", "hybrid", None)

# Semantic captions are only used when they are requested and text retrieval is on.
use_semantic_captions = bool(overrides.get("semantic_captions") and has_text)

# Number of results to retrieve; defaults to 3 when unset (or 0).
top = overrides.get("top") or 3

# Optional filter expression excluding one category; single quotes are doubled.
# Note: the name `filter` shadows the Python builtin for the rest of the notebook.
exclude_category = overrides.get("exclude_category") or None
if exclude_category:
    filter = "category ne '{}'".format(exclude_category.replace("'", "''"))
else:
    filter = None

# The latest user turn, prefixed for use as the final prompt message.
user_q = 'User question: ' + history[-1]["user"]

print("prompt for query generation: " + user_q + "\n")

# STEP 1: Generate an optimized keyword search query based on the chat history and the last question
messages_filtering = get_messages_from_history(
    filter_prompt_template,
    AZURE_OPENAI_CHATGPT_MODEL,
    history,
    user_q,
    filter_prompt_few_shots,
    chatgpt_token_limit - len(user_q)
    )

chat_completion_filter = await openai.ChatCompletion.acreate(
    deployment_id=AZURE_OPENAI_CHATGPT_DEPLOYMENT,
    model=AZURE_OPENAI_CHATGPT_MODEL,
    messages=messages_filtering,
    temperature=0.0,
    max_tokens=32,
    n=1)

filtering_content = chat_completion_filter.choices[0].message.content

print(filtering_content)




prompt for query generation: User question: Ich habe eine Frage. Gibt es Wifi auf my Waschmachine mit produkt nummer WGB256090?

de-de,WGB256090


In [74]:
messages_query = get_messages_from_history(
    query_prompt_template,
    AZURE_OPENAI_CHATGPT_MODEL,
    history,
    user_q,
    query_prompt_few_shots,
    chatgpt_token_limit - len(user_q)
    )

chat_completion_query = await openai.ChatCompletion.acreate(
    deployment_id=AZURE_OPENAI_CHATGPT_DEPLOYMENT,
    model=AZURE_OPENAI_CHATGPT_MODEL,
    messages=messages_query,
    temperature=0.0,
    max_tokens=32,
    n=1)

query_content = chat_completion_query.choices[0].message.content

print(query_content)

"Waschmaschine mit Produktnummer WGB256090 WiFi-Funktion"


In [75]:
# Show the conversation history as this cell's output.
history

[{'user': 'Ich habe eine Frage. Gibt es Wifi auf my Waschmachine mit produkt nummer WGB256090?'}]