Skip to content

Commit

Permalink
Merge pull request #578 from microsoft/dayland/7212-add-modes-to-chat-ux
Browse files Browse the repository at this point in the history
Add Chat modes to UX
  • Loading branch information
dayland committed Mar 21, 2024
2 parents b405d09 + cd97700 commit 47adda3
Show file tree
Hide file tree
Showing 23 changed files with 763 additions and 427 deletions.
14 changes: 7 additions & 7 deletions app/backend/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@
from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import RedirectResponse
import openai
from approaches.chatrrrbingcompare import ChatReadRetrieveReadBingCompare
from approaches.chatbingsearchcompare import ChatBingSearchCompare
from approaches.comparewebwithwork import CompareWebWithWork
from approaches.compareworkwithweb import CompareWorkWithWeb
from approaches.chatreadretrieveread import ChatReadRetrieveReadApproach
from approaches.chatwebretrieveread import ChatWebRetrieveRead
from approaches.gpt_direct_approach import GPTDirectApproach
from approaches.approach import Approaches
from azure.core.credentials import AzureKeyCredential
Expand All @@ -26,7 +27,6 @@
generate_account_sas,
)
from shared_code.status_log import State, StatusClassification, StatusLog, StatusQueryLevel
from approaches.chatbingsearch import ChatBingSearch
from azure.cosmos import CosmosClient


Expand Down Expand Up @@ -192,25 +192,25 @@
ENV["AZURE_AI_TRANSLATION_DOMAIN"],
str_to_bool.get(ENV["USE_SEMANTIC_RERANKER"])
),
Approaches.ChatBingSearch: ChatBingSearch(
Approaches.ChatWebRetrieveRead: ChatWebRetrieveRead(
model_name,
ENV["AZURE_OPENAI_CHATGPT_DEPLOYMENT"],
ENV["TARGET_TRANSLATION_LANGUAGE"],
ENV["BING_SEARCH_ENDPOINT"],
ENV["BING_SEARCH_KEY"],
str_to_bool.get(ENV["ENABLE_BING_SAFE_SEARCH"])
),
Approaches.ChatBingSearchCompare: ChatBingSearchCompare(
Approaches.CompareWorkWithWeb: CompareWorkWithWeb(
model_name,
ENV["AZURE_OPENAI_CHATGPT_DEPLOYMENT"],
ENV["TARGET_TRANSLATION_LANGUAGE"],
ENV["BING_SEARCH_ENDPOINT"],
ENV["BING_SEARCH_KEY"],
str_to_bool.get(ENV["ENABLE_BING_SAFE_SEARCH"])
),
Approaches.BingRRRCompare: ChatReadRetrieveReadBingCompare(
Approaches.CompareWebWithWork: CompareWebWithWork(
search_client,
ENV["AZURE_OPENAI_SERVICE"],
ENV["AZURE_OPENAI_ENDPOINT"],
ENV["AZURE_OPENAI_SERVICE_KEY"],
ENV["AZURE_OPENAI_CHATGPT_DEPLOYMENT"],
ENV["KB_FIELDS_SOURCEFILE"],
Expand Down
6 changes: 3 additions & 3 deletions app/backend/approaches/approach.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ class Approaches(Enum):
ReadRetrieveRead = 1
ReadDecomposeAsk = 2
GPTDirect = 3
ChatBingSearch = 4
ChatBingSearchCompare = 5
BingRRRCompare = 6
ChatWebRetrieveRead = 4
CompareWorkWithWeb = 5
CompareWebWithWork = 6

class Approach:
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from core.messagebuilder import MessageBuilder
from core.modelhelper import get_token_limit

class ChatBingSearch(Approach):
class ChatWebRetrieveRead(Approach):
"""Class to help perform RAG based on Bing Search and ChatGPT."""

SYSTEM_MESSAGE_CHAT_CONVERSATION = """You are an Azure OpenAI Completion system. Your persona is {systemPersona} who helps answer questions. {response_length_prompt}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@
)
from core.modelhelper import get_token_limit

class ChatReadRetrieveReadBingCompare(Approach):
class CompareWebWithWork(Approach):
"""
Approach for comparing and contrasting answers from internal data and Bing Chat.
Approach for comparing and contrasting generative response answers based on web search results vs. based on work search results.
"""

COMPARATIVE_SYSTEM_MESSAGE_CHAT_CONVERSATION = """You are an Azure OpenAI Completion system. Your persona is {systemPersona}. User persona is {userPersona}.
Compare and contrast the answers provided below from two sources of data. The first source is internal data indexed using a RAG pattern while the second source is from Bing Chat.
Compare and contrast the answers provided below from two sources of data. The first source is Web where data is retrieved from an internet search while the second source is Work where internal data is indexed using a RAG pattern.
Only explain the differences between the two sources and nothing else. Do not provide personal opinions or assumptions.
Only answer in the language {query_term_language}.
If you cannot find answer in below sources, respond with I am not sure. Do not provide personal opinions or assumptions.
Expand All @@ -29,8 +29,8 @@ class ChatReadRetrieveReadBingCompare(Approach):
"""

COMPARATIVE_RESPONSE_PROMPT_FEW_SHOTS = [
{"role": Approach.USER ,'content': 'I am looking for comparative information in the Bing Search Response and want to compare against the Internal Documents'},
{'role': Approach.ASSISTANT, 'content': 'user is looking to compare information in Bing Search Response against Internal Documents.'}
{"role": Approach.USER ,'content': 'I am looking for comparative information on an answer based on Web search results and want to compare against an answer based on Work internal documents'},
{'role': Approach.ASSISTANT, 'content': 'User is looking to compare an answer based on Web search results against an answer based on Work internal documents.'}
]

citations = {}
Expand Down Expand Up @@ -82,7 +82,7 @@ def __init__(

async def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]) -> Any:
"""
Runs the approach to compare and contrast answers from internal data and Bing Chat.
Runs the approach to compare and contrast answers from internal data and Web Search results.
Args:
history (Sequence[dict[str, str]]): The conversation history.
Expand Down Expand Up @@ -118,13 +118,13 @@ async def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]
self.citations = rrr_response.get("citation_lookup")

user_query = history[-1].get("user")
bing_answer = history[0].get("bot")
web_answer = history[0].get("bot")
user_persona = overrides.get("user_persona", "")
system_persona = overrides.get("system_persona", "")
response_length = int(overrides.get("response_length") or 1024)

# Step 2: Construct the comparative system message with passed Rag response and Bing Search Response from above approach
bing_compare_query = user_query + "Internal Documents:\n" + rrr_response.get("answer") + "\n\n" + " Bing Search Response:\n" + bing_answer + "\n\n"
bing_compare_query = user_query + " Web search results:\n" + web_answer + "\n\n" + "Work internal Documents:\n" + rrr_response.get("answer") + "\n\n"

messages = self.get_messages_builder(
self.COMPARATIVE_SYSTEM_MESSAGE_CHAT_CONVERSATION.format(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,19 @@
from typing import Any, Sequence
import urllib.parse
import openai
from approaches.chatbingsearch import ChatBingSearch
from approaches.chatwebretrieveread import ChatWebRetrieveRead
from approaches.approach import Approach
from core.messagebuilder import MessageBuilder
from core.modelhelper import get_token_limit


class ChatBingSearchCompare(Approach):
class CompareWorkWithWeb(Approach):
"""
Approach class for performing comparative analysis between Bing Search Response and Internal Documents.
Approach class for performing comparative analysis between Generative answer responses based on Bing search results vs. work internal document search results.
"""

COMPARATIVE_SYSTEM_MESSAGE_CHAT_CONVERSATION = """You are an Azure OpenAI Completion system. Your persona is {systemPersona}. User persona is {userPersona}.
Compare and contrast the answers provided below from two sources of data. The first source is internal data indexed using a RAG pattern while the second source is from Bing Chat.
Compare and contrast the answers provided below from two sources of data. The first source is Work where internal data is indexed using a RAG pattern while the second source is Web where results are from an internet search.
Only explain the differences between the two sources and nothing else. Do not provide personal opinions or assumptions.
Only answer in the language {query_term_language}.
If you cannot find answer in below sources, respond with I am not sure. Do not provide personal opinions or assumptions.
Expand All @@ -26,22 +26,25 @@ class ChatBingSearchCompare(Approach):
"""

COMPARATIVE_RESPONSE_PROMPT_FEW_SHOTS = [
{"role": Approach.USER ,'content': 'I am looking for comparative information in the Bing Search Response and want to compare against the Internal Documents'},
{'role': Approach.ASSISTANT, 'content': 'user is looking to compare information in Bing Search Response against Internal Documents.'}
{"role": Approach.USER ,'content': 'I am looking for comparative information on an answer based on Work internal documents and want to compare against an answer based on Web search results'},
{'role': Approach.ASSISTANT, 'content': 'User is looking to compare an answer based on Work internal documents against an answer based on Web search results.'}
]

citations = {}

def __init__(self, model_name: str, chatgpt_deployment: str, query_term_language: str, bing_search_endpoint: str, bing_search_key: str, bing_safe_search: bool):
"""
Initializes the ChatBingSearchCompare approach.
Initializes the CompareWorkWithWeb approach.
Args:
model_name (str): The name of the model to be used for chat-based language model.
chatgpt_deployment (str): The deployment ID of the chat-based language model.
query_term_language (str): The language to be used for querying the data.
bing_search_endpoint (str): The endpoint for the Bing Search API.
bing_search_key (str): The API key for the Bing Search API.
bing_safe_search (bool): The flag to enable or disable safe search for the Bing Search API.
"""
self.name = "ChatBingSearchCompare"
self.name = "CompareWorkWithWeb"
self.model_name = model_name
self.chatgpt_deployment = chatgpt_deployment
self.query_term_language = query_term_language
Expand All @@ -62,7 +65,7 @@ async def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]
Any: The result of the comparative analysis.
"""
# Step 1: Call bing Search Approach for a Bing LLM Response and Citations
chat_bing_search = ChatBingSearch(self.model_name, self.chatgpt_deployment, self.query_term_language, self.bing_search_endpoint, self.bing_search_key, self.bing_safe_search)
chat_bing_search = ChatWebRetrieveRead(self.model_name, self.chatgpt_deployment, self.query_term_language, self.bing_search_endpoint, self.bing_search_key, self.bing_safe_search)
bing_search_response = await chat_bing_search.run(history, overrides)
self.citations = bing_search_response.get("citation_lookup")

Expand All @@ -73,7 +76,7 @@ async def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]
response_length = int(overrides.get("response_length") or 1024)

# Step 2: Construct the comparative system message with passed Rag response and Bing Search Response from above approach
bing_compare_query = user_query + "Internal Documents:\n" + rag_answer + "\n\n" + " Bing Search Response:\n" + bing_search_response.get("answer") + "\n\n"
bing_compare_query = user_query + "Work internal documents:\n" + rag_answer + "\n\n" + " Web search results:\n" + bing_search_response.get("answer") + "\n\n"

messages = self.get_messages_builder(
self.COMPARATIVE_SYSTEM_MESSAGE_CHAT_CONVERSATION.format(
Expand All @@ -93,9 +96,9 @@ async def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]
msg_to_display = '\n\n'.join([str(message) for message in messages])

# Step 3: Final comparative analysis using OpenAI Chat Completion
bing_compare_resp = await self.make_chat_completion(messages)
compare_resp = await self.make_chat_completion(messages)

final_response = f"{urllib.parse.unquote(bing_compare_resp)}"
final_response = f"{urllib.parse.unquote(compare_resp)}"

# Step 4: Append web citations from the Bing Search approach
for idx, url in enumerate(self.citations.keys(), start=1):
Expand Down
85 changes: 15 additions & 70 deletions app/backend/approaches/gpt_direct_approach.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,21 +34,15 @@ class GPTDirectApproach(Approach):
USER = "user"
ASSISTANT = "assistant"

system_message_chat_conversation = """You are an Azure OpenAI Completion system. Your persona is {systemPersona} who helps answer questions about an agency's data. {response_length_prompt}
User persona is {userPersona}
Your goal is to provide accurate and relevant answers based on the facts. Make sure to avoid making assumptions or adding personal opinions.
Emphasize the use of facts.
Here is how you should answer every question:
-Please respond with relevant information from the data in the response.
system_message_chat_conversation = """You are an Azure OpenAI Completion system. Your persona is {systemPersona} who helps users interact with a Large Language Model. {response_length_prompt}
User persona is {userPersona}. You are having a conversation with a user and you need to provide a response.
{follow_up_questions_prompt}
{injected_prompt}
"""
follow_up_questions_prompt_content = """
Generate three very brief follow-up questions that the user would likely ask next about their agencies data. Use triple angle brackets to reference the questions, e.g. <<<Are there exclusions for prescriptions?>>>. Try not to repeat questions that have already been asked.
Generate three very brief follow-up questions that the user would likely ask next about their previous chat context. Use triple angle brackets to reference the questions, e.g. <<<Are there exclusions for prescriptions?>>>. Try not to repeat questions that have already been asked.
Only generate questions and do not generate any text before or after the questions, such as 'Next Questions'
"""

Expand Down Expand Up @@ -105,33 +99,7 @@ async def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]
system_persona = overrides.get("system_persona", "")
response_length = int(overrides.get("response_length") or 1024)

user_q = 'Generate search query for: ' + history[-1]["user"]

query_prompt=self.query_prompt_template.format(query_term_language=self.query_term_language)

# STEP 1: Generate an optimized keyword search query based on the chat history and the last question
messages = self.get_messages_from_history(
query_prompt,
self.model_name,
history,
user_q,
self.query_prompt_few_shots,
self.chatgpt_token_limit - len(user_q)
)

chat_completion = openai.ChatCompletion.create(
deployment_id=self.chatgpt_deployment,
model=self.model_name,
messages=messages,
temperature=0.0,
max_tokens=32,
n=1)

generated_query = chat_completion.choices[0].message.content
#if we fail to generate a query, return the last user question
if generated_query.strip() == "0":
generated_query = history[-1]["user"]

user_q = 'Generate response for: ' + history[-1]["user"]

#Generate the follow up prompt to be sent to the GPT model
follow_up_questions_prompt = (
Expand All @@ -140,39 +108,16 @@ async def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]
else ""
)

# Allow client to replace the entire prompt, or to inject into the existing prompt using >>>
prompt_override = overrides.get("prompt_template")

if prompt_override is None:
system_message = self.system_message_chat_conversation.format(
injected_prompt="",
follow_up_questions_prompt=follow_up_questions_prompt,
response_length_prompt=self.get_response_length_prompt_text(
response_length
),
userPersona=user_persona,
systemPersona=system_persona,
)
elif prompt_override.startswith(">>>"):
system_message = self.system_message_chat_conversation.format(
injected_prompt=prompt_override[3:] + "\n ",
follow_up_questions_prompt=follow_up_questions_prompt,
response_length_prompt=self.get_response_length_prompt_text(
response_length
),
userPersona=user_persona,
systemPersona=system_persona,
)
else:
system_message = self.system_message_chat_conversation.format(
follow_up_questions_prompt=follow_up_questions_prompt,
response_length_prompt=self.get_response_length_prompt_text(
response_length
),
userPersona=user_persona,
systemPersona=system_persona,
)

system_message = self.system_message_chat_conversation.format(
injected_prompt="",
follow_up_questions_prompt=follow_up_questions_prompt,
response_length_prompt=self.get_response_length_prompt_text(
response_length
),
userPersona=user_persona,
systemPersona=system_persona,
)

#Generate a contextual and content-specific answer using the search results and chat history.
#Added conditional block to use different system messages for different models.
messages = self.get_messages_from_history(
Expand All @@ -198,7 +143,7 @@ async def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]
return {
"data_points": [],
"answer": f"{urllib.parse.unquote(chat_completion.choices[0].message.content)}",
"thoughts": f"Searched for:<br>{generated_query}<br><br>Conversations:<br>" + msg_to_display.replace('\n', '<br>'),
"thoughts": f"Searched for:<br>{user_q}<br><br>Conversations:<br>" + msg_to_display.replace('\n', '<br>'),
"citation_lookup": {}
}

15 changes: 9 additions & 6 deletions app/frontend/src/api/models.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,20 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.

import { string } from "prop-types";
export const enum ChatMode {
WorkOnly = 0,
WorkPlusWeb = 1,
Ungrounded = 2
}

export const enum Approaches {
RetrieveThenRead = 0,
ReadRetrieveRead = 1,
ReadDecomposeAsk = 2,
GPTDirect = 3,
BingSearch = 4,
BingSearchCompare = 5,
BingRRRCompare = 6
ChatWebRetrieveRead = 4,
CompareWorkWithWeb = 5,
CompareWebWithWork = 6
}

export type AskRequestOverrides = {
Expand Down Expand Up @@ -43,8 +47,7 @@ export type AskResponse = {
answer: string;
thoughts: string | null;
data_points: string[];
source: string;
comparative: boolean;
approach: Approaches;
// citation_lookup: {}
// added this for citation bug. aparmar.
citation_lookup: { [key: string]: { citation: string; source_path: string; page_number: string } };
Expand Down

0 comments on commit 47adda3

Please sign in to comment.