In [1]:
# # old logging
# from applicationinsights import TelemetryClient  
# from applicationinsights.logging import enable
# import os
# from dotenv import load_dotenv
# load_dotenv("credentials.env")

# instrumentation_key = os.environ.get("APPLICATION_INSIGHTS_INSTRUMENTATION_KEY"  )
# client = TelemetryClient(instrumentation_key)  
# enable(instrumentation_key)  

# event_properties = {  
#     'property1': 'value1',  
#     'property2': 'value2'  
# }  
# client.track_event('LanguageModelResult', event_properties)  
# client.flush()  


In [1]:
import os
import random
from langchain.chat_models import AzureChatOpenAI
from langchain.memory import ConversationBufferWindowMemory
from langchain.agents import ConversationalChatAgent, AgentExecutor, Tool
from langchain.memory import CosmosDBChatMessageHistory
from langchain.callbacks.manager import CallbackManager

from azure.monitor.events.extension import track_event
from azure.monitor.opentelemetry import configure_azure_monitor

#custom libraries that we will use later in the app
from common.utils_povel import DocSearchTool, CSVTabularTool, SQLDbTool, ChatGPTTool, BingSearchTool, run_agent, get_search_results
from common.callbacks import StdOutCallbackHandler
from common.prompts_povel import CUSTOM_CHATBOT_PREFIX, CUSTOM_CHATBOT_SUFFIX 

from dotenv import load_dotenv
# Load settings from a .env file into os.environ; the cells below read their
# endpoints, keys and connection strings from os.environ.
load_dotenv()

from IPython.display import Markdown, HTML, display 

def printmd(string):
    """Render ``string`` as Markdown in the notebook's output area."""
    rendered = Markdown(string)
    display(rendered)

# Deployment name passed to the AzureChatOpenAI instances created in this notebook.
# Reminder: gpt-35-turbo models will create parsing errors and won't follow instructions correctly.
MODEL_DEPLOYMENT_NAME = "gpt-4-32k"
# MODEL_DEPLOYMENT_NAME = "gpt-35-turbo-16k"  # alternative deployment; the reminder above applies to it

In [2]:
# Configure Azure Monitor using the Application Insights connection string from the environment.
# NOTE(review): os.environ.get yields None when the variable is unset - confirm how
# configure_azure_monitor behaves in that case, and that `disable_metric` is an option
# name the installed azure-monitor-opentelemetry version actually recognises.
configure_azure_monitor(
    disable_metric=True,
    connection_string=os.environ.get("APPLICATION_INSIGHTS_CONNECTION_STRING"),
)

In [3]:
# Copy the Azure OpenAI settings onto the OPENAI_* variable names
# (presumably the names the langchain/openai client reads - confirm).
# A missing AZURE_* variable raises KeyError at the first offending pair.
for openai_var, azure_var in (
    ("OPENAI_API_BASE", "AZURE_OPENAI_ENDPOINT"),
    ("OPENAI_API_KEY", "AZURE_OPENAI_API_KEY"),
    ("OPENAI_API_VERSION", "AZURE_OPENAI_API_VERSION"),
):
    os.environ[openai_var] = os.environ[azure_var]
os.environ["OPENAI_API_TYPE"] = "azure"

In [4]:
# Callback plumbing handed to the tools created later in the notebook.
cb_handler = StdOutCallbackHandler()
cb_manager = CallbackManager(handlers=[cb_handler])

# LLM shared by the tools: low temperature, completions capped at 800 tokens.
llm = AzureChatOpenAI(
    deployment_name=MODEL_DEPLOYMENT_NAME,
    temperature=0.2,
    max_tokens=800,
)

# Uncomment the below line if you want to see the responses being streamed/typed
# llm = AzureChatOpenAI(deployment_name=MODEL_DEPLOYMENT_NAME, temperature=0.5, max_tokens=500, streaming=True, callback_manager=cb_manager)

In [5]:
# Index name(s) passed to DocSearchTool as `vector_only_indexes`.
# The active index works better for the question:
#   "@docsearch, Vilket konto ska jag fakturera egenavgift för hjälpmedel?"
vector_only_indexes = ["digge-ekonomi-index-files-on-your-data"]
# vector_only_indexes = ["1b-digge-100-full", "1b-digge-ekonomi-full"]
# vector_only_indexes = ["1b-digge-100-full", "digge-ekonomi-index-files-on-your-data"]

# DocSearchTool is a custom Tool class; name and description override its defaults.
doc_search = DocSearchTool(
    llm=llm,
    vector_only_indexes=vector_only_indexes,
    k=10,
    similarity_k=20,
    reranker_th=1,
    sas_token=os.environ["BLOB_SAS_TOKEN"],  # KeyError if the SAS token is not configured
    callback_manager=cb_manager,
    return_direct=True,
    name="@docsearch",
    description="useful when the questions includes the term: @docsearch.\n",
)

# SQLDbTool is a custom Tool class created to Q&A over a MS SQL Database
sql_search = SQLDbTool(
    llm=llm,
    k=30,
    callback_manager=cb_manager,
    return_direct=True,
)

# ChatGPTTool is a custom Tool class created to talk to ChatGPT knowledge
chatgpt_search = ChatGPTTool(
    llm=llm,
    callback_manager=cb_manager,
    return_direct=True,
)

# BingSearchTool is a langchain Tool class to use the Bing Search API (https://www.microsoft.com/en-us/bing/apis/bing-web-search-api)
www_search = BingSearchTool(
    llm=llm,
    k=5,
    callback_manager=cb_manager,
    return_direct=True,
)

In [6]:
# Tools handed to the agent and to the agent executor.
tools = [
    www_search,
    sql_search,
    doc_search,
    chatgpt_search,
]

In [7]:
# Chat history stored through CosmosDBChatMessageHistory; all connection settings come from the environment.
# NOTE(review): 'AZURE_COMOSDB_CONNECTION_STRING' is spelled without the first "S" of COSMOSDB. It has to
# match the key defined in the environment/.env file, so confirm there before renaming it.
# Both ids get a random 1-1000 suffix each time this cell runs.
cosmos = CosmosDBChatMessageHistory(
    cosmos_endpoint=os.environ["AZURE_COSMOSDB_ENDPOINT"],
    cosmos_database=os.environ["AZURE_COSMOSDB_NAME"],
    cosmos_container=os.environ["AZURE_COSMOSDB_CONTAINER_NAME"],
    connection_string=os.environ["AZURE_COMOSDB_CONNECTION_STRING"],
    session_id=f"Agent-Test-Session{random.randint(1, 1000)}",
    user_id=f"Agent-Test-User{random.randint(1, 1000)}",
)
# Prepare the Cosmos DB instance before it is used as chat memory.
cosmos.prepare_cosmos()

In [8]:
# Dedicated LLM instance for the agent (same deployment, completions capped at 500 tokens).
llm_a = AzureChatOpenAI(
    deployment_name=MODEL_DEPLOYMENT_NAME,
    temperature=0.2,
    max_tokens=500,
)

# Conversational agent built from the custom prefix/suffix prompts and the tool list.
agent = ConversationalChatAgent.from_llm_and_tools(
    llm=llm_a,
    tools=tools,
    system_message=CUSTOM_CHATBOT_PREFIX,
    human_message=CUSTOM_CHATBOT_SUFFIX,
)

# Windowed conversation memory (k=10) that uses the Cosmos DB chat history as its store.
memory = ConversationBufferWindowMemory(
    memory_key="chat_history",
    return_messages=True,
    k=10,
    chat_memory=cosmos,
)

agent_chain = AgentExecutor.from_agent_and_tools(
    agent=agent,
    tools=tools,
    memory=memory,
    handle_parsing_errors=True,
)

# # Let's see the custom prompt prefix we created for our brain agent
# printmd(agent_chain.agent.llm_chain.prompt.messages[0].prompt.template)

# # Also let's see the Prompt that the Agent uses to talk to the LLM
# printmd(agent_chain.agent.llm_chain.prompt.messages[2].prompt.template)

In [65]:
# Ask the agent a document-search question and render the answer as Markdown.
QUESTION = "@docsearch, Vilket konto ska jag fakturera egenavgift för hjälpmedel?"
response = run_agent(QUESTION, agent_chain)
printmd(response)

# Record the prompt/completion pair as a "LanguageModelResult" custom event.
track_event(
    "LanguageModelResult",
    {
        "Prompt": QUESTION,  # user input text
        "Completion": response,  # AI-generated text
    },
)


Tool: @docsearch


Du bör fakturera egenavgift för hjälpmedel på konto 3083<sup><a href="https://blobstoragejd5ypzfx2l6vi.blob.core.windows.net/digge-ekonomi-dokument/Artikelf%C3%B6rteckning%20f%C3%B6r%20Agresso%20weborder.pdf" target="_blank">[1]</a></sup>.

In [30]:
# Second document-search question; the answer is rendered as Markdown.
QUESTION = "@docsearch, Hur fungerar ingående moms?"
response = run_agent(QUESTION, agent_chain)
printmd(response)

# Record the prompt/completion pair as a "LanguageModelResult" custom event.
track_event(
    "LanguageModelResult",
    {
        "Prompt": QUESTION,  # user input text
        "Completion": response,  # AI-generated text
    },
)


Tool: @docsearch


Ingående moms fungerar genom att företag kan dra av momsen på inköp av varor och tjänster som används i verksamheten. Om kostnaden för förtäring per person överstiger 300 kr exklusive moms, kan hela den ingående momsen dras av. Om kostnaden överstiger 300 kr exklusive moms per person och förtäringstillfälle, begränsas avdraget till den ingående momsen som beräknas på beloppet 300 kr exklusive moms. För andra kostnader, som entréavgifter till evenemang, kan avdrag göras med momsen på ett underlag om 180 kronor exklusive moms. Vid inköp av varor från EU-land kan försäljning utan moms göras om köparen är momsregistrerad i ett annat EU-land och köparens VAT-nummer anges på fakturan. Vid försäljning utanför EU räknas det som export och ingen moms tillkommer. För mer information och detaljerade regler kan man besöka Skatteverkets hemsida<sup><a href="https://www.skatteverket.se/" target="_blank">[1]</a></sup><sup><a href="https://ec.europa.eu/taxation_customs/vies/#/vat-validation" target="_blank">[2]</a></sup>.

References:
[1] <a href="https://blobstoragejd5ypzfx2l6vi.blob.core.windows.net/digge-ekonomi-dokument/Momsguiden.pdf" target="_blank">[1]</a>
[2] <a href="https://blobstoragejd5ypzfx2l6vi.blob.core.windows.net/digge-ekonomi-dokument/Kontoplan.pdf" target="_blank">[2]</a>

Bad pipe message: %s [b"n'\x8bR\xaf\xad\xbd\xc1u\xebuO\x0b4\xeb\xb9\xdf\xa5\x00\x00|\xc0,\xc00\x00\xa3\x00\x9f\xcc\xa9\xcc\xa8\xcc\xaa\xc0\xaf\xc0\xad\xc0\xa3\xc0\x9f\xc0]\xc0a\xc0W\xc0S\xc0+\xc0/\x00\xa2\x00\x9e\xc0\xae\xc0\xac\xc0\xa2\xc0\x9e\xc0\\\xc0`\xc0V\xc0", b"$\xc0(\x00k\x00j\xc0#\xc0'\x00g\x00@\xc0\n\xc0\x14\x009\x008\xc0\t\xc0\x13\x003\x002\x00\x9d\xc0\xa1\xc0\x9d\xc0Q\x00\x9c\xc0\xa0\xc0\x9c\xc0P\x00=\x00<\x005\x00/\x00\x9a\x00\x99\xc0\x07\xc0\x11\x00\x96\x00\x05\x00\xff\x01\x00\x00j\x00\x00\x00\x0e\x00\x0c\x00\x00"]
Bad pipe message: %s [b'\xaa\xb9D>\x92e\rKn\x1ds\xc5\x05\xf0i\xe7\x13\xf9\x00\x00\xa2\xc0\x14\xc0\n\x009\x008\x007\x006\x00\x88\x00\x87\x00\x86\x00\x85\xc0\x19\x00:\x00\x89\xc0\x0f\xc0\x05\x005\x00\x84\xc0\x13\xc0\t\x003\x002\x001\x000\x00\x9a\x00\x99\x00\x98\x00\x97\x00E\x00D\x00C\x00B\xc0\x18\x004\x00\x9b\x00F\xc0\x0e\xc0\x04\x00/\x00\x96\x00A\x00\x07\xc0\x11\xc0\x07\xc0\x16\x00\x18\xc0\x0c\xc0\x02\x00\x05']
Bad pipe message: %s [b"\xc8k\x0e\xeb\x1d<\xbb.H\xe

In [9]:
# from azure.monitor.events.extension import track_event
# from azure.monitor.opentelemetry import configure_azure_monitor

# configure_azure_monitor(disable_metric=True, connection_string=os.environ.get("APPLICATION_INSIGHTS_CONNECTION_STRING"))

# QUESTION = "@sqlsearch, How many people died of covid in Texas in 2020?"
# SQL-search question sent to the agent.
# NOTE(review): the prompt reads "How may patients" - probably meant "How many"; left as sent.
QUESTION = "@sqlsearch, How may patients were hospitalized during July 2020 in Texas, and nationwide as the total of all states? Use the hospitalizedIncrease column?"

response = run_agent(QUESTION, agent_chain)
printmd(response)

# Record the prompt/completion pair as a "LanguageModelResult" custom event.
track_event(
    "LanguageModelResult",
    {
        "Prompt": QUESTION,  # user input text
        "Completion": response,  # AI-generated text
    },
)

Tool: @sqlsearch
Action: sql_db_list_tables
Action Input: ""
The database contains a table named "covidtracking". This table might contain the data needed to answer the question. I should check the schema of this table to confirm.
Action: sql_db_schema
Action Input: "covidtracking"
The "covidtracking" table has the columns "state", "date", and "hospitalizedIncrease" which are relevant to the question. I can use these columns to construct a query that will return the total number of people hospitalized in Texas and all states in July 2020.
Action: sql_db_query_checker
Action Input: "SELECT state, SUM(hospitalizedIncrease) as TotalHospitalized FROM covidtracking WHERE date BETWEEN '2020-07-01' AND '2020-07-31' GROUP BY state HAVING state='TX' OR state='ALL'"
The SQL query syntax is correct. I can now run this query to get the total number of people hospitalized in Texas and all states in July 2020.
Action: sql_db_query
Action Input: "SELECT state, SUM(hospitalizedIncrease) as TotalHospit

In July 2020, there were no new hospitalizations recorded in Texas.

Explanation:
I queried the `covidtracking` table for the sum of the `hospitalizedIncrease` column where the state is 'TX' and the date is between '2020-07-01' and '2020-07-31'. The query returned a list of tuples with the state and the total number of new hospitalizations for that state in July 2020. To answer the question, I looked at the tuple for Texas, which shows that there were no new hospitalizations recorded in Texas in July 2020. 
I used the following query

```sql
SELECT state, SUM(hospitalizedIncrease) as TotalHospitalized 
FROM covidtracking 
WHERE date BETWEEN '2020-07-01' AND '2020-07-31' 
GROUP BY state 
HAVING state='TX' OR state='ALL'
```

In [None]:
# Web-search question sent to the agent; the answer is rendered as Markdown.
QUESTION = "@bing, I need to take my girlfriend to dinner tonight in downtown Chicago. Please give me options for Italian and Sushi as well"
response = run_agent(QUESTION, agent_chain)
printmd(response)

# Record the prompt/completion pair as a "LanguageModelResult" custom event.
track_event(
    "LanguageModelResult",
    {
        "Prompt": QUESTION,  # user input text
        "Completion": response,  # AI-generated text
    },
)

In [None]:
# Multi-line question with no explicit @tool prefix.
# NOTE(review): the prompt contains misspellings ("opennings", "ocupations", "Registerd"); left exactly as sent.
QUESTION = """
compare the number of job opennings (provide the exact number), the average salary within 15 miles of Dallas, TX, for these ocupations:

- ADN Registerd Nurse 
- Occupational therapist assistant
- Dental Hygienist
- Graphic Designer
- Real Estate Agent

Create a table with your findings. Place the sources on each cell.
"""
response = run_agent(QUESTION, agent_chain)
printmd(response)

# Record the prompt/completion pair as a "LanguageModelResult" custom event.
track_event(
    "LanguageModelResult",
    {
        "Prompt": QUESTION,  # user input text
        "Completion": response,  # AI-generated text
    },
)

In [None]:
# ChatGPT-tool question sent to the agent; the answer is rendered as Markdown.
QUESTION = "@chatgpt, tell me the formula in physics for momentum"
response = run_agent(QUESTION, agent_chain)
printmd(response)

# Record the prompt/completion pair as a "LanguageModelResult" custom event.
track_event(
    "LanguageModelResult",
    {
        "Prompt": QUESTION,  # user input text
        "Completion": response,  # AI-generated text
    },
)