In [None]:
%pip install --upgrade numpy openai langchain azure-storage-blob azure-identity unstructured
%pip install --index-url=https://pkgs.dev.azure.com/azure-sdk/public/_packaging/azure-sdk-for-python/pypi/simple/ azure-search-documents==11.4.0a20230509004

In [35]:
# macOS: requires libmagic, which can be installed via Homebrew
# !brew install libmagic

In [1]:
# Switch for the ingestion steps: when True, the notebook loads the blobs from
# Azure Storage and uploads them to the vector store; when False, both steps
# are skipped.
generate_embeddings = False

In [2]:
# Reading blobs from Azure Storage Account

import os

from langchain.document_loaders import AzureBlobStorageContainerLoader


# Only touch Azure Storage when embeddings are being (re)generated.
if generate_embeddings:
    # Connection string and container name are read from environment variables.
    loader = AzureBlobStorageContainerLoader(
        conn_str=os.getenv("AZURE_STORAGE_ACCOUNT_CONNECTION_STRING"),
        container=os.getenv("AZURE_STORAGE_CONTAINER_NAME"),
    )

    # Load and split blobs into chunks
    pages = loader.load_and_split()

In [3]:
# Insert text and embeddings into Azure Cognitive Search

from openai.embeddings_utils import get_embedding
from langchain.vectorstores.azuresearch import AzureSearch


def embeddings(text, engine="embedding-ada"):
    """Embed ``text`` by delegating to ``openai.embeddings_utils.get_embedding``.

    Args:
        text: The text to embed.
        engine: Name forwarded to ``get_embedding`` as ``engine``. Defaults to
            ``"embedding-ada"``, so single-argument callers keep the previous
            behavior.

    Returns:
        Whatever ``get_embedding`` returns for ``text`` (presumably the
        embedding vector as a list of floats; confirm against the openai
        version in use).
    """
    return get_embedding(text, engine=engine)


# Make sure to create index first:
# https://github.com/Azure/cognitive-search-vector-pr/blob/main/docs/rest-api-reference/create-or-update-index.md
# Vector store client. Endpoint, API key and index name are read from
# environment variables; `embeddings` is handed over as the embedding function.
vector_store: AzureSearch = AzureSearch(
    azure_search_endpoint=os.getenv("AZURE_SEARCH_ENDPOINT"),
    azure_search_key=os.getenv("AZURE_SEARCH_API_KEY"),
    index_name=os.getenv("AZURE_SEARCH_INDEX_NAME"),
    embedding_function=embeddings,
)

# Upload documents. `pages` only exists when the blob-loading cell ran with the
# same flag enabled, so the upload is guarded by it as well.
if generate_embeddings:
    vector_store.add_documents(documents=pages)

In [15]:
# Perform a similarity search

from langchain.tools import tool


@tool
def search_wiki(query):
    """Search for CWC Wiki data."""
    # NOTE(review): the docstring above is deliberately left as-is; the @tool
    # decorator presumably uses it as the tool description shown to the LLM,
    # so editing it would change the agent's prompt.
    docs = vector_store.similarity_search(
        query=query,
        k=10,
        search_type="similarity",
    )

    # Guard against an empty result list so the agent receives a readable
    # observation instead of an IndexError from docs[0].
    if not docs:
        return "No results found."

    # Only the first hit is handed back to the agent.
    return docs[0].page_content

In [31]:
# Set up an agent

from langchain.agents import AgentExecutor, AgentType, load_tools
from langchain.agents.loading import AGENT_TO_CLASS
from langchain.chat_models import AzureChatOpenAI
from langchain.llms import AzureOpenAI
from langchain.memory import ConversationBufferMemory


# System message for the agent; it is passed as `prefix` when the agent is built.
# It introduces the assistant persona ("Echo"), lists the tools and tells the
# model to rely on tool output and to check earlier messages first.
# NOTE(review): the text says "the following tool" (singular) but lists two
# tools; left unchanged here because editing the prompt may change model output.
system_message = """You are a friendly AI assistant called Echo. You help answer questions from users about CWC wikipedia.

You have access to the following tool:

1. search_wiki: useful to search for information about CWC wikipedia. Remember that the search results are in markdown format so you need to parse it.
2. llm-math: useful to mathematics calculations.

Please always try to use the tools to find answers, do not use any information that isn't coming from the tools.

Before using any tool, remember to check the previous messages to see if you can find the answer.

"""

# Format instructions for the agent; passed as `format_instructions` when the
# agent is built. `{tool_names}` and `{ai_prefix}` are template placeholders
# (presumably filled in by the agent's prompt construction; confirm).
# Workaround #1 for the `Could not parse LLM output` issue: the final-answer
# format explicitly requires the "Thought: " and "{ai_prefix}: " prefixes.
# https://github.com/hwchase17/langchain/issues/1358#issuecomment-1569741405
format_instructions = """To use a tool, please use the following format:

```
Thought: Do I need to use a tool? Yes
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
```

When you have a response to say to the Human, or if you do not need to use a tool, you MUST use the following format(the prefix of "Thought: " and "{ai_prefix}: " are must be included):

```
Thought: Do I need to use a tool? No
{ai_prefix}: [your response here]
```"""

# Completion model handed to the llm-math tool.
llm = AzureOpenAI(deployment_name="testdavinci003", temperature=0)
# Chat model that drives the agent itself.
chat = AzureChatOpenAI(deployment_name="gpt-35-turbo", temperature=0.2)

# Tools available to the agent: llm-math first, then the wiki search tool.
tools = [*load_tools(["llm-math"], llm=llm), search_wiki]

# Resolve the conversational ReAct agent class and build the agent with the
# custom system message and format instructions.
agent_cls = AGENT_TO_CLASS[AgentType.CONVERSATIONAL_REACT_DESCRIPTION]
agent_obj = agent_cls.from_llm_and_tools(
    tools=tools,
    llm=chat,
    format_instructions=format_instructions,
    prefix=system_message,
)

# Conversation memory stored under "chat_history", shared across calls to the
# executor.
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")
agent_executor = AgentExecutor.from_agent_and_tools(
    verbose=False,
    memory=memory,
    tools=tools,
    agent=agent_obj,
)

In [32]:
# Testing

# Ask two related questions back to back; both go through the same
# agent_executor, so the second call runs with the first exchange in memory.
for question in ("What does TPM stand for?", "What does TPM do?"):
    print(agent_executor(question)["output"])

According to the CWC Wiki, TPM stands for Technical Program Manager. It is a role within Microsoft's Commercial Software Engineering organization that focuses on leading the coding with customers motion.
According to the CWC Wiki, TPMs perform many distinct functions throughout the CSE Execution Lifecycle. They partner with their peers in Development, PMO, and other teams within CSE and Microsoft to effectively perform their role. Additionally, TPMs are ever-learning and are expected to understand the customer, research about industry and market trends, develop technical abilities, and demonstrate thought leadership at various levels.
