In [None]:
# Uncomment the line below to install the packages listed on it with the %pip magic.
# NOTE(review): huggingface_hub, transformers, datasets, langchain, openai and openpyxl
# are not pinned, so a fresh install may pull releases this notebook was not written
# against -- pin them once the working versions are confirmed.
#%pip install wikipedia==1.4.0 google-search-results==2.4.2 better-profanity==0.7.0 sqlalchemy==2.0.15 huggingface_hub transformers datasets langchain openai openpyxl

## Generate API tokens
For many of the services that we'll be using in this notebook, we'll need some API keys. Follow the instructions below to generate your own.

### Hugging Face Hub
1. Go to this [Inference API page](https://huggingface.co/inference-api) and click "Sign Up" on the top right.

<img src="https://files.training.databricks.com/images/llm/hf_sign_up.png" width=700>

2. Once you have signed up and confirmed your email address, click on your user icon on the top right and click the `Settings` button. 

3. Navigate to the `Access Token` tab and copy your token. 

<img src="https://files.training.databricks.com/images/llm/hf_token_page.png" width=500>

### SerpApi

1. Go to this [page](https://serpapi.com/search-api) and click "Register" on the top right. 
<img src="https://files.training.databricks.com/images/llm/serp_register.png" width=800>

2. After registration, navigate to your dashboard and open the `API Key` tab. Copy your API key.
<img src="https://files.training.databricks.com/images/llm/serp_api.png" width=800>

In [None]:
# TODO: replace the two placeholder strings below with your own tokens
# before running this cell.

import os

# Export both credentials as process environment variables in one step.
os.environ.update(
    {
        "HUGGINGFACEHUB_API_TOKEN": "<enter-your-huggingface-token>",
        "SERPAPI_API_KEY": "<enter-your-google-serpapi-token>",
    }
)

#### OPTIONAL: Use OpenAI's language model

If you'd rather use OpenAI, you need to generate an OpenAI key. 

Steps:
1. You need to [create an account](https://platform.openai.com/signup) on OpenAI. 
2. Generate an OpenAI [API key here](https://platform.openai.com/account/api-keys). 

Note: OpenAI does not have a free option, but it gives you \$5 as credit. Once you have exhausted your \$5 credit, you will need to add a payment method. You will be [charged per token usage](https://openai.com/pricing).

**IMPORTANT**: It's crucial that you keep your OpenAI API key to yourself. If others have access to your OpenAI key, they will be able to charge their usage to your account!

In [None]:
# TODO
# Optional: to use OpenAI, uncomment the assignment below and paste in your own key.
# Keep real keys out of shared or committed notebooks.

#OPENAI_API_KEY = "<enter-your-openai-api-key>"

## `Command Agent` - Our first vector database data science AI agent!

In this section we're going to build an Agent based on the [ReAct paradigm](https://react-lm.github.io/) (or thought-action-observation loop) that will take instructions in plain text and perform data science analysis on data that we've stored in a vector database. The agent type we'll use relies on zero-shot learning: it takes in the prompt and leverages the underlying LLM's zero-shot abilities.

In [None]:
# The Command Agent needs some tools to use, as well as an LLM for the brain/reasoning
from langchain.agents import load_tools  # This will allow us to load tools we need
from langchain.agents import initialize_agent
from langchain.agents import (
    AgentType,
)  # We will be using the type: ZERO_SHOT_REACT_DESCRIPTION which is standard

from langchain.llms import OpenAI
from langchain.llms import AzureOpenAI
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory

# LLM used both by the tools and as the agent's reasoning engine.
# Replace every "<enter-...>" placeholder with the values of your own Azure
# OpenAI resource, and keep real keys out of shared or committed notebooks.
llm = AzureOpenAI(
    openai_api_base="<enter-your-azure-openai-api-endpoint>",
    openai_api_version="2023-05-15",
    deployment_name="<enter-your-deployment-name>",  # This is the name of the gpt-35-turbo deployment you created in Azure (version 0331)
    openai_api_key="<enter-your-azure-openai-api-key>",
    openai_api_type="azure",
    temperature=0,
)

# Tools the agent may call while working on an instruction.
# NOTE(review): "python_repl" and "terminal" presumably let the model run
# arbitrary Python and shell commands on this machine -- confirm against the
# LangChain tool documentation and only run this in a sandboxed environment.
tools = load_tools(["serpapi", "python_repl", "terminal"], llm=llm)

# We now create the agent executor using the "initialize_agent" command.
command_agent = initialize_agent(
    tools,
    llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
    early_stopping_method="generate",
)

In [None]:
# Plain-text instruction for the agent. The literal is split across lines for
# readability; adjacent string literals are joined into one prompt string.
faq_prompt = (
    "Generate 40 records of a call center FAQ for an ecommerce clothing store. "
    "Include products in these categories: designer, shoes, watches, jewelry, hand bags, clothes etc.  "
    "Store the customer utterance in the Question column and the store agent in the Answer column. "
    "Save the data in a spreadsheet in the local directory. "
    "DO NOT REPEAT the questions!  "
    "Do not include the agent's name!  "
    "DO NOT GIVE a blank answer! "
    "DO NOT PROVIDE THE SAME ANSWER FOR DIFFERENT QUESTIONS! "
    "DO NOT REPEAT A QUESTION!! "
    "DO NOT REPEAT SAME TOPIC IN QUESTIONS!  "
    "ANSWER MUST ALWAYS BE RELEVANT TO THE QUESTION! "
    "LIMIT ANSWERS TO 50 WORDS! "
    "LIMIT EXAMPLES IN RESPONSES TO 3 OR LESS!"
)

# Kept as the last expression of the cell so the returned value is displayed.
command_agent.run(faq_prompt)

### <i>Courtesy of Edx.org LLM & Databricks course</i>