In [94]:
import requests 
import io
import pandas as pd

In [95]:
import json
import os

# Location of the JSON file whose key/value pairs are exported as environment
# variables. Set NOTEBOOK_CONFIG_PATH to point elsewhere instead of editing the
# notebook; the original location is kept as the fallback.
config_path = os.environ.get(
    'NOTEBOOK_CONFIG_PATH',
    '/Users/marie/Documents/github/config.json',
)

with open(config_path, 'r') as f:
    config = json.load(f)

# os.environ only accepts strings, so JSON numbers/booleans are converted
# instead of raising TypeError.
for key in config:
    os.environ[key] = str(config[key])

In [96]:
import langchain
langchain.__version__

'0.0.350'

In [97]:
langchain.debug = False

In [98]:
# ! pip install langchain --upgrade

In [99]:
# ! pip install pydantic==1.10.12 # langchain works with Pydantic v1 internally

### Defining tools

In [100]:
# Default endpoint the ClickHouse helper functions below send their HTTP requests to
CH_HOST = 'http://localhost:8123'
# Let pandas display up to 1000 characters per column before truncating
pd.set_option('display.max_colwidth', 1000)

def get_clickhouse_data(query, host = CH_HOST, connection_timeout = 1500):
    """POST `query` to `host` and return the raw response body as text.

    The body is returned regardless of the HTTP status code: no exception is
    raised for a failed query, because the error message itself is what has to
    be handed back to the LLM.

    Args:
        query: SQL text, sent as the `query` URL parameter.
        host: base URL of the server (defaults to CH_HOST).
        connection_timeout: value passed to `requests` as `timeout`.

    Returns:
        The response text (query result or error message).
    """
    response = requests.post(
        host,
        params = {'query': query},
        timeout = connection_timeout,
    )
    return response.text

def get_clickhouse_df(query, host = CH_HOST, connection_timeout = 1500):
    """Run `query` via get_clickhouse_data and parse the tab-separated text into a DataFrame.

    The first line of the response is used as the header row (pandas default),
    so the query is expected to request a format that includes column names.
    """
    raw_text = get_clickhouse_data(query, host, connection_timeout)
    return pd.read_csv(io.StringIO(raw_text), sep = '\t')

In [101]:
from langchain.chat_models import ChatOpenAI
from langchain.agents import tool
from pydantic import BaseModel, Field
from typing import Optional

In [102]:
# Argument schema for the `execute_sql` tool; the Field description is part of
# the schema rendered into the agent prompts.
class SQLQuery(BaseModel):
    query: str = Field(description="SQL query to execute")

# NOTE: the docstring below is rendered into the agent prompts as the tool
# description, so it is prompt text rather than plain documentation.
@tool(args_schema = SQLQuery)
def execute_sql(query: str) -> str:
    """Returns the result of SQL query execution"""
    # Returns the raw response text, which is the error message when the query fails
    return get_clickhouse_data(query)

In [103]:
# Argument schema for the `get_table_columns` tool.
class SQLTable(BaseModel):
    database: str = Field(description="Database name")
    table: str = Field(description="Table name")

@tool(args_schema = SQLTable)
def get_table_columns(database: str, table: str) -> str:
    """Returns list of table column names and types in JSON"""
    # The docstring above is used as the tool description shown to the LLM,
    # so the return value has to be real JSON to match it.

    def _escape(value: str) -> str:
        # Keep the value inside its SQL string literal: escape backslashes
        # first, then single quotes.
        return value.replace('\\', '\\\\').replace("'", "\\'")

    q = '''
    select name, type
    from system.columns
    where database = '{database}'
        and table = '{table}'
    format TabSeparatedWithNames
    '''.format(database = _escape(database), table = _escape(table))

    # One {"name": ..., "type": ...} object per column, serialised as JSON
    # (the previous str(...) produced a Python repr with single quotes).
    return json.dumps(get_clickhouse_df(q).to_dict('records'))

In [104]:
# Call the tool object directly with a single dict of arguments to check it outside of an agent
get_table_columns({'database': 'ecommerce', 'table': 'sessions'})

"[{'name': 'user_id', 'type': 'UInt64'}, {'name': 'session_id', 'type': 'UInt64'}, {'name': 'action_date', 'type': 'Date'}, {'name': 'session_duration', 'type': 'UInt64'}, {'name': 'os', 'type': 'String'}, {'name': 'browser', 'type': 'String'}, {'name': 'is_fraud', 'type': 'UInt8'}, {'name': 'revenue', 'type': 'Float32'}]"

In [105]:
# Argument schema for the `get_table_column_distr` tool.
class SQLTableColumn(BaseModel):
    database: str = Field(description="Database name")
    table: str = Field(description="Table name")
    column: str = Field(description="Column name")
    # Explicit default so the schema agrees with the function signature.
    n: Optional[int] = Field(default=10, description="Number of rows, default limit 10")

@tool(args_schema = SQLTableColumn)
def get_table_column_distr(database: str, table: str, column: str, n:int = 10) -> str:
    """Returns top n values for the column in JSON"""
    # The docstring above is used as the tool description shown to the LLM.

    # The schema allows an explicit null for `n`; fall back to the documented
    # default instead of sending `limit None` to the database.
    if n is None:
        n = 10

    q = '''
    select {column}, count(1) as count
    from {database}.{table}
    group by 1
    order by 2 desc
    limit {n}
    format TabSeparatedWithNames
    '''.format(database = database, table = table, column = column, n = n)

    # tolist() converts numpy scalars to plain Python values, so numeric
    # columns serialise as numbers rather than numpy reprs; default=str covers
    # anything json cannot encode natively.
    top_values = get_clickhouse_df(q)[column].tolist()
    return json.dumps(top_values, default = str)

In [106]:
# Direct call of the tool with a dict of arguments; `n` is omitted, so the function default of 10 applies
get_table_column_distr({'database': 'ecommerce', 'table': 'sessions', 'column': 'os'})

"['Windows', 'Android', 'iOS']"

In [107]:
from langchain.tools.render import format_tool_to_openai_function

# OpenAI function specifications for the three SQL tools, in the order they are bound to the model
sql_functions = [
    format_tool_to_openai_function(sql_tool)
    for sql_tool in (execute_sql, get_table_columns, get_table_column_distr)
]

In [108]:
# Lookup from tool name (as emitted by the model in a function call) to the tool object.
# Each tool's `.name` equals the name of the decorated function.
sql_tools = {
    sql_tool.name: sql_tool
    for sql_tool in (execute_sql, get_table_columns, get_table_column_distr)
}

### OpenAI Functions Agent

In [109]:
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.agents.format_scratchpad import format_to_openai_function_messages
from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser

In [61]:
# Chat model with the SQL tool specifications attached as OpenAI functions
llm = ChatOpenAI(
    model = 'gpt-4-1106-preview',
    temperature = 0.1,
).bind(functions = sql_functions)

In [62]:
system_message = '''
You are working as a product analyst for the e-commerce company. 
Your work is very important, since your product team makes decisions based on the data you provide. So, you are extremely accurate with the numbers you provided. 
If you're not sure about the details of the request, you don't provide the answer and ask follow-up questions to have a clear understanding.
You are very helpful and try your best to answer the questions.

All the data is stored in SQL Database. Here is the list of tables (in the format <database>.<table>) with descriptions:
- ecommerce.users - information about the customers, one row - one customer
- ecommerce.sessions - information about the sessions customers made on our web site, one row - one session
'''

# Prompt layout for the analyst agent: system instructions, the user's question,
# then the messages produced from previous tool calls (the agent scratchpad).
analyst_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_message),
        ("user", "{question}"),
        # Filled with the formatted intermediate steps on every agent iteration
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

In [63]:
# Agent pipeline: map the raw input onto the prompt variables, render the prompt,
# call the function-enabled model and parse its reply into an agent action or a final answer.
analyst_agent = (
    {
        "question": lambda inputs: inputs["question"],
        "agent_scratchpad": lambda inputs: format_to_openai_function_messages(
            inputs["intermediate_steps"]
        ),
    }
    | analyst_prompt
    | llm
    | OpenAIFunctionsAgentOutputParser()
)

In [50]:
# One agent step with no previous tool calls: the result is the next action the model wants to take, the tool itself is not executed here
analyst_agent.invoke({"question": "How many active customers from the United Kingdom do we have?", "intermediate_steps": []})

AgentActionMessageLog(tool='execute_sql', tool_input={'query': "SELECT COUNT(DISTINCT user_id) AS active_customers_uk FROM ecommerce.sessions WHERE country = 'United Kingdom' AND active = TRUE"}, log='\nInvoking: `execute_sql` with `{\'query\': "SELECT COUNT(DISTINCT user_id) AS active_customers_uk FROM ecommerce.sessions WHERE country = \'United Kingdom\' AND active = TRUE"}`\n\n\n', message_log=[AIMessage(content='', additional_kwargs={'function_call': {'arguments': '{"query":"SELECT COUNT(DISTINCT user_id) AS active_customers_uk FROM ecommerce.sessions WHERE country = \'United Kingdom\' AND active = TRUE"}', 'name': 'execute_sql'}})])

In [67]:
from langchain_core.agents import AgentFinish

question = "How many active customers from the United Kingdom do we have?"
intermediate_steps = []
num_iters = 0
# Defined up front so the final report works even when the iteration cap is
# reached before the agent returns an AgentFinish (previously a NameError).
model_output = None

# Hand-rolled agent loop: ask the agent for its next step, execute the requested
# tool, feed the observation back, and stop on a final answer or after 10 calls.
while num_iters < 10:
    output = analyst_agent.invoke(
        {
            "question": question,
            "intermediate_steps": intermediate_steps,
        }
    )
    num_iters += 1

    if isinstance(output, AgentFinish):
        model_output = output.return_values["output"]
        break

    print(f'Executing tool: {output.tool}, arguments: {output.tool_input}')
    observation = sql_tools[output.tool](output.tool_input)
    print(f'Observation: {observation}')
    print()
    intermediate_steps.append((output, observation))

if model_output is None:
    print(f'No final answer after {num_iters} iterations')
else:
    print('Model output:', model_output)

[32;1m[1;3m[chain/start][0m [1m[1:chain:RunnableSequence] Entering Chain run with input:
[0m{
  "question": "How many active customers from the United Kingdom do we have?",
  "intermediate_steps": []
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RunnableSequence > 2:chain:RunnableParallel] Entering Chain run with input:
[0m{
  "question": "How many active customers from the United Kingdom do we have?",
  "intermediate_steps": []
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RunnableSequence > 2:chain:RunnableParallel > 3:chain:<lambda>] Entering Chain run with input:
[0m{
  "question": "How many active customers from the United Kingdom do we have?",
  "intermediate_steps": []
}
[36;1m[1;3m[chain/end][0m [1m[1:chain:RunnableSequence > 2:chain:RunnableParallel > 3:chain:<lambda>] [0ms] Exiting Chain run with output:
[0m{
  "output": "How many active customers from the United Kingdom do we have?"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RunnableSequence > 2:chain:Runnable

In [69]:
from langchain.agents import AgentExecutor

analyst_agent_executor = AgentExecutor(
    agent=analyst_agent,
    tools=[execute_sql, get_table_columns, get_table_column_distr],
    verbose=True,
    max_iterations=10, # early stopping criteria
    # `analyst_agent` is an LCEL runnable, which AgentExecutor wraps in a
    # RunnableAgent. That wrapper only implements the 'force' stopping method;
    # 'generate' raises ValueError as soon as the iteration limit is reached.
    early_stopping_method='force',
)

analyst_agent_executor.invoke(
  {"question": "How many active customers from the United Kingdom do we have?"}
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `execute_sql` with `{'query': "SELECT COUNT(*) AS active_customers_uk FROM ecommerce.users WHERE country = 'United Kingdom' AND active = TRUE"}`


[0m[36;1m[1;3mCode: 47. DB::Exception: Missing columns: 'active' while processing query: 'SELECT count() AS active_customers_uk FROM ecommerce.users WHERE (country = 'United Kingdom') AND (active = true)', required columns: 'country' 'active', maybe you meant: 'country'. (UNKNOWN_IDENTIFIER) (version 23.12.1.414 (official build))
[0m[32;1m[1;3m
Invoking: `get_table_columns` with `{'database': 'ecommerce', 'table': 'users'}`


[0m[33;1m[1;3m[{'name': 'user_id', 'type': 'UInt64'}, {'name': 'country', 'type': 'String'}, {'name': 'is_active', 'type': 'UInt8'}, {'name': 'age', 'type': 'UInt64'}][0m[32;1m[1;3m
Invoking: `execute_sql` with `{'query': "SELECT COUNT(*) AS active_customers_uk FROM ecommerce.users WHERE country = 'United Kingdom' AND is_active = 1"}`




{'question': 'How many active customers from the United Kingdom do we have?',
 'output': 'We have 111,469 active customers from the United Kingdom.'}

In [112]:
langchain.debug = True

In [168]:
from langchain.agents import AgentType, Tool, initialize_agent
from langchain.schema import SystemMessage

# The system prompt is handed to the agent through agent_kwargs
agent_kwargs = {"system_message": SystemMessage(content=system_message)}

# Same analyst agent as above, built with the initialize_agent helper
analyst_agent_openai = initialize_agent(
    tools=[execute_sql, get_table_columns, get_table_column_distr],
    llm=ChatOpenAI(model = 'gpt-4-1106-preview', temperature=0.1),
    agent=AgentType.OPENAI_FUNCTIONS,
    agent_kwargs=agent_kwargs,
    max_iterations=10,
    early_stopping_method='generate',
    verbose=True,
)

In [125]:
analyst_agent_openai.get_input_schema().schema()

{'title': 'ChainInput',
 'type': 'object',
 'properties': {'input': {'title': 'Input'}}}

In [126]:
analyst_agent_openai.get_output_schema().schema()

{'title': 'ChainOutput',
 'type': 'object',
 'properties': {'output': {'title': 'Output'}}}

In [127]:
analyst_agent_openai.run("How many active customers from the United Kingdom do we have?")

[32;1m[1;3m[chain/start][0m [1m[1:chain:AgentExecutor] Entering Chain run with input:
[0m{
  "input": "How many active customers from the United Kingdom do we have?"
}
[32;1m[1;3m[llm/start][0m [1m[1:chain:AgentExecutor > 2:llm:ChatOpenAI] Entering LLM run with input:
[0m{
  "prompts": [
    "System: \nYou are working as a product analyst for the e-commerce company. \nYour work is very important, since your product team makes decisions based on the data you provide. So, you are extremely accurate with the numbers you provided. \nIf you're not sure about the details of the request, you don't provide the answer and ask follow-up questions to have a clear understanding.\nYou are very helpful and try your best to answer the questions.\n\nAll the data is stored in SQL Database. Here is the list of tables (in the format <database>.<table>) with descriptions:\n- ecommerce.users - information about the customers, one row - one customer\n- ecommerce.sessions - information about the se

'We have 111,469 active customers from the United Kingdom.'

In [128]:
langchain.debug = False

In [129]:
analyst_agent_openai.run("How many active customers from the United Kingdom do we have?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `execute_sql` with `{'query': "SELECT COUNT(*) AS active_customers_uk FROM ecommerce.users WHERE country = 'United Kingdom' AND is_active = 1"}`


[0m[36;1m[1;3m111469
[0m[32;1m[1;3mWe have 111,469 active customers from the United Kingdom.[0m

[1m> Finished chain.[0m


'We have 111,469 active customers from the United Kingdom.'

### ReAct agent

In [135]:
# For this agent type the system text is supplied as the prompt `prefix`.
# Note that this rebinds `agent_kwargs` from the OpenAI functions agent cell.
agent_kwargs = {"prefix": system_message}

analyst_agent_react = initialize_agent(
    tools=[execute_sql, get_table_columns, get_table_column_distr],
    llm=ChatOpenAI(model = 'gpt-4-1106-preview', temperature=0.1),
    agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,
    agent_kwargs=agent_kwargs,
    max_iterations=10,
    early_stopping_method='generate',
    verbose=True,
)

In [167]:
analyst_agent_react.agent

StructuredChatAgent(llm_chain=LLMChain(prompt=ChatPromptTemplate(input_variables=['agent_scratchpad', 'input'], messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='\nYou are working as a product analyst for the e-commerce company. \nYour work is very important, since your product team makes decisions based on the data you provide. So, you are extremely accurate with the numbers you provided. \nIf you\'re not sure about the details of the request, you don\'t provide the answer and ask follow-up questions to have a clear understanding.\nYou are very helpful and try your best to answer the questions.\n\nAll the data is stored in SQL Database. Here is the list of tables (in the format <database>.<table>) with descriptions:\n- ecommerce.users - information about the customers, one row - one customer\n- ecommerce.sessions - information about the sessions customers made on our web site, one row - one session\n\n\nexecute_sql: execute_sql(query: str) -> st

In [165]:
# Print the raw template text of every message in the ReAct agent's prompt
for message in analyst_agent_react.agent.llm_chain.prompt.messages:
    print(message.prompt.template)


You are working as a product analyst for the e-commerce company. 
Your work is very important, since your product team makes decisions based on the data you provide. So, you are extremely accurate with the numbers you provided. 
If you're not sure about the details of the request, you don't provide the answer and ask follow-up questions to have a clear understanding.
You are very helpful and try your best to answer the questions.

All the data is stored in SQL Database. Here is the list of tables (in the format <database>.<table>) with descriptions:
- ecommerce.users - information about the customers, one row - one customer
- ecommerce.sessions - information about the sessions customers made on our web site, one row - one session


execute_sql: execute_sql(query: str) -> str - Returns the result of SQL query execution, args: {{'query': {{'title': 'Query', 'description': 'SQL query to execute', 'type': 'string'}}}}
get_table_columns: get_table_columns(database: str, table: str) -> str 

In [145]:
langchain.debug = False
analyst_agent_react.run("How many active customers from the United Kingdom do we have?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: To answer this question, I need to define what is meant by "active customers" and then query the database for users from the United Kingdom who meet this criteria. I will first need to know the structure of the `ecommerce.users` table to understand what columns are available that could help identify active customers and their location.

Action:
```
{
  "action": "get_table_columns",
  "action_input": {
    "database": "ecommerce",
    "table": "users"
  }
}
```

[0m
Observation: [33;1m[1;3m[{'name': 'user_id', 'type': 'UInt64'}, {'name': 'country', 'type': 'String'}, {'name': 'is_active', 'type': 'UInt8'}, {'name': 'age', 'type': 'UInt64'}][0m
Thought:[32;1m[1;3mThe `ecommerce.users` table contains a column named `is_active` which likely indicates whether a customer is active or not, and a `country` column which can be used to filter users by their location. Since we are interested in active customers from the 

'There are 111,469 active customers from the United Kingdom.'

### Plan-and-execute agent

In [171]:
# ! pip install langchain_experimental

In [170]:
import langchain_experimental
langchain_experimental.__version__

'0.0.47'

In [192]:
from langchain_experimental.plan_and_execute import PlanAndExecute, load_agent_executor, load_chat_planner

In [173]:
help(load_agent_executor)

Help on function load_agent_executor in module langchain_experimental.plan_and_execute.executors.agent_executor:

load_agent_executor(llm: langchain_core.language_models.base.BaseLanguageModel, tools: List[langchain_core.tools.BaseTool], verbose: bool = False, include_task_in_prompt: bool = False) -> langchain_experimental.plan_and_execute.executors.base.ChainExecutor
    Load an agent executor.
    
    Args:
        llm: BaseLanguageModel
        tools: List[BaseTool]
        verbose: bool. Defaults to False.
        include_task_in_prompt: bool. Defaults to False.
    
    Returns:
        ChainExecutor



In [185]:
# One chat model is shared by the planner and the executor
model = ChatOpenAI(model = 'gpt-4-1106-preview', temperature=0.1)
planner = load_chat_planner(model)
executor = load_agent_executor(
    model,
    tools = [execute_sql, get_table_columns, get_table_column_distr],
    verbose=True,
)
# The PlanAndExecute agent is assembled in a later cell, after the executor prompt has been patched.

In [190]:
# Prepend the analyst system message to the executor's first prompt message.
# The guard keeps the cell idempotent: without it every re-run prepends the
# system message again and the prompt keeps growing.
executor_system_prompt = executor.chain.agent.llm_chain.prompt.messages[0].prompt
if not executor_system_prompt.template.startswith(system_message):
    executor_system_prompt.template = system_message + '\n' + executor_system_prompt.template

In [193]:
analyst_agent_plan_and_execute = PlanAndExecute(planner=planner, executor=executor)

In [194]:
# NOTE: in the recorded run this call did not finish; it was aborted by a 429 RateLimitError ("Request too large") from the OpenAI API
analyst_agent_plan_and_execute.run("How many active customers from the United Kingdom do we have?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction:
```
{
  "action": "Final Answer",
  "action_input": "The data regarding customers is stored in a SQL Database, specifically in the table named 'ecommerce.users'."
}
```[0m

[1m> Finished chain.[0m


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: To determine the criteria for an "active" customer, I need to understand what data is available in the 'ecommerce.users' and 'ecommerce.sessions' tables. This will help me identify which columns could be used to define an "active" customer based on recent purchases, account activity, or other engagement metrics.

Action:
```
{
  "action": "get_table_columns",
  "action_input": {
    "database": "ecommerce",
    "table": "users"
  }
}
```

[0m
Observation: [33;1m[1;3m[{'name': 'user_id', 'type': 'UInt64'}, {'name': 'country', 'type': 'String'}, {'name': 'is_active', 'type': 'UInt8'}, {'name': 'age', 'type': 'UInt64'}][0m
Thought:[32;1m[1;3mThe 'eco

RateLimitError: Error code: 429 - {'error': {'message': 'Request too large for gpt-4-1106-preview in organization org-eGZuuWbzTfKNKZWXUZ0upLq5 on tokens_usage_based per min: Limit 150000, Requested 235832. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens_usage_based', 'param': None, 'code': 'rate_limit_exceeded'}}

### BabyAGI with tools

In [211]:
# ! pip install tiktoken

In [196]:
# ! pip install chromadb

In [197]:
from langchain.embeddings.openai import OpenAIEmbeddings

In [198]:
embedding = OpenAIEmbeddings()

In [199]:
from langchain.vectorstores import Chroma
# Directory (relative to the working directory) passed to Chroma as its persistence location
persist_directory = 'vector_store'

# Vector store handed to BabyAGI below; embeddings come from the `embedding` object created above
vectordb = Chroma(
    persist_directory=persist_directory,
    embedding_function=embedding
)

In [230]:
from langchain.agents import AgentExecutor, Tool, ZeroShotAgent
from langchain.chains import LLMChain
from langchain.llms import OpenAI
from langchain.utilities import SerpAPIWrapper
from langchain.prompts import PromptTemplate
from langchain_experimental.autonomous_agents import BabyAGI

todo_prompt_message = '''
You are a planner who is an expert at coming up with a todo list for a given objective. Come up with a todo list for this objective: {objective}
'''

todo_prompt = PromptTemplate.from_template(todo_prompt_message)
# gpt-4-1106-preview is a chat model, so it has to be driven through ChatOpenAI;
# the completion-style `OpenAI` class would send it to the completions endpoint,
# which does not serve chat models.
todo_chain = LLMChain(llm=ChatOpenAI(temperature=0.1, model = 'gpt-4-1106-preview'), prompt=todo_prompt)
# NOTE(review): ZeroShotAgent passes a single string as "Action Input", so the
# multi-argument tools (get_table_columns, get_table_column_distr) fail schema
# validation when the agent calls them - see the ValidationError in the run below.
tools = [
    execute_sql, get_table_columns, get_table_column_distr,
    Tool(
        name="TODO",
        func=todo_chain.run,
        description="useful for when you need to come up with todo lists. Input: an objective to create a todo list for. Output: a todo list for that objective. Please be very clear what the objective is!",
    )
]


prefix = """
You are an AI who performs one task based on the following objective: {objective}. Take into account these previously completed tasks: {context}.

You are asked questions related to analytics for e-commerce product.
Your work is very important, since your product team makes decisions based on the data you provide. So, you are extremely accurate with the numbers you provided. 
If you're not sure about the details of the request, you don't provide the answer and ask follow-up questions to have a clear understanding.
You are very helpful and try your best to answer the questions.

All the data is stored in SQL Database. Here is the list of tables (in the format <database>.<table>) with descriptions:
- ecommerce.users - information about the customers, one row - one customer
- ecommerce.sessions - information about the sessions customers made on our web site, one row - one session
"""
suffix = """Question: {task}
{agent_scratchpad}"""
# Assemble the ZeroShotAgent prompt: custom prefix, tool descriptions, then the task-specific suffix
prompt = ZeroShotAgent.create_prompt(
    tools,
    input_variables=["objective", "task", "context", "agent_scratchpad"],
    suffix=suffix,
    prefix=prefix,
)


In [231]:
# Completion-style LLM used for the BabyAGI agent (this rebinds `llm` from the earlier cells)
llm = OpenAI(temperature=0.1)
llm_chain = LLMChain(prompt=prompt, llm=llm)
# Names of the tools the agent is allowed to call
tool_names = [t.name for t in tools]
analyst_agent_babyagi = ZeroShotAgent(allowed_tools=tool_names, llm_chain=llm_chain)
analyst_agent_babyagi_executor = AgentExecutor.from_agent_and_tools(
    tools=tools,
    agent=analyst_agent_babyagi,
    verbose=True,
)

In [232]:
# BabyAGI controller: the agent executor runs the individual tasks, the vector store is passed in as `vectorstore`
baby_agi = BabyAGI.from_llm(
    llm=llm,
    task_execution_chain=analyst_agent_babyagi_executor,
    vectorstore=vectordb,
    max_iterations=10,
    verbose=True,
)

In [233]:
# baby_agi

In [234]:
# NOTE: the recorded run fails with a pydantic ValidationError - the agent passes the single string "ecommerce.users" to get_table_columns, which requires separate `database` and `table` fields
baby_agi("Find, how many active customers from the United Kingdom we have.")

[95m[1m
*****TASK LIST*****
[0m[0m
1: Make a todo list
[92m[1m
*****NEXT TASK*****
[0m[0m
1: Make a todo list


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to find out how many active customers from the United Kingdom we have
Action: execute_sql
Action Input: SELECT COUNT(*) FROM ecommerce.users WHERE country = 'UK' AND active = 1[0m
Observation: [36;1m[1;3mCode: 47. DB::Exception: Missing columns: 'active' while processing query: 'SELECT count() FROM ecommerce.users WHERE (country = 'UK') AND (active = 1)', required columns: 'country' 'active', maybe you meant: 'country'. (UNKNOWN_IDENTIFIER) (version 23.12.1.414 (official build))
[0m
Thought:[32;1m[1;3m I need to get the columns of the ecommerce.users table
Action: get_table_columns
Action Input: ecommerce.users[0m

ValidationError: 1 validation error for SQLTable
table
  field required (type=value_error.missing)

In [235]:
langchain.debug = True

In [236]:
baby_agi("Find, how many active customers from the United Kingdom we have.")

[32;1m[1;3m[chain/start][0m [1m[1:chain:BabyAGI] Entering Chain run with input:
[0m{
  "objective": "Find, how many active customers from the United Kingdom we have."
}
[95m[1m
*****TASK LIST*****
[0m[0m
1: Make a todo list
[92m[1m
*****NEXT TASK*****
[0m[0m
1: Make a todo list
[32;1m[1;3m[chain/start][0m [1m[1:chain:BabyAGI > 2:chain:AgentExecutor] Entering Chain run with input:
[0m{
  "objective": "Find, how many active customers from the United Kingdom we have.",
  "context": "",
  "task": "Make a todo list"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:BabyAGI > 2:chain:AgentExecutor > 3:chain:LLMChain] Entering Chain run with input:
[0m{
  "objective": "Find, how many active customers from the United Kingdom we have.",
  "context": "",
  "task": "Make a todo list",
  "agent_scratchpad": "",
  "stop": [
    "\nObservation:",
    "\n\tObservation:"
  ]
}
[32;1m[1;3m[llm/start][0m [1m[1:chain:BabyAGI > 2:chain:AgentExecutor > 3:chain:LLMChain > 4:llm:OpenAI] En

ValidationError: 1 validation error for SQLTable
table
  field required (type=value_error.missing)

### AutoGPT

In [249]:
! rm -rf autogpt

In [251]:
embedding = OpenAIEmbeddings()
from langchain.vectorstores import Chroma
# Must be the directory wiped by the `! rm -rf autogpt` cell above; the previous
# value 'autogptt' never matched it, so the cleanup did not reset this store.
persist_directory = 'autogpt'

# Memory store for the AutoGPT agent (this rebinds `vectordb` from the BabyAGI section)
vectordb = Chroma(
    persist_directory=persist_directory,
    embedding_function=embedding
)

In [252]:
@tool()
def get_tables() -> str:
    """Returns list of tables in the format <database>.<table>"""
    # The docstring above is used as the tool description shown to the LLM.
    # Serialise the list so the return value matches the declared `str` type
    # (previously a Python list was returned).
    return json.dumps(['ecommerce.users', 'ecommerce.sessions'])

In [253]:
from langchain_experimental.autonomous_agents import AutoGPT

# AutoGPT agent with the SQL tools plus get_tables; the Chroma store is exposed to it as a retriever for memory
analyst_agent_autogpt = AutoGPT.from_llm_and_tools(
    ai_name="Harry",
    ai_role="Assistant",
    llm=ChatOpenAI(model = 'gpt-4-1106-preview', temperature=0.1),
    tools=[execute_sql, get_table_columns, get_table_column_distr, get_tables],
    memory=vectordb.as_retriever(),
)

# Print every formatted prompt while the chain runs
analyst_agent_autogpt.chain.verbose = True

In [254]:
analyst_agent_autogpt.chain.prompt

AutoGPTPrompt(input_variables=['memory', 'messages', 'goals', 'user_input'], ai_name='Harry', ai_role='Assistant', tools=[StructuredTool(name='execute_sql', description='execute_sql(query: str) -> str - Returns the result of SQL query execution', args_schema=<class '__main__.SQLQuery'>, func=<function execute_sql at 0x12d9359d0>), StructuredTool(name='get_table_columns', description='get_table_columns(database: str, table: str) -> str - Returns list of table column names and types in JSON', args_schema=<class '__main__.SQLTable'>, func=<function get_table_columns at 0x12d935af0>), StructuredTool(name='get_table_column_distr', description='get_table_column_distr(database: str, table: str, column: str, n: int = 10) -> str - Returns top n values for the column in JSON', args_schema=<class '__main__.SQLTableColumn'>, func=<function get_table_column_distr at 0x12d935940>), StructuredTool(name='get_tables', description='get_tables() -> str - Returns list of tables in the format <database>.<t

In [255]:
langchain.debug = False

In [256]:
analyst_agent_autogpt.run(["Find how many active customers from the United Kingdom we have."])



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: You are Harry, Assistant
Your decisions must always be made independently without seeking user assistance.
Play to your strengths as an LLM and pursue simple strategies with no legal complications.
If you have completed all your tasks, make sure to use the "finish" command.

GOALS:

1. Find how many active customers from the United Kingdom we have.


Constraints:
1. ~4000 word limit for short term memory. Your short term memory is short, so immediately save important information to files.
2. If you are unsure how you previously did something or want to recall past events, thinking about similar events will help you remember.
3. No user assistance
4. Exclusively use the commands listed in double quotes e.g. "command name"

Commands:
1. execute_sql: execute_sql(query: str) -> str - Returns the result of SQL query execution, args json schema: {"query": {"title": "Query", "description": "SQL query to e

Number of requested results 4 is greater than number of elements in index 1, updating n_results = 1


Prompt after formatting:
[32;1m[1;3mSystem: You are Harry, Assistant
Your decisions must always be made independently without seeking user assistance.
Play to your strengths as an LLM and pursue simple strategies with no legal complications.
If you have completed all your tasks, make sure to use the "finish" command.

GOALS:

1. Find how many active customers from the United Kingdom we have.


Constraints:
1. ~4000 word limit for short term memory. Your short term memory is short, so immediately save important information to files.
2. If you are unsure how you previously did something or want to recall past events, thinking about similar events will help you remember.
3. No user assistance
4. Exclusively use the commands listed in double quotes e.g. "command name"

Commands:
1. execute_sql: execute_sql(query: str) -> str - Returns the result of SQL query execution, args json schema: {"query": {"title": "Query", "description": "SQL query to execute", "type": "string"}}
2. get_table_co

Number of requested results 4 is greater than number of elements in index 2, updating n_results = 2


Prompt after formatting:
[32;1m[1;3mSystem: You are Harry, Assistant
Your decisions must always be made independently without seeking user assistance.
Play to your strengths as an LLM and pursue simple strategies with no legal complications.
If you have completed all your tasks, make sure to use the "finish" command.

GOALS:

1. Find how many active customers from the United Kingdom we have.


Constraints:
1. ~4000 word limit for short term memory. Your short term memory is short, so immediately save important information to files.
2. If you are unsure how you previously did something or want to recall past events, thinking about similar events will help you remember.
3. No user assistance
4. Exclusively use the commands listed in double quotes e.g. "command name"

Commands:
1. execute_sql: execute_sql(query: str) -> str - Returns the result of SQL query execution, args json schema: {"query": {"title": "Query", "description": "SQL query to execute", "type": "string"}}
2. get_table_co

Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3


Prompt after formatting:
[32;1m[1;3mSystem: You are Harry, Assistant
Your decisions must always be made independently without seeking user assistance.
Play to your strengths as an LLM and pursue simple strategies with no legal complications.
If you have completed all your tasks, make sure to use the "finish" command.

GOALS:

1. Find how many active customers from the United Kingdom we have.


Constraints:
1. ~4000 word limit for short term memory. Your short term memory is short, so immediately save important information to files.
2. If you are unsure how you previously did something or want to recall past events, thinking about similar events will help you remember.
3. No user assistance
4. Exclusively use the commands listed in double quotes e.g. "command name"

Commands:
1. execute_sql: execute_sql(query: str) -> str - Returns the result of SQL query execution, args json schema: {"query": {"title": "Query", "description": "SQL query to execute", "type": "string"}}
2. get_table_co

'The number of active customers from the United Kingdom is 111,469.'

### SQLDatabaseChain

In [258]:
from langchain.agents import create_sql_agent
from langchain.agents.agent_toolkits import SQLDatabaseToolkit
from langchain.agents.agent_types import AgentType
from langchain.llms.openai import OpenAI
from langchain.sql_database import SQLDatabase

In [261]:
# Connection URI for the ClickHouse instance on localhost, database `default`
uri = 'clickhouse+native://localhost/default'
db = SQLDatabase.from_uri(uri)
# Toolkit built from the database handle and a completion-style OpenAI LLM (temperature 0)
toolkit = SQLDatabaseToolkit(db=db, llm=OpenAI(temperature=0))