In [1]:
from langchain.prompts import ChatPromptTemplate
from langchain.chat_models import ChatOpenAI



In [7]:
import json
import os

with open('/Users/marie/Documents/github/config.json', 'r') as f:
    config = json.load(f)

    for key in config:
        os.environ[key] = config[key]

## Extraction example
### Basic

In [8]:
import openai

extraction_functions = [
    {
        "name": "extract_information",
        "description": "extracts information",
        "parameters": {
            "type": "object",
            "properties": {
                "metric": {
                    "type": "string",
                    "description": "main metric we need to calculate, for example, 'number of users' or 'number of sessions'",
                },
                "filters": {
                    "type": "string",
                    "description": "filters to apply to the calculation (do not include filters on dates here)",
                },
                "dimensions": {
                    "type": "string",
                    "description": "parameters to split you metric by",
                },
                "period_start": {
                    "type": "string",
                    "description": "start day of the period for report",
                },
                "period_end": {
                    "type": "string",
                    "description": "end day of the period for report",
                },
                "output_type": {
                    "type": "string",
                    "description": "the desired output",
                    "enum": ["number", "visualisation"]
                }
            },
            "required": ["metric"],
        },
    }
]

messages = [
    {
        "role": "system",
        "content": "Extract the relevant information from the provided request. Extract ONLY the information presented in the initial request, don't add anything else. Return partial information if something is missing."
    },
    {
        "role": "user",
        "content": "How did number of iOS users change over time?"
    }
]

response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo-1106",
    messages=messages,
    functions=extraction_functions
)

print(response)

APIRemovedInV1: 

You tried to access openai.ChatCompletion, but this is no longer supported in openai>=1.0.0 - see the README at https://github.com/openai/openai-python for the API.

You can run `openai migrate` to automatically upgrade your codebase to use the 1.0.0 interface. 

Alternatively, you can pin your installation to the old version, e.g. `pip install openai==0.28`

A detailed migration guide is available here: https://github.com/openai/openai-python/discussions/742


In [9]:
from pydantic import BaseModel, Field, ConfigDict
from typing import Optional

class RequestStructure(BaseModel):
  """extracts information"""
  metric: str = Field(description = "main metric we need to calculate, for example, 'number of users' or 'number of sessions'")
  filters: Optional[str] = Field(description = "filters to apply to the calculation (do not include filters on dates here)")
  dimensions: Optional[str] = Field(description = "parameters to split you metric by")
  period_start: Optional[str] = Field(description = "start day of the period for report")
  period_end: Optional[str] = Field(description = "end day of the period for report")
  output_type: Optional[str] = Field(description = "the desired output", enum = ["number", "visualisation"])

In [10]:
from langchain.utils.openai_functions import convert_pydantic_to_openai_function
extract_info_function = convert_pydantic_to_openai_function(RequestStructure, name = 'extract_information')

In [11]:
extract_info_function

{'name': 'extract_information',
 'description': 'extracts information',
 'parameters': {'description': 'extracts information',
  'properties': {'metric': {'description': "main metric we need to calculate, for example, 'number of users' or 'number of sessions'",
    'title': 'Metric',
    'type': 'string'},
   'filters': {'anyOf': [{'type': 'string'}, {'type': 'null'}],
    'description': 'filters to apply to the calculation (do not include filters on dates here)',
    'title': 'Filters'},
   'dimensions': {'anyOf': [{'type': 'string'}, {'type': 'null'}],
    'description': 'parameters to split you metric by',
    'title': 'Dimensions'},
   'period_start': {'anyOf': [{'type': 'string'}, {'type': 'null'}],
    'description': 'start day of the period for report',
    'title': 'Period Start'},
   'period_end': {'anyOf': [{'type': 'string'}, {'type': 'null'}],
    'description': 'end day of the period for report',
    'title': 'Period End'},
   'output_type': {'anyOf': [{'type': 'string'}, 

In [12]:
model = ChatOpenAI(temperature=0.1, model = 'gpt-3.5-turbo-1106')\
  .bind(functions = [extract_info_function])

prompt = ChatPromptTemplate.from_messages([
    ("system", "Extract the relevant information from the provided request."),
    ("human", "{request}")
])

extraction_chain = prompt | model

In [13]:
extraction_chain.invoke({'request': "How many customers visited our site on iOS in April 2023 from different countries?"})

AIMessage(content='', additional_kwargs={'function_call': {'arguments': '{"metric":"number of users","filters":"device = \'iOS\'","dimensions":"country","period_start":"2023-04-01","period_end":"2023-04-30","output_type":"number"}', 'name': 'extract_information'}})

In [14]:
print("""AIMessage(content='', additional_kwargs={'function_call': {'name': 'extract_information', 'arguments': '{"metric":"number of customers","filters":"device = \'iOS\'","dimensions":"country","period_start":"2023-04-01","period_end":"2023-04-30","output_type":"number"}'}}""")

AIMessage(content='', additional_kwargs={'function_call': {'name': 'extract_information', 'arguments': '{"metric":"number of customers","filters":"device = 'iOS'","dimensions":"country","period_start":"2023-04-01","period_end":"2023-04-30","output_type":"number"}'}}


### Built-in tools

In [15]:
# ! pip install langchain_experimental

In [16]:
# !pip install -U duckduckgo-search

In [18]:
from langchain.agents import Tool
# from langchain_experimental.utilities import PythonREPL
from langchain.tools import DuckDuckGoSearchRun, DuckDuckGoSearchResults

In [20]:
# python_repl = PythonREPL()
# python_repl.run("print(100*30/40)")

In [21]:
os.environ["GOOGLE_CSE_ID"] = "<id>"
os.environ["GOOGLE_API_KEY"] = "<your_api_key>"

In [22]:
from langchain.tools import Tool
from langchain.utilities import GoogleSearchAPIWrapper

search = GoogleSearchAPIWrapper()
def goole_results(query):
    return search.results(query, 5)


tool = Tool(
    name="Google Search",
    description="Search Google to get basic information about the world.",
    func=top5_results,
)

ImportError: google-api-python-client is not installed. Please install it with `pip install google-api-python-client>=2.100.0`

In [23]:
from langchain.tools.render import format_tool_to_openai_function

format_tool_to_openai_function(tool)

NameError: name 'tool' is not defined

### Custom tool

In [24]:
from langchain.agents import tool

In [25]:
from langchain.agents import tool

@tool
def percentage_difference(metric1: float, metric2: float) -> float:
    """Calculates the percentage difference between metrics"""
    return (metric2 - metric1)/metric1*100

In [26]:
percentage_difference.name

'percentage_difference'

In [27]:
percentage_difference.args

{'metric1': {'title': 'Metric1', 'type': 'number'},
 'metric2': {'title': 'Metric2', 'type': 'number'}}

In [28]:
percentage_difference.description

'percentage_difference(metric1: float, metric2: float) -> float - Calculates the percentage difference between metrics'

In [29]:
format_tool_to_openai_function(percentage_difference)

{'name': 'percentage_difference',
 'description': 'percentage_difference(metric1: float, metric2: float) -> float - Calculates the percentage difference between metrics',
 'parameters': {'title': 'percentage_differenceSchemaSchema',
  'type': 'object',
  'properties': {'metric1': {'title': 'Metric1', 'type': 'number'},
   'metric2': {'title': 'Metric2', 'type': 'number'}},
  'required': ['metric1', 'metric2']}}

In [35]:
# ! pip install langchain==0.0.327

In [37]:
import pydantic 
pydantic.__version__

'2.5.2'

In [34]:
class Metrics(BaseModel):
    metric1: float = Field(description="Base metric value to calculate the difference")
    metric2: float = Field(description="New metric value that we compare with the baseline")

@tool(args_schema=Metrics)
def percentage_difference(metric1: float, metric2: float) -> float:
    """Calculates the percentage difference between metrics"""
    return (metric2 - metric1)/metric1*100

ValidationError: 1 validation error for StructuredTool
args_schema
  subclass of BaseModel expected (type=type_error.subclass; expected_class=BaseModel)

In [119]:
format_tool_to_openai_function(percentage_difference)

{'name': 'percentage_difference',
 'description': 'percentage_difference(metric1: float, metric2: float) -> float - Calculates the percentage difference between metrics',
 'parameters': {'title': 'Metrics',
  'type': 'object',
  'properties': {'metric1': {'title': 'Metric1',
    'description': 'Base metric value to calculate the difference',
    'type': 'number'},
   'metric2': {'title': 'Metric2',
    'description': 'New metric value that we compare with the baseline',
    'type': 'number'}},
  'required': ['metric1', 'metric2']}}

In [122]:
model = ChatOpenAI(temperature=0.1, model = 'gpt-3.5-turbo-1106')\
  .bind(functions = [format_tool_to_openai_function(percentage_difference)])

prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a product analyst willing to help your product team. You are very strict to the point and accurate. You use only facts."),
    ("user", "{request}")
])

analyst_chain = prompt | model

In [125]:
result = analyst_chain.invoke({'request': "In April we had 100 users and in May only 95. What is difference in percent?"})

In [126]:
result

AIMessage(content='', additional_kwargs={'function_call': {'name': 'percentage_difference', 'arguments': '{"metric1":100,"metric2":95}'}})

In [128]:
from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser
analyst_chain = prompt | model | OpenAIFunctionsAgentOutputParser()

In [129]:
result = analyst_chain.invoke({'request': "There were 100 users in April and 110 users in May. How did the number of users changed?"})

In [130]:
result

AgentActionMessageLog(tool='percentage_difference', tool_input={'metric1': 100, 'metric2': 110}, log="\nInvoking: `percentage_difference` with `{'metric1': 100, 'metric2': 110}`\n\n\n", message_log=[AIMessage(content='', additional_kwargs={'function_call': {'name': 'percentage_difference', 'arguments': '{"metric1":100,"metric2":110}'}})])

In [133]:
observation = percentage_difference(result.tool_input)
print(observation)

10.0


In [132]:
result.tool

'percentage_difference'

In [136]:
messages = [
    ("system", "You are a product analyst willing to help your product team. You are very strict to the point and accurate. You use only facts."),
    ("user", "There were 100 users in April and 110 users in May. How did the number of users changed?"),
    ("assistant", "Observation: %f" % observation)
]

model = ChatOpenAI(temperature=0.1, model = 'gpt-3.5-turbo-1106')\
  .bind(functions = [format_tool_to_openai_function(percentage_difference)])

prompt = ChatPromptTemplate.from_messages(messages)

analyst_chain = prompt | model | OpenAIFunctionsAgentOutputParser()

In [138]:
analyst_chain.invoke({})

AgentActionMessageLog(tool='percentage_difference', tool_input={'metric1': 100, 'metric2': 110}, log="\nInvoking: `percentage_difference` with `{'metric1': 100, 'metric2': 110}`\n\n\n", message_log=[AIMessage(content='', additional_kwargs={'function_call': {'name': 'percentage_difference', 'arguments': '{"metric1":100,"metric2":110}'}})])

In [139]:
from langchain.prompts import MessagesPlaceholder

model = ChatOpenAI(temperature=0.1, model = 'gpt-3.5-turbo-1106')\
  .bind(functions = [format_tool_to_openai_function(percentage_difference)])

prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a product analyst willing to help your product team. You are very strict to the point and accurate. You use only facts, not inventing information. You need to be accurate so check all basic info you're using at Wikipedia."),
    ("user", "{request}"),
    MessagesPlaceholder(variable_name="observations")
])

analyst_chain = prompt | model | OpenAIFunctionsAgentOutputParser()
result1 = analyst_chain.invoke({
    'request': "There were 100 users in April and 110 users in May. How did the number of users changed?",
    "observations": []
})

In [143]:
from langchain.agents.format_scratchpad import format_to_openai_functions

In [140]:
result1

AgentActionMessageLog(tool='percentage_difference', tool_input={'metric1': 100, 'metric2': 110}, log="\nInvoking: `percentage_difference` with `{'metric1': 100, 'metric2': 110}`\n\n\n", message_log=[AIMessage(content='', additional_kwargs={'function_call': {'name': 'percentage_difference', 'arguments': '{"metric1":100,"metric2":110}'}})])

In [141]:
result1.message_log

[AIMessage(content='', additional_kwargs={'function_call': {'name': 'percentage_difference', 'arguments': '{"metric1":100,"metric2":110}'}})]

In [144]:
format_to_openai_functions([(result1, observation), ])

[AIMessage(content='', additional_kwargs={'function_call': {'name': 'percentage_difference', 'arguments': '{"metric1":100,"metric2":110}'}}),
 FunctionMessage(content='10.0', name='percentage_difference')]

In [145]:
result2 = analyst_chain.invoke({
    'request': "There were 100 users in April and 110 users in May. How did the number of users changed?",
    "observations": format_to_openai_functions([(result1, observation)])
})

In [146]:
result2

AgentFinish(return_values={'output': 'The number of users increased by 10%.'}, log='The number of users increased by 10%.')

In [192]:
import langchain
langchain.debug = False

In [148]:
result2 = analyst_chain.invoke({
    'request': "There were 100 users in April and 110 users in May. How did the number of users changed?",
    "observations": format_to_openai_functions([(result1, observation)])
})

[32;1m[1;3m[chain/start][0m [1m[1:chain:RunnableSequence] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RunnableSequence > 2:prompt:ChatPromptTemplate] Entering Prompt run with input:
[0m[inputs]
[36;1m[1;3m[chain/end][0m [1m[1:chain:RunnableSequence > 2:prompt:ChatPromptTemplate] [1ms] Exiting Prompt run with output:
[0m{
  "lc": 1,
  "type": "constructor",
  "id": [
    "langchain",
    "prompts",
    "chat",
    "ChatPromptValue"
  ],
  "kwargs": {
    "messages": [
      {
        "lc": 1,
        "type": "constructor",
        "id": [
          "langchain",
          "schema",
          "messages",
          "SystemMessage"
        ],
        "kwargs": {
          "content": "You are a product analyst willing to help your product team. You are very strict to the point and accurate. You use only facts, not inventing information.",
          "additional_kwargs": {}
        }
      },
      {
        "lc": 1,
        "type": "constr

In [152]:
print('''{
  "prompts": [
    "System: You are a product analyst willing to help your product team. You are very strict to the point and accurate. You use only facts, not inventing information.\nHuman: There were 100 users in April and 110 users in May. How did the number of users changed?\nAI: {'name': 'percentage_difference', 'arguments': '{\"metric1\":100,\"metric2\":110}'}\nFunction: 10.0"
  ]
}''')

{
  "prompts": [
    "System: You are a product analyst willing to help your product team. You are very strict to the point and accurate. You use only facts, not inventing information.
Human: There were 100 users in April and 110 users in May. How did the number of users changed?
AI: {'name': 'percentage_difference', 'arguments': '{"metric1":100,"metric2":110}'}
Function: 10.0"
  ]
}


In [177]:
import datetime
import random

class Filters(BaseModel):
    month: str = Field(description="Month of customer's activity in the format %Y-%m-%d")
    city: Optional[str] = Field(description="City of residence for customers (by default no filter)", 
                    enum = ["London", "Berlin", "Amsterdam", "Paris"])

@tool(args_schema=Filters)
def get_monthly_active_users(month: str, city: str = None) -> float:
    """Returns number of active customers for the specified month"""
    dt = datetime.datetime.strptime(month, '%Y-%m-%d')
    total = dt.year + 10*dt.month
    if city is None:
        return total
    else:
        return total*random.random()

In [179]:
# !pip install wikipedia

In [188]:
import wikipedia

class Wikipedia(BaseModel):
    term: str = Field(description="Term to search for")

@tool(args_schema=Wikipedia)
def get_summary(term: str) -> str:
    """Returns basic knowledge related to the term from Wikipedia"""
    return wikipedia.summary(term)

In [189]:
toolkit = {
    'percentage_difference': percentage_difference,
    'get_monthly_active_users': get_monthly_active_users,
    'get_summary': get_summary
}

analyst_functions = [format_tool_to_openai_function(f) for f in toolkit.values()]

In [212]:
from langchain.prompts import MessagesPlaceholder

model = ChatOpenAI(temperature=0.1, model = 'gpt-4-1106-preview')\
  .bind(functions = analyst_functions)

prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a product analyst willing to help your product team. You are very strict to the point and accurate. \
        You use only information provided in the initial request. \
        If you need to determine some information i.e. what is the name of the capital, you can use Wikipedia."),
    ("user", "{request}"),
    MessagesPlaceholder(variable_name="observations")
])

analyst_chain = prompt | model | OpenAIFunctionsAgentOutputParser()

In [213]:
# result1 = analyst_chain.invoke({
#     'request': "How many users were in April 2023 from Berlin?",
#     "observations": []
# })

In [224]:
result1 = analyst_chain.invoke({
    'request': "How did the number of users from the capital of Germany between April and May 2023?",
    "observations": []
})

In [225]:
result1

AgentActionMessageLog(tool='get_summary', tool_input={'term': 'Capital of Germany'}, log="\nInvoking: `get_summary` with `{'term': 'Capital of Germany'}`\n\n\n", message_log=[AIMessage(content='', additional_kwargs={'function_call': {'name': 'get_summary', 'arguments': '{"term":"Capital of Germany"}'}})])

In [228]:
observation1 = toolkit[result1.tool](result1.tool_input)
print(observation)

The capital of Germany is the  city state of Berlin. It is the seat of the President of Germany, whose official residence is Schloss Bellevue. The Bundesrat ("federal council") is the representation of the Federal States (Bundesländer) of Germany and has its seat at the former Prussian Herrenhaus (House of Lords). Though most of the ministries are seated in Berlin, some of them, as well as some minor departments, are seated in Bonn, the former capital of West Germany.
Although Berlin is officially the capital of the Federal Republic of Germany, 8,000 out of the 18,000 total officials employed at the federal bureaucracy still work in Bonn, about 600 km (370 mi) away from Berlin.


In [229]:
result2 = analyst_chain.invoke({
    'request': "How did the number of users from the capital of Germany between April and May 2023?",
    "observations": format_to_openai_functions([(result1, observation1)])
})

In [230]:
result2

AgentActionMessageLog(tool='get_monthly_active_users', tool_input={'month': '2023-04-01', 'city': 'Berlin'}, log="\nInvoking: `get_monthly_active_users` with `{'month': '2023-04-01', 'city': 'Berlin'}`\n\n\n", message_log=[AIMessage(content='', additional_kwargs={'function_call': {'name': 'get_monthly_active_users', 'arguments': '{"month":"2023-04-01","city":"Berlin"}'}})])

In [232]:
observation2 = toolkit[result2.tool](result2.tool_input)
print(observation2)

167.85816148955004


In [233]:
result3 = analyst_chain.invoke({
    'request': "How did the number of users from the capital of Germany between April and May 2023?",
    "observations": format_to_openai_functions([(result1, observation1), (result2, observation2)])
})

In [234]:
result3

AgentActionMessageLog(tool='get_monthly_active_users', tool_input={'month': '2023-05-01', 'city': 'Berlin'}, log="\nInvoking: `get_monthly_active_users` with `{'month': '2023-05-01', 'city': 'Berlin'}`\n\n\n", message_log=[AIMessage(content='', additional_kwargs={'function_call': {'name': 'get_monthly_active_users', 'arguments': '{"month":"2023-05-01","city":"Berlin"}'}})])

In [235]:
observation3 = toolkit[result3.tool](result3.tool_input)
print(observation3)

1046.2141109179674


In [236]:
result4 = analyst_chain.invoke({
    'request': "How did the number of users from the capital of Germany between April and May 2023?",
    "observations": format_to_openai_functions(
      [(result1, observation1), (result2, observation2), 
      (result3, observation3)])
})

In [237]:
result4

AgentActionMessageLog(tool='percentage_difference', tool_input={'metric1': 167.85816148955004, 'metric2': 1046.2141109179674}, log="\nInvoking: `percentage_difference` with `{'metric1': 167.85816148955004, 'metric2': 1046.2141109179674}`\n\n\n", message_log=[AIMessage(content='', additional_kwargs={'function_call': {'name': 'percentage_difference', 'arguments': '{"metric1":167.85816148955004,"metric2":1046.2141109179674}'}})])

In [238]:
observation4 = toolkit[result4.tool](result4.tool_input)
print(observation4)

523.2727093124389


In [239]:
result5 = analyst_chain.invoke({
    'request': "How did the number of users from the capital of Germany between April and May 2023?",
    "observations": format_to_openai_functions(
      [(result1, observation1), (result2, observation2), 
      (result3, observation3), (result4, observation4)])
})

In [240]:
result5

AgentFinish(return_values={'output': 'The number of users from Berlin, the capital of Germany, increased by approximately 523.27% between April and May 2023.'}, log='The number of users from Berlin, the capital of Germany, increased by approximately 523.27% between April and May 2023.')