### Initialization:

In [None]:
# Install packages

# %pip install openai==0.28
# %pip install --upgrade langchain
# %pip install --upgrade pydantic
# %pip install --upgrade typing

In [None]:
# Import packages

from langchain.prompts import ChatPromptTemplate
from langchain.chat_models import ChatOpenAI
import langchain
import os
import openai

In [None]:
# Load the OpenAI API key from a local JSON config file into this process's environment variables

import json
import os

# Location of the JSON config that holds the key. Set OPENAI_CONFIG_PATH to
# point at a different file; the original hard-coded location stays the default.
config_path = os.environ.get(
    'OPENAI_CONFIG_PATH',
    '/Users/robertford/Documents/OpenAI-API/config.json',
)

with open(config_path, 'r') as f:
    config = json.load(f)

# Prefer the explicitly named entry. The previous loop assigned *every* value
# in the config to OPENAI_API_KEY in turn, so any unrelated entry that happened
# to come last silently replaced the real key.
if 'OPENAI_API_KEY' in config:
    os.environ['OPENAI_API_KEY'] = config['OPENAI_API_KEY']
elif config:
    # Fallback for configs that store the key under another name: keep the
    # old behaviour of using the last value in the file.
    os.environ['OPENAI_API_KEY'] = list(config.values())[-1]

# Raises KeyError if neither the config file nor the environment supplied a key.
api_key = os.environ['OPENAI_API_KEY']

# print(os.environ['OPENAI_API_KEY']) # Check that it imported the key ok

### Extract information into a structured output using a Pydantic (data validation) function:

In [None]:
# create a class inheriting from the BaseModel class and define all the fields (arguments):

from pydantic.v1 import BaseModel, Field
from typing import Optional

class RequestStructure(BaseModel):
    """extracts information"""

    # Mandatory part of the request (plain `str`, no default).
    metric: str = Field(
        description="main metric we need to calculate, for example, 'number of users' or 'number of sessions'",
    )

    # Everything below is Optional: the request may simply not mention it.
    filters: Optional[str] = Field(
        description="filters to apply to the calculation (do not include filters on dates here)",
    )
    dimensions: Optional[str] = Field(
        description="parameters to split you metric by",
    )

    # Reporting window, kept as free-form strings.
    period_start: Optional[str] = Field(
        description="start day of the period for report",
    )
    period_end: Optional[str] = Field(
        description="end day of the period for report",
    )

    # Restricted to the two listed values through the `enum` keyword.
    output_type: Optional[str] = Field(
        description="the desired output",
        enum=["number", "visualisation"],
    )

In [None]:
# Use LangChain to convert this Pydantic class into an OpenAI function:

from langchain.utils.openai_functions import convert_pydantic_to_openai_function

# Build the function spec from the Pydantic class, passing 'extract_information' as its name.
extract_info_function = convert_pydantic_to_openai_function(
    RequestStructure,
    name='extract_information',
)

In [None]:
# Display the OpenAI function spec that LangChain generated from RequestStructure:

extract_info_function

In [None]:
# Defining a LangChain chain

from langchain.prompts import ChatPromptTemplate
from langchain.chat_models import ChatOpenAI

# Two-message template: fixed system instructions plus the user's text, which is
# substituted for {request} when the chain is invoked.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "Extract the relevant information from the provided request. \
            Extract ONLY the information presented in the initial request. \
            Don't add anything else. \
            Return partial information if something is missing."),
        ("human", "{request}"),
    ]
)

# Chat model with the extraction function spec bound to it.
model = ChatOpenAI(temperature=0.1, model='gpt-3.5-turbo-1106').bind(
    functions=[extract_info_function]
)

# Pipe the rendered prompt into the model.
extraction_chain = prompt | model

In [None]:
# Invoke the extraction chain on a sample request to get a structured output:

extraction_chain.invoke({'request': "How many customers visited our site on iOS in April 2023 from different countries?"})

### Define a tool that performs an explicit calculation:

In [None]:
# Teach the LLM data analyst to calculate the difference between two metrics using an explicit calculation.

from langchain.agents import tool

@tool
def percentage_difference(metric1: float, metric2: float) -> float:
    """Calculates the percentage difference between metrics"""
    # The docstring above is left verbatim: the next cell prints it back as the
    # tool's description, i.e. it is text that gets passed to the LLM.
    #
    # metric1 is the baseline and metric2 the new value; the result is the
    # relative change from metric1 to metric2, in percent.
    if metric1 == 0:
        # Same exception type as the bare division would raise, but with a
        # message that explains which input is the problem.
        raise ZeroDivisionError(
            "metric1 (the baseline) is 0, so the percentage difference is undefined"
        )
    return (metric2 - metric1) / metric1 * 100

In [None]:
# The decorated function is now a tool object. Print its name, argument spec
# and description, which are the parameters to be passed to the LLM:
print(percentage_difference.name)
print(percentage_difference.args)
print(percentage_difference.description)

In [None]:
# Use the above parameters to create an OpenAI function spec:

from langchain.tools.render import format_tool_to_openai_function

# Last expression of the cell, so the resulting function spec is displayed as the cell output.
format_tool_to_openai_function(percentage_difference)

In [None]:
# Use Pydantic to specify a schema for the arguments:
import pydantic.v1
from typing import Type

# `import pydantic.v1` also binds the top-level `pydantic` package name, so this
# displays the version of the installed pydantic distribution.
pydantic.__version__

In [None]:
# Import from pydantic.v1 because importing from plain `pydantic` raises an error here.
# Use Pydantic to specify a schema for the tool's arguments:
from pydantic.v1 import BaseModel

class Metrics(BaseModel):
    # Argument schema for the percentage_difference tool: both values are required floats.
    metric1: float = Field(
        description="Base metric value to calculate the difference",
    )
    metric2: float = Field(
        description="New metric value that we compare with the baseline",
    )

@tool(args_schema=Metrics)
def percentage_difference(metric1: float, metric2: float) -> float:
    """Calculates the percentage difference between metrics"""
    # The docstring above is left verbatim because it doubles as the tool
    # description; the per-argument descriptions come from the Metrics schema.
    #
    # metric1 is the baseline and metric2 the new value; the result is the
    # relative change from metric1 to metric2, in percent.
    if metric1 == 0:
        # Same exception type as the bare division would raise, but with a
        # message that explains which input is the problem.
        raise ZeroDivisionError(
            "metric1 (the baseline) is 0, so the percentage difference is undefined"
        )
    return (metric2 - metric1) / metric1 * 100

In [None]:
# Display the redefined tool, which was created with Metrics as its args_schema.
percentage_difference

In [None]:
# Use the tool in practice: define a chain and pass our tool to the model as an OpenAI function
model = ChatOpenAI(temperature=0.1, model = 'gpt-3.5-turbo-1106')\
  .bind(functions = [format_tool_to_openai_function(percentage_difference)])

# System message sets the analyst persona; {request} is filled in at invoke time.
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a product analyst willing to help your product team. You are very strict to the point and accurate. You use only facts."),
    ("user", "{request}")
])

analyst_chain = prompt | model
# Last expression of the cell, so the model's response is displayed as the cell output.
analyst_chain.invoke({'request': "In April we had 100 users and in May only 95. What is difference in percent?"})

In [None]:
from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser
# Rebuild the chain with an output parser appended after the model, then run it on a new request.
analyst_chain = prompt | model | OpenAIFunctionsAgentOutputParser()
result = analyst_chain.invoke({'request': "There were 100 users in April and 110 users in May. How did the number of users changed?"})
# The next cell reads `result.tool_input`, i.e. it expects the parsed result to describe a tool call.
result

In [None]:
# Execute the function the LLM requested by passing the parsed arguments to the tool:

observation = percentage_difference(result.tool_input)
print(observation)
# Expected: 10.0 for metric1=100 and metric2=110 (the division always yields a float)

In [None]:
# Define a message list to pass to the model observations to get a final answer from the model:

from langchain.prompts import MessagesPlaceholder

model = ChatOpenAI(temperature=0.1, model = 'gpt-3.5-turbo-1106')\
  .bind(functions = [format_tool_to_openai_function(percentage_difference)])

# Same template as before plus an "observations" slot that receives a list of
# messages at invoke time (an empty list on the first call below).
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a product analyst willing to help your product team. You are very strict to the point and accurate. You use only facts, not inventing information."),
    ("user", "{request}"),
    MessagesPlaceholder(variable_name="observations")
])

analyst_chain = prompt | model | OpenAIFunctionsAgentOutputParser()
# First pass: no observations yet.
result1 = analyst_chain.invoke({
    'request': "There were 100 users in April and 110 users in May. How did the number of users changed?",
    "observations": []
})

# Run the tool with the arguments from the parsed result.
observation = percentage_difference(result1.tool_input)
print(observation)
# Expected: 10.0 for metric1=100 and metric2=110 (the division always yields a float)

In [None]:
# Then, we need to add the observation to our observations variable. We could use format_to_openai_functions function to format our results in an expected way for the model.

from langchain.agents.format_scratchpad import format_to_openai_functions
# Takes a list of (parsed result, observation) pairs; displayed here to inspect the formatted form.
format_to_openai_functions([(result1, observation), ])

In [None]:
# Let’s invoke our chain one more time, passing the function execution result as an observation.

import langchain
# Module-level debug flag; it is set back to False in a later cell.
langchain.debug = True

# Same request as before, now with the tool result supplied through the "observations" slot.
result2 = analyst_chain.invoke({
    'request': "There were 100 users in April and 110 users in May. How did the number of users changed?",
    "observations": format_to_openai_functions([(result1, observation)])
})

print(result2)

In [None]:
# Let’s add a couple more tools to our analyst’s toolkit,
# using Pydantic to specify the input arguments for our function.

import datetime
import random

class Filters(BaseModel):
    # Argument schema for the get_monthly_active_users tool.
    month: str = Field(
        description="Month of customer's activity in the format %Y-%m-%d",
    )
    # Optional city filter, restricted to the listed values through the `enum` keyword.
    city: Optional[str] = Field(
        description="City of residence for customers (by default no filter)",
        enum=["London", "Berlin", "Amsterdam", "Paris"],
    )

@tool(args_schema=Filters)
def get_monthly_active_users(month: str, city: Optional[str] = None) -> int:
    """Returns number of active customers for the specified month"""
    # Mock data source: the figures are synthetic and computed from the inputs.
    #
    # month: date string in '%Y-%m-%d' format; strptime raises ValueError otherwise.
    # city:  optional city filter; None means "all customers".
    dt = datetime.datetime.strptime(month, '%Y-%m-%d')
    total = dt.year + 10*dt.month
    if city is None:
        return total

    # Derive the city's share from a private generator seeded with the inputs,
    # so the same (month, city) always yields the same count. The previous
    # unseeded random.random() returned a different figure on every call, which
    # made the notebook's outputs impossible to reproduce on a re-run.
    share = random.Random(f"{dt:%Y-%m}|{city}").random()
    return int(total * share)

In [None]:
# Use the Wikipedia package to allow the LLM to look up information it needs:

# !pip install wikipedia
import wikipedia

class Wikipedia(BaseModel):
    # Argument schema for the get_summary tool: a single search term.
    term: str = Field(
        description="Term to search for",
    )

@tool(args_schema=Wikipedia)
def get_summary(term: str) -> str:
    """Returns basic knowledge about the given term provided by Wikipedia"""
    # Lookup failures are returned as text instead of propagating, so the model
    # receives them as an observation and can retry with a better term rather
    # than the whole step crashing.
    try:
        return wikipedia.summary(term)
    except wikipedia.exceptions.DisambiguationError as err:
        # The term matches several pages; offer a few of the candidate titles.
        candidates = ", ".join(err.options[:10])
        return f"'{term}' is ambiguous on Wikipedia. Try one of: {candidates}"
    except wikipedia.exceptions.PageError:
        return f"No Wikipedia page found for '{term}'."

In [None]:
# Let’s define a dictionary with all the functions our model knows now. This dictionary will help us to do routing later.

# Name -> tool lookup used for routing the model's tool requests.
toolkit = {
    'percentage_difference': percentage_difference,
    'get_monthly_active_users': get_monthly_active_users,
    'get_summary': get_summary,
}

# One OpenAI function spec per registered tool, in the dictionary's order.
analyst_functions = list(map(format_tool_to_openai_function, toolkit.values()))

In [None]:
# Force LLM to consult with Wikipedia if it needs some basic knowledge.
# Changed the model to GPT 4 because it’s better for handling tasks requiring reasoning.

from langchain.prompts import MessagesPlaceholder

# Bind every function spec from analyst_functions to the model.
model = ChatOpenAI(temperature=0.1, model = 'gpt-4-1106-preview')\
  .bind(functions = analyst_functions)

# The "observations" slot is filled with the formatted tool results on later invocations.
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a product analyst willing to help your product team. You are very strict to the point and accurate. \
        You use only information provided in the initial request. \
        If you need to determine some information i.e. what is the name of the capital, you can use Wikipedia."),
    ("user", "{request}"),
    MessagesPlaceholder(variable_name="observations")
])

analyst_chain = prompt | model | OpenAIFunctionsAgentOutputParser()

# Print the composed chain to inspect its steps.
print(analyst_chain)

In [None]:
# We can invoke our chain with all the functions. Let’s start with a pretty straightforward query.
# Turn off debug mode

import langchain
# Reset the module-level debug flag that an earlier cell set to True.
langchain.debug = False

# First pass with no observations yet.
result1 = analyst_chain.invoke({
    'request': "How many users were in April 2023 from Berlin?",
    "observations": []
})
print(result1)

In [None]:
# Let’s make the task a bit more complex by not telling the LLM what the capital of Botswana is:

# Note: this rebinds result1, replacing the value from the previous cell.
result1 = analyst_chain.invoke({
    'request': "How did the number of users from the capital of Botswana change between April and May 2023?",
    "observations": []
})

result1

In [None]:
# So now the model has to do a few things:
#   1. call Wikipedia to get the capital of Botswana
#   2. call the get_monthly_active_users function twice to get MAU for April and May
#   3. call percentage_difference to calculate the difference between metrics.
# Route the request: look the tool up by the name in result1 and call it with result1's arguments.

observation1 = toolkit[result1.tool](result1.tool_input)
print(observation1)

In [None]:
# Second pass: same request, with the first tool call and its result passed as observations.
result2 = analyst_chain.invoke({
    'request': "How did the number of users from the capital of Botswana change between April and May 2023?",
    "observations": format_to_openai_functions([(result1, observation1)])
})

print(result2)

In [None]:
# The model wants to execute get_monthly_active_users with arguments {'month': '2023-04-01', 'city': 'Berlin'}. Let's do it and return the information to the model again:
# NOTE(review): the request asks about the capital of Botswana, so 'Berlin' above looks like a leftover — confirm against the actual result2 output.

observation2 = toolkit[result2.tool](result2.tool_input)

print(observation2)

In [None]:
# Third pass: the observations now carry the first two tool calls and their results, in order.
result3 = analyst_chain.invoke({
    'request': "How did the number of users from the capital of Botswana change between April and May 2023?",
    "observations": format_to_openai_functions([(result1, observation1), (result2, observation2)])
})

print(result3)

In [None]:
# Run whichever tool result3 names, with the arguments result3 carries.
observation3 = toolkit[result3.tool](result3.tool_input)

print(observation3)

In [None]:
# Fourth pass: three (tool call, result) pairs are supplied as observations.
result4 = analyst_chain.invoke({
    'request': "How did the number of users from the capital of Botswana change between April and May 2023?",
    "observations": format_to_openai_functions(
      [(result1, observation1), (result2, observation2), 
      (result3, observation3)])
})

print(result4)

In [None]:
# Run the tool named in result4, then make a fifth pass with all four observations.
observation4 = toolkit[result4.tool](result4.tool_input)
print(observation4)

# result5 is not printed here; the Output cell below reads it.
result5 = analyst_chain.invoke({
    'request': "How did the number of users from the capital of Botswana change between April and May 2023?",
    "observations": format_to_openai_functions(
      [(result1, observation1), (result2, observation2), 
      (result3, observation3), (result4, observation4)])
})



### Output

In [None]:
# Assumes result5 is a final answer exposing `.return_values`, not another tool
# request — TODO confirm; if the model asks for a further tool call this line fails.
output = result5.return_values['output']
print(output)