In [122]:
from llm_workflow.agents import Tool, OpenAIFunctions

import yaml
with open('/code/source/notebooks/openai_functions.yml') as f:
    tools = yaml.safe_load(f)

tools = {x['name']: Tool.from_dict(x) for x in tools}
print(tools)

{'xxxxx': <llm_workflow.agents.Tool object at 0xffff85e54c10>, 'yyyyyy': <llm_workflow.agents.Tool object at 0xffff85e54e50>}


In [123]:
# tools = OpenAIFunctions(
#     model_name='gpt-3.5-turbo-0613',
#     tools=tools.values(),
# )
# question = "I want to plot a graph of amount which is numeric and amount_2 which is numeric."
# response = tools(question)
# response

[(<llm_workflow.agents.Tool at 0xffff85e54c10>,
  {'x_variable': 'amount', 'y_variable': 'amount_2'})]

```
- name: ask_weather
  description: Use this function to answer questions about the weather for a particular city.
  inputs:
    location:
      description: The city and state, e.g. San Francisco, CA
      type: string
    unit:
      description: The temperature unit to use. The model needs to infer this from the `location`.
      type: string
      enum:
        - celsius
        - fahrenheit
  required:
    - location
    - unit
```

```
 'unit': {
                'type': 'string',
                'enum': ['celsius', 'fahrenheit'],
                'description': "The temperature unit to use. The model needs to infer this from the `location`.",  # noqa
        }
```

In [126]:
column_types = {'checking_balance': 'string', 'months_loan_duration': 'numeric', 'credit_history': 'string', 'purpose': 'string', 'amount': 'numeric', 'savings_balance': 'string', 'employment_duration': 'string', 'percent_of_income': 'numeric', 'years_at_residence': 'numeric', 'age': 'numeric', 'other_credit': 'string', 'housing': 'string', 'existing_loans_count': 'numeric', 'job': 'string', 'dependents': 'numeric', 'phone': 'string', 'default': 'string'}
column_types

{'checking_balance': 'string',
 'months_loan_duration': 'numeric',
 'credit_history': 'string',
 'purpose': 'string',
 'amount': 'numeric',
 'savings_balance': 'string',
 'employment_duration': 'string',
 'percent_of_income': 'numeric',
 'years_at_residence': 'numeric',
 'age': 'numeric',
 'other_credit': 'string',
 'housing': 'string',
 'existing_loans_count': 'numeric',
 'job': 'string',
 'dependents': 'numeric',
 'phone': 'string',
 'default': 'string'}

In [155]:
import uuid

with open('/code/source/config/graphing_configurations.yml') as f:
    configs = yaml.safe_load(f)['configurations']

print(len(configs))

tools = []
for config in configs:
    # config = configs[0]
    variables = {k:v for k, v in config['selected_variables'].items() if v is not None}
    required_variables = list(variables.keys())
    for graph_type in config['graph_types']:
        if 'info' not in graph_type:
            continue
        description = graph_type['info']
        # graph_type = config['graph_types'][0]
        if 'optional_variables' in graph_type:
            optional_variables = graph_type['optional_variables']
            variables.update({k:v['types'] for k, v in optional_variables.items() if v is not None})
        # print(f"variables: {variables}")
        # print(f"required_variables: {required_variables}")
        valid_graph = True
        inputs = {}
        for k, valid_column_types in variables.items():
            valid_column_names = [n for n, t in column_types.items() if t in valid_column_types]
            inputs[k] = {
                'type': 'string',
                'description': f"{k.replace('_variable', ' axis')} that supports {', '.join(valid_column_types)} columns",   # noqa
            }
            if len(valid_column_names) == 0:
                if k in required_variables:
                    # if there are no valid columns that support the corresponding types and the
                    # variable is required (e.g. there are no dates columns in the dataset and a
                    # date is required for the graph), then skip this graph/tool altogether
                    valid_graph = False
                    break
            else:
                inputs[k]['enum'] = valid_column_names
        if valid_graph:
            # inputs = {'inputs': inputs}
            tools.append(Tool(
                name=str(uuid.uuid4()),
                description=description,
                inputs=inputs,
                required=required_variables,
            ))
len(tools)

15


3

In [156]:
tools[-1].to_dict()

{'name': 'b5284b2d-8cf0-4f20-912f-cc55f1959dec',
 'description': 'Scatter plot of the relationship between two numeric variables.  There are options to add a color variable, size variable, and/or facet (group by) variable.',
 'parameters': {'type': 'object',
  'properties': {'x_variable': {'type': 'string',
    'description': 'x axis that supports numeric columns',
    'enum': ['months_loan_duration',
     'amount',
     'percent_of_income',
     'years_at_residence',
     'age',
     'existing_loans_count',
     'dependents']},
   'y_variable': {'type': 'string',
    'description': 'y axis that supports numeric columns',
    'enum': ['months_loan_duration',
     'amount',
     'percent_of_income',
     'years_at_residence',
     'age',
     'existing_loans_count',
     'dependents']},
   'color_variable': {'type': 'string',
    'description': 'color axis that supports numeric, date, boolean, string, categorical columns',
    'enum': ['checking_balance',
     'months_loan_duration',
  

In [160]:
agent = OpenAIFunctions(
    model_name='gpt-3.5-turbo-1106',
    tools=tools,
)
formatted_colum_names = '\n'.join([f"{k}: {v}" for k, v in column_types.items()])
# prompt = "Plot the counts of checking_balance on the x axis and credit_history on the y axis."
prompt = "Plot the duration of the loan against the amount."
template = f"""
The user is asking to create a plot based on the following column names and types. Infer
the correct column names and the correct axes from the users question.
Choose a tool that uses all of the columns listed by the user. Prioritize the required columns.

Valid columns and types: 

{formatted_colum_names}

User question: {prompt}
"""
print(template)
response = agent(template)
response


The user is asking to create a plot based on the following column names and types. Infer
the correct column names and the correct axes from the users question.
Choose a tool that uses all of the columns listed by the user. Prioritize the required columns.

Valid columns and types: 

checking_balance: string
months_loan_duration: numeric
credit_history: string
purpose: string
amount: numeric
savings_balance: string
employment_duration: string
percent_of_income: numeric
years_at_residence: numeric
age: numeric
other_credit: string
housing: string
existing_loans_count: numeric
job: string
dependents: numeric
phone: string
default: string

User question: Plot the duration of the loan against the amount.



[(<llm_workflow.agents.Tool at 0xffff85335850>,
  {'x_variable': 'months_loan_duration', 'y_variable': 'amount'})]

In [161]:
tool, args = response[0]
print(tool.description)
print(tool.required)
print(args)
# tool.inputs|

Scatter plot of the relationship between two numeric variables.  There are options to add a color variable, size variable, and/or facet (group by) variable.
['x_variable', 'y_variable']
{'x_variable': 'months_loan_duration', 'y_variable': 'amount'}


In [162]:
print(f"Total Cost:            ${agent.history()[0].cost:.5f}")
print(f"Total Tokens:          {agent.history()[0].total_tokens:,}")
print(f"Total Prompt Tokens:   {agent.history()[0].input_tokens:,}")
print(f"Total Response Tokens: {agent.history()[0].response_tokens:,}")

Total Cost:            $0.00117
Total Tokens:          1,128
Total Prompt Tokens:   1,084
Total Response Tokens: 44


In [163]:
tool, args = response[1]
print(tool.description)
print(args)

IndexError: list index out of range