In [129]:
from InstructorEmbedding import INSTRUCTOR
import openai
import pickle
import os
import json
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

openai.api_key = os.getenv("OPENAI_API_KEY")

In [130]:
# Write question here! Answer will be at the bottom
question = 'How many members are there in group 3 in August 2022?'

In [131]:
model = INSTRUCTOR('hkunlp/instructor-large')

load INSTRUCTOR_Transformer
max_seq_length  512


In [132]:
def get_current_weather(location, unit = "fahrenheit"):
    """
    Retreive the current temperature for a temperature
    """
    weather_info = {
        'location': location,
        'temperature': 70,
        'unit': unit,
        'forecast': ['sunny', 'windy']
    }
    return json.dumps(weather_info)


def get_member_count(date = 20230630, population = 'all'):
    """
    Retreive the current number of members for a given date and population.

    population can one of the following values: 'all', '1', '2', or '3'
    """
    member_count = {
        'date': date,
        'population': population,
        'member_count': 349_085
    }
    return member_count

In [133]:
get_current_weather_gpt_input = {
    "name": "get_current_weather",
    "description": "Get the current weather in a given location",
    "parameters": {
        "type": "object",
        "properties": {
            "location": {
                "type": "string",
                "description": "The city and state, e.g. San Francisco, CA",
            },
            "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
        },
        "required": ["location"],
    },
}

get_member_count_gpt_input = {
    "name": "get_member_count",
    "description": "Retreive the current number of members for a given date and population.",
    "parameters": {
        "type": "object",
        "properties": {
            "date": {
                "type": "integer",
                "description": "The Y/m/d in the query. e.g. 20200131, or 20220630",
            },
            "population": {
                "type": "string", 
                "enum": ["all", "1", "2", "3"], ## optional? idk what this does.
                "description": "Group of people to count. Can only be one of the following values: 'all', '1', '2', or '3'"
            },
        },
        "required": ["date", "population"],
    },
}

In [134]:
function_db = [['Represent the Insurance question for retrieval: ','What is the weather?', get_current_weather_gpt_input],
          ['Represent the Insurance question for retrieval: ',"How many members are there in group X for date Y?", get_member_count_gpt_input]]
function_db

[['Represent the Insurance question for retrieval: ',
  'What is the weather?',
  {'name': 'get_current_weather',
   'description': 'Get the current weather in a given location',
   'parameters': {'type': 'object',
    'properties': {'location': {'type': 'string',
      'description': 'The city and state, e.g. San Francisco, CA'},
     'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}},
    'required': ['location']}}],
 ['Represent the Insurance question for retrieval: ',
  'How many members are there in group X for date Y?',
  {'name': 'get_member_count',
   'description': 'Retreive the current number of members for a given date and population.',
   'parameters': {'type': 'object',
    'properties': {'date': {'type': 'integer',
      'description': 'The Y/m/d in the query. e.g. 20200131, or 20220630'},
     'population': {'type': 'string',
      'enum': ['all', '1', '2', '3'],
      'description': "Group of people to count. Can only be one of the following values: 'all', '

In [135]:
query  = [['Represent the Insurance question for retrieving supporting questions: ',question]]
corpus = [function_list[0:2] for function_list in function_db] # Ignore the last element which is the gpt input to embed it all
query_embeddings = model.encode(query)
corpus_embeddings = model.encode(corpus)
similarities = cosine_similarity(query_embeddings,corpus_embeddings)
retrieved_doc_id = np.argmax(similarities)
print(retrieved_doc_id)

1


In [136]:
# Now to get the gpt input and throw it in to gpt
gpt_input = function_db[retrieved_doc_id][2]

completion = openai.ChatCompletion.create(
    model="gpt-4-0613",
    messages=[{"role": "user", "content": question}],
    functions=[gpt_input],
    function_call="auto",
)

In [137]:
completion

<OpenAIObject chat.completion id=chatcmpl-7e8USk4FXaMSElnUVAFIMR36DVc0L at 0x7f0f4ffc3230> JSON: {
  "id": "chatcmpl-7e8USk4FXaMSElnUVAFIMR36DVc0L",
  "object": "chat.completion",
  "created": 1689800112,
  "model": "gpt-4-0613",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": null,
        "function_call": {
          "name": "get_member_count",
          "arguments": "{\n  \"date\": 20220831,\n  \"population\": \"3\"\n}"
        }
      },
      "finish_reason": "function_call"
    }
  ],
  "usage": {
    "prompt_tokens": 139,
    "completion_tokens": 25,
    "total_tokens": 164
  }
}

In [138]:
reply_content = completion.choices[0]
reply_content

<OpenAIObject at 0x7f0f4ffc3590> JSON: {
  "index": 0,
  "message": {
    "role": "assistant",
    "content": null,
    "function_call": {
      "name": "get_member_count",
      "arguments": "{\n  \"date\": 20220831,\n  \"population\": \"3\"\n}"
    }
  },
  "finish_reason": "function_call"
}

In [139]:
reply_content.to_dict()

{'index': 0,
 'message': <OpenAIObject at 0x7f0f4ffc3110> JSON: {
   "role": "assistant",
   "content": null,
   "function_call": {
     "name": "get_member_count",
     "arguments": "{\n  \"date\": 20220831,\n  \"population\": \"3\"\n}"
   }
 },
 'finish_reason': 'function_call'}

In [140]:
function_name = reply_content.to_dict()['message']['function_call']['name']
args = json.loads(reply_content.to_dict()['message']['function_call']['arguments'])

In [141]:
function_name

'get_member_count'

In [142]:
args

{'date': 20220831, 'population': '3'}

In [143]:
# using repr to add quotes
output = f"{function_name}({', '.join(repr(args[key]) for key in args.keys())})"
output

"get_member_count(20220831, '3')"

In [144]:
eval_output = eval(output)
eval_output

{'date': 20220831, 'population': '3', 'member_count': 349085}

In [145]:
str(eval_output)

"{'date': 20220831, 'population': '3', 'member_count': 349085}"

In [146]:
# Now to get the gpt input and throw it in to gpt
gpt_input = function_db[retrieved_doc_id][2]

completion = openai.ChatCompletion.create(
    model="gpt-4-0613",
    messages=[
        {"role": "user", "content": question},
        {"role": "assistant", "content": "null", "function_call": reply_content.to_dict()['message']['function_call'].to_dict()},
        {"role": "function", "name": function_name, "content": str(eval_output)}
    ],
    functions=[gpt_input],
    function_call="auto",
)

In [147]:
completion

<OpenAIObject chat.completion id=chatcmpl-7e8UUHfrWaCgto4PllIv2IOWEhsgy at 0x7f0fb40b6f90> JSON: {
  "id": "chatcmpl-7e8UUHfrWaCgto4PllIv2IOWEhsgy",
  "object": "chat.completion",
  "created": 1689800114,
  "model": "gpt-4-0613",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "In August 2022, there are 349,085 members in group 3."
      },
      "finish_reason": "stop"
    }
  ],
  "usage": {
    "prompt_tokens": 200,
    "completion_tokens": 19,
    "total_tokens": 219
  }
}

In [148]:
completion['choices'][0]['message']['content']

'In August 2022, there are 349,085 members in group 3.'