# Performance comparison between FastAssert and the OpenAI API

## Installation and Setup

In [1]:
!pip install requests openai



In [53]:
import os

# Setup the OpenAI client
from openai import OpenAI

# The API key is read from the OPENAI_API_KEY environment variable
# (the key lookup yields None when the variable is unset).
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

def openai_constrained_generation(prompt: str, json_schema, model_name="gpt-3.5-turbo"):
    """Ask an OpenAI chat model for tool-call arguments shaped by ``json_schema``.

    The schema is exposed to the model as the parameters of a single tool and
    ``tool_choice`` forces the model to call that tool, so the reply is the
    tool call's argument string instead of free-form text.

    Args:
        prompt: User message sent to the model.
        json_schema: JSON Schema dict passed as the tool's ``parameters``.
        model_name: Name of the chat model to query.

    Returns:
        The ``arguments`` value of the first tool call of the first choice.
    """
    tool_name = "generate_json"

    tools = [
        {
            "type": "function",
            "function": {
                "name": tool_name,
                "description": "Return a JSON object that conforms to the provided schema.",
                # Use the caller's schema; previously a fixed weather schema
                # was sent regardless of the argument.
                "parameters": json_schema,
            },
        }
    ]
    # Send the caller's prompt; previously a fixed Boston question was sent
    # no matter what prompt was passed in.
    messages = [{"role": "user", "content": prompt}]

    completion = client.chat.completions.create(
        model=model_name,
        messages=messages,
        tools=tools,
        # Force the model to answer through the tool so the output follows the schema
        tool_choice={"type": "function", "function": {"name": tool_name}},
    )

    return completion.choices[0].message.tool_calls[0].function.arguments

In [54]:
# Try the OpenAI helper once; the cell displays the returned argument string.
# NOTE: json_ob is defined in a later cell of this notebook, so on a fresh
# kernel that cell has to be run before this one.
openai_constrained_generation("What's the weather like in Boston today?", json_ob)

'{\n  "location": "Boston, MA",\n  "unit": "celsius"\n}'

## Connect and test our FastAssert server
Make sure the FastAssert Docker container is running and can serve requests.

In [28]:
import requests
import time

BASE_URL = "http://127.0.0.1:8000"

# Ping the server's health endpoint. The timeout (in seconds) makes the cell
# fail quickly instead of hanging when nothing is answering at BASE_URL.
ping_response = requests.get(f"{BASE_URL}/health", timeout=5)

# Anything other than HTTP 200 is treated as "server not ready"
if ping_response.status_code == 200:
    print("Server is up and running!")
else:
    raise ValueError(f"Failed to reach the server, status code: {ping_response.status_code}")

Server is up and running!


In [29]:
# Create the test 
def constrained_generation(prompt: str, json_schema):
    """POST a prompt and a JSON schema to the server's ``/generate`` endpoint.

    Args:
        prompt: Text sent under the ``"prompt"`` key of the request body.
        json_schema: Schema sent under the ``"schema"`` key of the request body.

    Returns:
        The raw response body as text.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    url = f"{BASE_URL}/generate"
    payload = {"prompt": prompt, "schema": json_schema}

    response = requests.post(url, json=payload)
    # Fail loudly on HTTP errors so a failed request is never returned (or
    # timed) as if it were a successful generation.
    response.raise_for_status()

    return response.text

def time_constrained_generation(prompt: str, json_schema):
    """Measure how long one ``constrained_generation`` call takes.

    Args:
        prompt: Prompt forwarded to ``constrained_generation``.
        json_schema: Schema forwarded to ``constrained_generation``.

    Returns:
        Elapsed time of the call in seconds; the generated text is discarded.
    """
    # perf_counter is monotonic and high-resolution, so the interval cannot be
    # distorted by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    constrained_generation(prompt, json_schema)
    return time.perf_counter() - start_time

In [30]:
# JSON Schema for the weather query: an object with two required string
# fields ("location" and "unit") and no additional properties allowed.
json_ob = {
    "type": "object",
    "properties": {
        "location": {
            "type": "string",
            "description": "The city and state, e.g., San Francisco, CA",
        },
        "unit": {
            "type": "string",
            "enum": ["celsius", "fahrenheit"],
            "description": "The temperature unit",
        },
    },
    "required": ["location", "unit"],
    "additionalProperties": False,
}

# Smoke-test the FastAssert endpoint with the weather schema; the cell
# displays the raw response text returned by constrained_generation.
constrained_generation("What's the weather like in Boston today?", json_ob)

'{"text":["What\'s the weather like in Boston today?{\\n\\n\\"location\\": \\"Boston, MA\\",\\n\\n\\"unit"]}'

In [33]:
# Time a single FastAssert request; the cell displays the elapsed seconds.
time_constrained_generation("What's the weather like in Boston today?", json_ob)

0.2996985912322998

## Let's benchmark the two methods

In [52]:
# Benchmark cases: one {"location", "unit"} record per city.
inputs = [
    {"location": city, "unit": unit}
    for city, unit in (
        ("Paris", "celsius"),
        ("Sydney", "celsius"),
        ("Berlin", "celsius"),
        ("Moscow", "celsius"),
        ("Rio de Janeiro", "celsius"),
        ("Toronto", "fahrenheit"),
        ("Mumbai", "celsius"),
        ("New York, NY", "fahrenheit"),
        ("London", "celsius"),
        ("Tokyo", "celsius"),
    )
]

In [56]:
# Per-backend latency samples, in seconds (one entry per row of `inputs`)
openai_gpt3_5_times = []
openai_gpt4_times = []
fastassert_times = []

# Each backend is paired with the list that collects its timings, so the
# timing code is written once instead of being copy-pasted per backend.
benchmarks = [
    # OpenAI gpt-3.5-turbo (the helper's default model)
    (lambda prompt: openai_constrained_generation(prompt, json_ob), openai_gpt3_5_times),
    # OpenAI gpt-4
    (lambda prompt: openai_constrained_generation(prompt, json_ob, model_name="gpt-4"), openai_gpt4_times),
    # FastAssert
    (lambda prompt: constrained_generation(prompt, json_ob), fastassert_times),
]

for row in inputs:
    # Every backend receives exactly the same prompt for a given row
    prompt = f"What's the weather like in {row.get('location')} today?"

    for generate, timings in benchmarks:
        # perf_counter is monotonic, so it is safe for measuring intervals
        start_time = time.perf_counter()
        answer = generate(prompt)
        timings.append(time.perf_counter() - start_time)

# Calculate the means and standard deviations


In [58]:
import statistics

# Assuming inputs are defined and the above loop code has been executed

# Average latency per backend
mean_gpt3_5 = statistics.mean(openai_gpt3_5_times)
mean_gpt4 = statistics.mean(openai_gpt4_times)
mean_fastassert = statistics.mean(fastassert_times)

# Sample standard deviation per backend
std_dev_gpt3_5 = statistics.stdev(openai_gpt3_5_times)
std_dev_gpt4 = statistics.stdev(openai_gpt4_times)
std_dev_fastassert = statistics.stdev(fastassert_times)

# Report one line per backend
print(
    f"OpenAI GPT-3.5 Mean: {mean_gpt3_5} s, Standard Deviation: {std_dev_gpt3_5}",
    f"OpenAI GPT-4 Mean: {mean_gpt4} s, Standard Deviation: {std_dev_gpt4}",
    f"FastAssert Mean: {mean_fastassert} s, Standard Deviation: {std_dev_fastassert}",
    sep="\n",
)

OpenAI GPT-3.5 Mean: 0.9235001802444458 s, Standard Deviation: 0.3361904433780215
OpenAI GPT-4 Mean: 1.4428787231445312 s, Standard Deviation: 0.37582730624512495
FastAssert Mean: 0.3033578395843506 s, Standard Deviation: 0.005584525695486718
