# Testing models

To find the best model for our purpose, we decided to test a handful of the most common models on the market. 

We are testing these six different models from Anthropic, OpenAI and Google. 

- `claude-3-opus-20240229`
- `claude-3-sonnet-20240229`
- `claude-3-haiku-20240307`
- `gpt-3.5-turbo-1106`
- `gpt-4-1106-preview`
- `gemini-pro`

### Step 1: Load in the necessary libraries

In [None]:
import pandas as pd
import json
import os
from dotenv import load_dotenv
import psycopg2
import requests

import anthropic
import openai
from openai import OpenAI
import google.generativeai as genai
import rich

from langchain.evaluation import JsonValidityEvaluator
from pydantic import BaseModel, Field
from guardrails import Guard
from guardrails.hub import ValidLength
from rich import print
import guardrails as gd
from typing import Optional

from tqdm.notebook import tqdm
tqdm.pandas()

### Step 2: Connect to the database and pull data

In [None]:
# Load variables from a local .env file (if present) into the process
# environment. load_dotenv is imported at the top of the notebook but was never
# called, so the os.getenv lookups only saw variables already exported in the
# shell. By default load_dotenv does not override variables that are already set.
load_dotenv()
db_password = os.getenv("COMMONS_DB_PASSWORD")

In [None]:
# Connect with discrete keyword parameters instead of an interpolated URI:
# a password containing URI-reserved characters (e.g. "@", "/", "?") would
# otherwise corrupt the connection string.
conn = psycopg2.connect(
    host="commons-database-do-user-15654205-0.c.db.ondigitalocean.com",
    port=25060,
    dbname="commons",
    user="doadmin",
    password=db_password,
    sslmode="require",
)

cursor = conn.cursor()

Get a random sample of 200 comments from the database for testing.

In [None]:
# Draw 200 randomly ordered rows from COMMENTS, then capture both the rows and
# the column names reported by the cursor before closing it.
cursor.execute(
    """SELECT comment_id, docket_id, agency_id, comment, comment_pdf_extracted, commenter_first_name, commenter_last_name, commenter_organization, commenter_address1, commenter_address2, commenter_zip, commenter_city, commenter_state_province_region, commenter_country, commenter_email, receive_date, posted_date, postmark_date, api_url, attachment_read, attachment_url, duplicate_comments, withdrawn, title, full_text from COMMENTS
               ORDER BY RANDOM()
               LIMIT 200;"""
)
result = cursor.fetchall()
colnames = [column[0] for column in cursor.description]
cursor.close()

Then turn the result into a pandas DataFrame.

In [None]:
# One DataFrame row per fetched comment, labelled with the cursor's column names.
df = pd.DataFrame(data=result, columns=colnames)

### Step 3: Define the models

But first we need to load the api keys.

In [None]:
# Keys that are handed explicitly to the Anthropic and Gemini clients later on.
anthropic_key = os.getenv("ANTHROPIC_API_KEY")
gemini_key = os.getenv("GEMINI_API_KEY")

# The OpenAI package is configured through module-level attributes.
openai.api_key = os.getenv("OPENAI_API_KEY")
openai.organization = None

Create six columns (one per model) to keep track of which comments each model has already processed.

In [None]:
# Add one boolean "processed" flag column per model, all starting as False.
for tracked_model in (
    'gpt-3.5-turbo-1106',
    'gpt-4-1106-preview',
    'claude-3-opus-20240229',
    'claude-3-sonnet-20240229',
    'claude-3-haiku-20240307',
    'gemini-1.0-pro',
):
    df[f'ai_process_{tracked_model}'] = False

This is the JSON output schema that Guardrails will use to validate the output of each model.

In [None]:
# Length validator attached to the summary field below; on failure it is
# configured to "reask".
val = ValidLength(min=5, on_fail="reask")


# Schema of the structured record extracted from one public comment.
# Every field except the summary is optional and defaults to None.
# (Documented with comments rather than a class docstring so the generated
# model schema is left untouched.)
class Comment(BaseModel):
    # --- Identity ---
    ai_first_name: Optional[str] = Field(
        default=None, description="The commenter's first name")
    ai_middle_name: Optional[str] = Field(
        default=None, description="The commenter's middle name or initial")
    ai_last_name: Optional[str] = Field(
        default=None, description="The commenter's last name")

    # --- Contact details ---
    ai_email: Optional[str] = Field(
        default=None, description="The commenter's email address, if mentioned")
    ai_phone: Optional[str] = Field(
        default=None, description="The commenter's phone number, if mentioned")

    # --- Location ---
    ai_address: Optional[str] = Field(
        default=None, description="Mailing address of the commenter")
    ai_city: Optional[str] = Field(
        default=None, description="City of the commenter")
    ai_state: Optional[str] = Field(
        default=None,
        description="State of the commenter e.g. MA for Massachusetts, MD for Maryland or PA for Pennsylvania.")
    ai_zip: Optional[str] = Field(
        default=None, description="Zipcode of the commenter")
    ai_country: Optional[str] = Field(
        default=None, description="Country of the commenter")

    # --- Affiliation ---
    ai_job_title: Optional[str] = Field(
        default=None, description="Job title of the commenter")
    ai_org: Optional[str] = Field(
        default=None, description="Organization of the commenter")

    # --- Required summary, checked by the length validator ---
    ai_summary: str = Field(
        description="A short two sentence summary of main points of the comment",
        validators=[val])

In [None]:
# Prompt template shared by all model wrappers in this notebook.
# - `${comment}` is the placeholder that ask_anthropic and ask_gemini fill in
#   through `prompt_params={"comment": ...}`.
# - `${gr.complete_json_suffix_v2}` is presumably a Guardrails built-in suffix
#   describing the expected JSON output -- TODO confirm against the Guardrails docs.
# - The three examples only illustrate the output format, including how missing
#   values are returned as null.
# NOTE(review): the prompt text contains the typo "adress"; it is left as-is
# here because changing the string changes what is sent to the models.
system_prompt: str = """
Read the following public comment on a federal regulation and extract ALL of the relevant information, 
including the commenter's first name, middle name, last name, email adress, phone number, address, city, 
state, zipcode, country, job title, affiliated organization, and a short 2 sentences long summary of what the 
commenter is saying. The summary cannot be empty or the string 'None'. If any of the information can't be derived, 
you must return null and nothing else.

Example output format 1: 
'{"ai_first_name": "Mary",
"ai_middle_name": "C.",
"ai_last_name": "Smith",
"ai_email": "msmith@gmail.com", 
"ai_phone": "913-593-4889",
"ai_address": "1604 Grand Ave.",
"ai_city": "St. Paul",
"ai_state": "MN",
"ai_zip": "55105",
"ai_country": "United States",
"ai_job_title": "High school teacher",
"ai_org": "Seattle Public Schools",
"ai_summary": "The proposed regulation will help protect the environment and help keep the air clean for her five children. The administration must make an effort to pass this as soon as possible."}'

Example output format 2:
'{"ai_first_name": "David",
"ai_middle_name": "James",
"ai_last_name": "Roberts",
"ai_email": "david_roberts@hotmail.com", 
"ai_phone": null,
"ai_address": null,
"ai_city": "Sioux Falls",
"ai_state": "SD",
"ai_zip": null,
"ai_country": "United States",
"ai_job_title": "Mechanic",
"ai_org": null,
"ai_summary": "This regulation will hurt small businesses and make it harder for people to get to work. The administration should not pass this regulation."}'

Example output format 3:
'{"ai_first_name": "Hanna",
"ai_middle_name": null,
"ai_last_name": "Chen",
"ai_email": "bdfarms@aol.com", 
"ai_phone": "785-551-2009",
"ai_address": "490 Del Matro Ave.",
"ai_city": "Windsor Heights",
"ai_state": "IA",
"ai_zip": "50324",
"ai_country": "United States",
"ai_job_title": "Organizer",
"ai_org": "Natural Resources Defense Council",
"ai_summary": "The commenter expresses their love for the state and the environment. They say their family will continue to live in the state."}'

Do not use the examples provided in the prompt to fill in the fields.

Comment: ${comment}

${gr.complete_json_suffix_v2}
"""

### Step 4: Run the models

#### Anthropic Model `Claude-3`

Here we are testing three versions of Anthropic's new model `Claude-3`. They are:

- `Claude 3 Opus`: the most intelligent and capable model.

- `Claude 3 Sonnet`: strikes the balance between intelligence and speed—particularly for high-volume tasks.

- `Claude 3 Haiku`: the fastest and most affordable model.

(source: Email Laura got from Anthropic on March 17th, 2024)

In [None]:
# Claude 3 model identifiers to evaluate, in the order they will be run.
anthropic_models = [
    "claude-3-opus-20240229",
    "claude-3-sonnet-20240229",
    "claude-3-haiku-20240307",
]

In [None]:
# Anthropic API client used by make_claude_request; authenticated with the
# key read from ANTHROPIC_API_KEY earlier in the notebook.
client = anthropic.Anthropic(api_key=anthropic_key)

In [None]:
def make_claude_request(prompt: str, max_tokens: int, engine: str, **kwargs) -> str:
    """Send a single user message to an Anthropic model and return its text.

    Args:
        prompt (str): Text sent as the only user message.
        max_tokens (int): Token limit forwarded to the API call.
        engine (str): Name of the Anthropic model to query.
        **kwargs: Extra keyword arguments forwarded unchanged to
            ``client.messages.create``.

    Returns:
        str: The ``text`` of the first content block in the reply.
    """
    user_turn = {"role": "user", "content": prompt}

    reply = client.messages.create(
        model=engine,
        max_tokens=max_tokens,
        messages=[user_turn],
        **kwargs
    )

    # Only the first content block of the reply is used.
    first_block = reply.content[0]
    return first_block.text

In [None]:
def ask_anthropic(comment: str, claude_model: str):
    """Run one comment through a Claude model behind a Guardrails guard.

    Args:
        comment (str): Comment text substituted into the prompt template.
        claude_model (str): Anthropic model name passed on as ``engine``.

    Returns:
        The validated output produced by the guard (second element of the
        guard call's result).

    Raises:
        Exception: Whatever the guard call raises is re-raised after the last
            guard history tree has been printed.
    """
    guard = Guard.from_pydantic(output_class=Comment, prompt=system_prompt)

    request_options = {
        "llm_api": make_claude_request,
        "engine": claude_model,
        "prompt_params": {"comment": comment},
        "max_tokens": 1024,
        "temperature": 0,
    }

    try:
        _raw_output, validated, *_extra = guard(**request_options)
    except Exception as err:
        # Dump the guard's last call tree before propagating the error.
        rich.print(guard.history.last.tree)
        raise err

    return validated

# For every Claude model, send each row's full_text through ask_anthropic and
# store the responses in a per-model ai_response_<model> column.
for claude_model in anthropic_models:
    df[f'ai_response_{claude_model}'] = df['full_text'].progress_apply(
        lambda text, model_name=claude_model: ask_anthropic(comment=text, claude_model=model_name)
    )

#### OpenAI Model `GPT-3.5` and `GPT-4`

In [None]:
# OpenAI model identifiers to evaluate, in the order they will be run.
gpt_models = [
    "gpt-3.5-turbo-1106",
    "gpt-4-1106-preview",
]

In [None]:
# Function that asks GPT for the information
def ask_gpt(prompt, comment, gpt_model):
    """Extract the structured fields from one comment with an OpenAI chat model.

    Args:
        prompt (str): Guardrails prompt template. If it contains the
            ``${comment}`` placeholder, the comment is substituted there via
            ``prompt_params`` (the same mechanism ask_anthropic and ask_gemini
            use). Otherwise the comment is appended to the end of the prompt,
            as before.
        comment (str): Text of the public comment.
        gpt_model (str): OpenAI model name.

    Returns:
        The validated output after a JSON round trip, or None when no valid
        output was produced.

    Note:
        This function no longer writes to the global ``df``. It used to set the
        whole ``ai_process_<model>`` column to True after a single successful
        call, marking every row as processed; per-row bookkeeping belongs to
        the caller.
    """
    if "${comment}" in prompt:
        # Fill the template placeholder instead of leaving it literal and
        # tacking the comment on after the JSON suffix.
        guard = Guard.from_pydantic(output_class=Comment, prompt=prompt)
        call_kwargs = {"prompt_params": {"comment": comment}}
    else:
        guard = Guard.from_pydantic(output_class=Comment, prompt=prompt + comment)
        call_kwargs = {}

    raw_llm_output, validated_output, *rest = guard(
        llm_api=openai.chat.completions.create,
        model=gpt_model,
        response_format={"type": "json_object"},
        temperature=0,
        **call_kwargs,
    )

    if validated_output is None:
        # json.dumps(None) is "null", which is valid JSON, so the evaluator
        # below would not flag a missing result; report it explicitly.
        print("Generated response is not valid JSON.")
        return None

    prediction = json.dumps(validated_output)
    result = JsonValidityEvaluator().evaluate_strings(prediction=prediction)

    if result['score'] != 1:
        print("Generated response is not valid JSON.")
        return None

    return json.loads(prediction)

In [None]:
# Run every GPT model over the rows not yet marked as processed, retrying
# failed rows for at most 10 rounds per model.
for gpt_model in gpt_models:
    def process_row(row):
        """Query the current GPT model for one row unless it is already done."""
        if not row[f'ai_process_{gpt_model}']:
            response = ask_gpt(system_prompt, row['full_text'], gpt_model)
            row[f'ai_response_{gpt_model}'] = response
            # Mark the row as processed only when a response came back.
            # Setting the flag unconditionally made the retry loop below
            # pointless, because a failed row (None) was never revisited.
            row[f'ai_process_{gpt_model}'] = response is not None
        return row

    rounds = 0

    while not df[f'ai_process_{gpt_model}'].all() and rounds < 10:
        df = df.apply(process_row, axis=1)
        rounds += 1
        print(f"Round {rounds} complete")

    print(f"Done with {gpt_model}")

#### Google Model `Gemini`

In [None]:
# Register the Gemini API key with the google.generativeai package.
genai.configure(api_key=gemini_key)

In [None]:
# One entry per harm category, each with its threshold set to "BLOCK_NONE".
safety_settings = [
    {"category": harm_category, "threshold": "BLOCK_NONE"}
    for harm_category in (
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
]

In [None]:
def make_gemini_request(system_prompt: str, max_tokens: int = 2048, temperature: float = 0, **kwargs) -> Optional[str]:
    """Gemini LLM API wrapper.

    Args:
        system_prompt (str): The full prompt text sent to the model.
        max_tokens (int): Passed to the generation config as ``max_output_tokens``.
        temperature (float): Sampling temperature for the generation config.
        **kwargs: Additional fields forwarded to ``genai.types.GenerationConfig``.

    Returns:
        Optional[str]: The response text, or None when the request or reading
        the response text raised an exception (the error is printed).
    """
    gen_config = genai.types.GenerationConfig(
        max_output_tokens=max_tokens,
        temperature=temperature,
        **kwargs)

    model = genai.GenerativeModel('gemini-pro', generation_config=gen_config, safety_settings=safety_settings)

    # Bound before the try block so the handler can tell "the request itself
    # failed" (still None) apart from "a response arrived but could not be read".
    # Previously the handler read an unbound name in the first case and raised a
    # new error that hid the original one.
    response = None

    try:
        response = model.generate_content(
            system_prompt,
            generation_config=gen_config
        )

        print(response)
        return response.text
    except Exception as e:
        print(e)
        if response is not None:
            print(response.prompt_feedback)
        return None


In [None]:
def ask_gemini(comment):
    """Run one comment through Gemini behind a Guardrails guard.

    Args:
        comment: Comment text substituted into the prompt template.

    Returns:
        The validated output produced by the guard (second element of the
        guard call's result).
    """
    guard = Guard.from_pydantic(output_class=Comment, prompt=system_prompt)

    request_options = {
        "llm_api": make_gemini_request,
        "prompt_params": {"comment": comment},
        "max_tokens": 2048,
        "temperature": 0,
    }
    _raw_output, validated, *_extra = guard(**request_options)

    return validated

In [None]:
# Send every row's full_text through ask_gemini and keep the responses in
# their own column.
df['ai_response_gemini-1.0-pro'] = df['full_text'].progress_apply(ask_gemini)

### Structure the output

In [None]:
def extract_ai_response(response, key):
    """Return one field of a model response, normalising placeholder values.

    Args:
        response: The response for one row; expected to be a dict. Anything
            else (None, NaN, a string, ...) yields None.
        key: Name of the field to extract.

    Returns:
        The field value, or None when the response is not a dict, the key is
        missing, or the value is the string "null" or the boolean True.
    """
    if not isinstance(response, dict):
        return None

    try:
        value = response[key]
    except (KeyError, TypeError):
        # Missing field, or a key that cannot be used for lookup.
        return None

    # The literal string "null" stands for a missing value.
    if isinstance(value, str) and value == "null":
        return None
    # Identity check so that only the boolean True is dropped; the previous
    # `== True` comparison also discarded the numbers 1 and 1.0.
    if value is True:
        return None
    return value

In [None]:
# Every model whose responses are unpacked into per-field columns below.
models = [
    "claude-3-opus-20240229",
    "claude-3-sonnet-20240229",
    "claude-3-haiku-20240307",
    "gpt-3.5-turbo-1106",
    "gpt-4-1106-preview",
    "gemini-1.0-pro",
]

In [None]:
# Fields to unpack from every model response. This list does not depend on the
# model, so it is defined once instead of being rebuilt on each iteration.
keys = ['ai_first_name', 'ai_middle_name', 'ai_last_name', 'ai_email',
        'ai_phone', 'ai_address', 'ai_city', 'ai_state', 'ai_zip',
        'ai_country', 'ai_job_title', 'ai_org', 'ai_summary']

# Spread each model's response into one column per field, named
# "<field>_<model>".
for model in models:
    for key in keys:
        df[f"{key}_{model}"] = df[f"ai_response_{model}"].progress_apply(
            lambda x: extract_ai_response(x, key)
        )

In [None]:
# Write the full DataFrame (source columns, raw responses and extracted
# per-field columns) to output.csv without the index column.
df.to_csv('output.csv', index=False)