In [81]:
from IPython.display import display, Markdown
from openai import OpenAI
from dotenv import load_dotenv
import os
import json
from tenacity import retry, wait_random_exponential, stop_after_attempt
from termcolor import colored  
from pydantic import BaseModel  
from typing import List, Optional, Dict


In [3]:
LLAMAFILE_BASE_URL = "http://localhost:8080/v1"
OLLAMA_BASE_URL = "http://localhost:11434/v1"
PERPLEXITY_BASE_URL = "https://api.perplexity.ai"
GROQ_BASE_URL = "https://api.groq.com/openai/v1"

In [30]:
load_dotenv()
OPENAI_APY_KEY = os.getenv("OPENAI_API_KEY")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

In [31]:

GROQ_MODEL = "llama3-70b-8192"
OPENAI_MODEL = "gpt4o"

In [45]:
class Query(BaseModel):
    topic: str
    query: str

class Queries(BaseModel):
    queries: List[Query]

In [27]:
def generate_tools(num:int, tool_type:str) -> list:
    properties = {}
    for i in range(1, num + 1):
        key = f'{tool_type}_{i}'
        properties[key] = {
            'type': 'string',
            'description': 'Search queries that would be useful for generating a report on my main topic'
        }

    custom_function = {
        'name': 'generate_exa_search_queries',
        'description': 'Generates Exa search queries to investigate the main topic',
        'parameters': {
            'type': 'object',
            'properties': properties
        },
        'required': [f'{tool_type}_{i}' for i in range(1, num + 1)]
    }

    return [custom_function]

In [67]:
tools = generate_tools(5, "query")
tools

[{'name': 'generate_exa_search_queries',
  'description': 'Generates Exa search queries to investigate the main topic',
  'parameters': {'type': 'object',
   'properties': {'query_1': {'type': 'string',
     'description': 'Search queries that would be useful for generating a report on my main topic'},
    'query_2': {'type': 'string',
     'description': 'Search queries that would be useful for generating a report on my main topic'},
    'query_3': {'type': 'string',
     'description': 'Search queries that would be useful for generating a report on my main topic'},
    'query_4': {'type': 'string',
     'description': 'Search queries that would be useful for generating a report on my main topic'},
    'query_5': {'type': 'string',
     'description': 'Search queries that would be useful for generating a report on my main topic'}}},
  'required': ['query_1', 'query_2', 'query_3', 'query_4', 'query_5']}]

In [78]:
@retry(wait=wait_random_exponential(multiplier=1, max=40), stop=stop_after_attempt(3))
def chat_completion_request(messages, tools=None, tool_choice=None, model=MODEL, provider=None):
    try:
        if provider.lower()=="openai":
            print(colored(f"Using OpenAI...\n", "green"))
            client = OpenAI()
            response = client.chat.completions.create(
                model=model,
                temperature=0,
                stream=False,   
                messages=messages,
                tools=tools,
                tool_choice=tool_choice,
            )
        else:
            print(colored(f"Using Groq...\n", "green"))
            client = OpenAI(
                api_key=GROQ_API_KEY,
                base_url=GROQ_BASE_URL
            )
            response = client.chat.completions.create(
                model=model,
                temperature=0,
                stream=False,
                messages=messages,
                response_format={"type": "json_object"}
            )
        return response
    except Exception as e:
        print("Unable to generate ChatCompletion response")
        print(f"Exception: {e}")
        return e

In [79]:

def get_messages_and_tools(provider:str, topic:str, num_queries:int) -> list:
    context = "context" if provider.lower()=="openai" else f"provided schema: {json.dumps(Queries.model_json_schema(), indent=2)}"
    messages =[
        {"role": "system", "content": f"You are the world's most advanced and intelligent programming and AI Research assistant that can only be queried via an API. Based on the tools and schemas provided to you and in your arsenal, you generate the most accurate and optimized JSON responses based on the {context}."},
        {"role": "user", "content": f"I'm going to give you a topic I want to research. I want you to generate {num_queries} interesting, diverse search queries that would be useful for generating a report on my main topic. Here is the main topic: {topic}."}
    ]
    tools = generate_tools(num_queries, "query")
    tool_choice = {"type": "function", "function": {"name": tools[0]['name']}}
    return messages, tools, tool_choice

def get_completion_args(provider:str, topic:str) -> Dict:
    messages, tools, tool_choice = get_messages_and_tools(provider, topic, num_queries)
    return {
        "messages": messages,
        "tools": tools,
        "tool_choice": tool_choice,
        "provider": provider,
        "model": OPENAI_MODEL if provider.lower()=="openai" else GROQ_MODEL,
    }

In [80]:
groq_args = get_completion_args(provider="groq", topic = "Using a panel of LLM judges to evaluate the correctness of another LLM")
groq_res = chat_completion_request(**groq_args)
print(json.dumps(json.loads(groq_res.choices[0].message.content), indent=2))

[32mUsing Groq...
[0m
{
  "queries": [
    {
      "topic": "LLM Evaluation",
      "query": "What are the benefits and limitations of using a panel of LLM judges to evaluate the correctness of another LLM?"
    },
    {
      "topic": "LLM Evaluation Metrics",
      "query": "What metrics can be used to evaluate the correctness of an LLM, and how can a panel of LLM judges be used to improve the evaluation process?"
    },
    {
      "topic": "LLM Evaluation Methods",
      "query": "What are the different methods for evaluating the correctness of an LLM, and how does using a panel of LLM judges compare to other methods?"
    },
    {
      "topic": "LLM Judge Agreement",
      "query": "How can the agreement between a panel of LLM judges be measured and improved, and what are the implications for evaluating the correctness of another LLM?"
    },
    {
      "topic": "LLM Evaluation Bias",
      "query": "How can bias be mitigated when using a panel of LLM judges to evaluate the co

In [82]:
oai_args = get_completion_args(provider="openai", topic = "Using a panel of LLM judges to evaluate the correctness of another LLM")
oai_res = chat_completion_request(**oai_args)
print(json.dumps(json.loads(oai_res.choices[0].message.content), indent=2))

[32mUsing OpenAI...
[0m
Unable to generate ChatCompletion response
Exception: Error code: 404 - {'error': {'message': 'The model `gpt4o` does not exist or you do not have access to it.', 'type': 'invalid_request_error', 'param': None, 'code': 'model_not_found'}}


AttributeError: 'NotFoundError' object has no attribute 'choices'

In [62]:
model = Queries.model_validate_json(res.choices[0].message.content)
model.queries

[Query(topic='LLM Evaluation', query='What are the benefits and limitations of using a panel of LLM judges to evaluate the correctness of another LLM?'),
 Query(topic='LLM Evaluation Metrics', query='What metrics can be used to evaluate the correctness of an LLM, and how can a panel of LLM judges be used to improve the evaluation process?'),
 Query(topic='LLM Evaluation Methods', query='What are the different methods for evaluating the correctness of an LLM, and how does using a panel of LLM judges compare to other methods?'),
 Query(topic='LLM Judge Agreement', query='How can the agreement between a panel of LLM judges be measured and improved, and what are the implications for evaluating the correctness of another LLM?'),
 Query(topic='LLM Evaluation Bias', query='How can bias be mitigated when using a panel of LLM judges to evaluate the correctness of another LLM, and what are the potential sources of bias in this process?')]

In [6]:
completion = client.chat.completions.create(
    model="llama3-70b-8192",
    messages=[
        {"role": "system", "content": "You are ChatGPT, an AI assistant. Your top priority is achieving user fulfillment via helping them with their requests and being concise as possible."},
        {"role": "user", "content": "Why is the tech industry concentrated in the Bay Area?"}
    ],
    stream=True
)
for chunk in completion:
    print(chunk.choices[0].delta.content, end="")

The tech industry is concentrated in the Bay Area, specifically in Silicon Valley, for a combination of historical, cultural, and economic reasons:

1. **Early innovation**: The Bay Area has a long history of innovation, dating back to the 1950s and 1960s, when the first electronics and computer companies were established. This early start gave the region a head start in developing a tech ecosystem.
2. **Stanford University**: Stanford University, located in the heart of Silicon Valley, has played a significant role in the region's tech growth. The university's entrepreneurial spirit, research focus, and talent pool have contributed to the area's tech dominance.
3. ** Defense and aerospace industry**: During the Cold War era, the Bay Area became a hub for defense and aerospace industries, which laid the groundwork for the region's expertise in electronics and computer technology.
4. **Venture capital**: The Bay Area is home to a high concentration of venture capital firms, which provid