In [None]:
import json
import os

from dotenv import load_dotenv

# Load variables from a local .env file into the process environment so the
# os.getenv lookup in the next cell can see the API key.
load_dotenv()

True

In [None]:
# Groq credential taken from the environment; None when the variable is unset.
api_key = os.environ.get("GROQ_API_KEY_NEW")

## Extracting structured data

In [3]:
import instructor
from groq import Groq
from pydantic import BaseModel

# Model id passed as `model=` to the extraction request in this cell.
MODEL = "llama3-8b-8192"

# Target schema for the extraction: passed as `response_model` to the
# chat-completion call further down in this cell.
# Documented with `#` comments on purpose: pydantic copies a class docstring
# into the model's JSON schema description, which would change the schema.
class UserInfo(BaseModel):
    name: str
    # NOTE(review): typed as str, so the value prints as '35' rather than 35 —
    # confirm a string (not an int) is what downstream code expects.
    age: str
    email: str
    
# Free-form passage from which the UserInfo fields are extracted.
text = """
John Doe, a 35 year old engineer from New York has been working with Large Language Model for several yeras. His email address is johndoe@gmail.com You can contact him anytime."""

# Build the Groq SDK client first, then wrap it with instructor in JSON mode
# so that `create` can be given a `response_model`.
groq_client = Groq(api_key=api_key)
client = instructor.from_groq(groq_client, mode=instructor.Mode.JSON)

# System instruction followed by the passage itself as the user turn.
extraction_messages = [
    {"role": "system", "content": "Your job is to extract user information from the given text."},
    {"role": "user", "content": text},
]

user_info = client.chat.completions.create(
    messages=extraction_messages,
    response_model=UserInfo,
    model=MODEL,
    temperature=0.65,
)

# Show the whole parsed object, then its individual fields.
print(user_info)
print(user_info.name, user_info.age, user_info.email)

name='John Doe' age='35' email='johndoe@gmail.com'
John Doe 35 johndoe@gmail.com


## Generating synthetic data

In [None]:
from pprint import pprint

import instructor
from pydantic import BaseModel, Field

# Prompt template for generating synthetic tool-call examples.
# It has two `str.format` placeholders: {json_schema} (the tool description)
# and {num} (how many examples to request).
prompt = """I am designing a weather agent. This agent can talk to the user and also fetch latest weather information.
It has access to the `get_weather_info` tool with the following JSON schema:
{json_schema}

I want you to write some examples for `get_weather_info` and see if this functionality works correctly and can handle all the cases. 
Now given the information so far and the JSON schema of the provided tool, write {num} examples.
Make sure each example is varied enough to cover common ways of requesting for this functionality.
Make sure you fill the function parameters with the correct types when generating the output examples. 
Make sure your output is valid JSON.
"""

In [None]:
# One synthetic tool-call example: the user's query, the tool to invoke, and
# the tool arguments encoded as a JSON string (decoded later with json.loads).
# Documented with `#` comments on purpose: pydantic copies a class docstring
# into the model's JSON schema description, which would change the schema.
# NOTE(review): the recorded run returned input_query='' for every example —
# nothing here rejects an empty query; consider whether that should be enforced.
class Example(BaseModel):
    input_query: str = Field(description="The example text")
    tool_name: str = Field(description="The name of the tool")
    tool_parameters: str = Field(description="An object containing the key & value pairs for the parameters of this tool as a valid JSON serializable string, make sure it is a valid JSON and parameter values are of the correct type accoording to the tool schema.")

In [None]:
# Top-level response schema: a wrapper holding the list of generated examples.
# Used as `response_model` in the generation request.
class ResponseModel(BaseModel):
    examples: list[Example]

In [None]:
# Description of the `get_weather_info` tool, assembled inside-out: the single
# "location" parameter first, then the tool envelope that references it.
_location_property = {
    "type": "string",
    "description": "The location for which we want to get the weather information (e.g. New York)",
}

tool_schema = {
    "name": "get_weather_info",
    "description": "Get the weather information for any location.",
    "parameters": {
        "type": "object",
        "properties": {"location": _location_property},
        "required": ["location"],
    },
}

In [None]:
# Rebinds MODEL (the extraction cell above set "llama3-8b-8192"); cells run
# after this one use this value for the synthetic-data request.
MODEL = "llama3-70b-8192"

In [18]:
def get_weather_info(location: str) -> str:
    """Return a canned weather report for ``location``.

    The lookup is backed by a small hard-coded table. An exact key match is
    tried first; if that fails, the name is compared again ignoring
    surrounding whitespace and letter case, so inputs such as ``"new york"``
    or ``" Chicago "`` still resolve.

    Args:
        location: City name to look up.

    Returns:
        The stored report for the city, or ``"Weather info not found for now."``
        when the city is unknown or ``location`` is not a string.
    """
    weather_info = {
        "New York": "It's 30 degrees in New York with a chance of rain.",
        "Los Angeles": "It's 25 degrees in Los Angeles with clear skies.",
        "Chicago": "It's 20 degrees in Chicago with a chance of snow."
    }
    not_found = "Weather info not found for now."

    # Non-string input cannot be normalized; treat it as an unknown location.
    if not isinstance(location, str):
        return not_found

    # Exact match first, so previously working inputs behave identically.
    if location in weather_info:
        return weather_info[location]

    # Fall back to a whitespace- and case-insensitive comparison.
    wanted = location.strip().casefold()
    for city, report in weather_info.items():
        if city.casefold() == wanted:
            return report
    return not_found

In [None]:
# instructor-wrapped Groq client in JSON mode, so `create` accepts a
# `response_model` and returns a parsed ResponseModel.
client = instructor.from_groq(
    Groq(api_key=api_key),
    mode=instructor.Mode.JSON
)

# Serialize the schema with json.dumps before formatting it into the prompt:
# formatting the dict directly would embed Python's repr (single-quoted keys),
# which is not the JSON the prompt text says it is showing.
schema_json = json.dumps(tool_schema, indent=2)

response = client.chat.completions.create(
    model = MODEL,
    response_model = ResponseModel,
    messages = [
        {"role": "system", "content": prompt.format(json_schema=schema_json, num=5)}
    ],
    temperature=0.65,
    max_tokens=5000
)

# Pretty-print the generated Example objects, one per line.
pprint(response.examples)

[Example(input_query='', tool_name='get_weather_info', tool_parameters='{"location": "New York"}'),
 Example(input_query='', tool_name='get_weather_info', tool_parameters='{"location": "Los Angeles"}'),
 Example(input_query='', tool_name='get_weather_info', tool_parameters='{"location": "London"}'),
 Example(input_query='', tool_name='get_weather_info', tool_parameters='{"location": "Paris"}'),
 Example(input_query='', tool_name='get_weather_info', tool_parameters='{"location": "Tokyo"}')]


In [20]:
import json

# Each example carries its arguments as a JSON string: decode it and keep only
# the "location" value, in a single pass over the generated examples.
location = [
    json.loads(example.tool_parameters)["location"]
    for example in response.examples
]

# Feed every generated location to the local lookup tool and show the result.
for loc in location:
    print(f"Weather for {loc}: {get_weather_info(loc)}")

Weather for New York: It's 30 degrees in New York with a chance of rain.
Weather for Los Angeles: It's 25 degrees in Los Angeles with clear skies.
Weather for London: Weather info not found for now.
Weather for Paris: Weather info not found for now.
Weather for Tokyo: Weather info not found for now.
