In [None]:
from concurrent.futures import ThreadPoolExecutor
from glob import glob
from pathlib import Path
from pprint import pprint
import threading


from dotenv import load_dotenv
from langchain.globals import set_llm_cache
from langchain_core.caches import InMemoryCache
import pandas as pd
import promptquality as pq
from tqdm.auto import tqdm
from llm_handler import LLMHandler

from IPython.display import clear_output

# Read environment variables (API keys etc.) from ../.env.
load_dotenv("../.env")

# Register a process-wide in-memory cache for LangChain LLM calls.
llm_cache = InMemoryCache()
set_llm_cache(llm_cache)
# pq.login("console.demo.rungalileo.io")

In [None]:
# Build the LLM wrapper for the DeepSeek-R1 model id
# (temperature 0.0, at most 4000 generated tokens).
model = "accounts/fireworks/models/deepseek-r1"
llm_handler = LLMHandler()
llm = llm_handler.get_llm(
    model,
    temperature=0.0,
    max_tokens=4000,
)
# llm.invoke("Hello, how are you?")

In [None]:
# Sort the matches: glob() returns paths in a file-system-dependent order, and a
# later cell picks files[0], so a stable order keeps re-runs reproducible.
files = sorted(glob("../data/datasets/*.parquet"))
print(len(files))
files

In [None]:
import json
import re
from pydantic import BaseModel
from openai import OpenAI
import os

from dotenv import load_dotenv
load_dotenv("../.env")
 
# Initialize the Fireworks client (OpenAI-compatible endpoint).
# Index os.environ directly so a missing FIREWORKS_API_KEY fails right here with
# a KeyError. With os.getenv() the value would be None, and the OpenAI client
# then falls back to OPENAI_API_KEY, sending the wrong credential to Fireworks.
client = OpenAI(
    base_url="https://api.fireworks.ai/inference/v1",
    api_key=os.environ["FIREWORKS_API_KEY"],
)
 
 
# Define the output schema using Pydantic
# QAResult's JSON schema is sent as `response_format` in the API call below, and
# the JSON part of the model's reply is validated against it.
# Documented with comments rather than a class docstring on purpose: pydantic
# copies a model's docstring into the generated JSON schema ("description"),
# which would change the payload sent to the API.
class QAResult(BaseModel):
    question: str  # presumably the question being answered — confirm against model output
    answer: str  # presumably the model's answer text — confirm against model output
 
 
# Prepare the user input
user_input = "Who wrote 'Pride and Prejudice'?"

# Construct the messages payload
messages = [{"role": "user", "content": user_input}]

# Make the API call to DeepSeek R1
response = client.chat.completions.create(
    model="accounts/fireworks/models/deepseek-r1",
    messages=messages,
    response_format={"type": "json_object", "schema": QAResult.model_json_schema()},
    max_tokens=1000,  # Adjust as needed to prevent truncation
)

# Extract the content of the response. `content` can be None; normalise it to an
# empty string so the format check below reports the problem instead of
# re.search() failing with a TypeError.
response_content = response.choices[0].message.content or ""
print(f"Response content: {response_content}")

# Use regular expressions to extract the reasoning and JSON parts.
# The reasoning is enclosed within <think>...</think> tags,
# and the JSON part follows the </think> tag.
reasoning_match = re.search(r"<think>(.*?)</think>", response_content, re.DOTALL)
json_match = re.search(r"</think>\s*(\{.*\})", response_content, re.DOTALL)

# Fail with an explicit message rather than an AttributeError on None when the
# reply does not have the expected shape (for example, if it was cut off before
# the closing tag or the JSON object).
if reasoning_match is None or json_match is None:
    raise ValueError(
        "Expected '<think>...</think>' followed by a JSON object in the LLM reply; "
        "the reply may be empty or truncated by max_tokens."
    )

# Extract reasoning
reasoning = reasoning_match.group(1).strip()

# Extract JSON string
json_str = json_match.group(1).strip()

# Directly parse the JSON string into a Pydantic model
qa_result = QAResult.model_validate_json(json_str)

# Output the extracted reasoning and the parsed Pydantic model
print(f"\nReasoning: {reasoning}")
print(f"\nQAResult: {qa_result}")
 

In [None]:
import json
import re
from typing import TypedDict, Any, Union, List
from pydantic import BaseModel
from openai import OpenAI
import os
from dotenv import load_dotenv

load_dotenv("../.env")

# Tool call schema
# One tool invocation; used as the element type of ToolCallResponse.tool_calls
# and of CombinedResponse.content.
# NOTE(review): pydantic v2 rejects typing.TypedDict on Python < 3.12 and asks
# for typing_extensions.TypedDict instead — confirm the kernel's Python version.
class ToolCall(TypedDict):
    name: str  # presumably the tool/function name — confirm against the dataset's tool titles
    args: dict[str, Any]  # mapping of string keys to arbitrary values (presumably the call's arguments)

# Define possible response types using Pydantic
# Plain-text reply. process_llm_response requests this model's JSON schema and
# validates the reply against it when response_type == "text".
# (Comments rather than a class docstring: pydantic would copy a docstring into
# the JSON schema sent to the API.)
class TextResponse(BaseModel):
    text: str  # the reply text

# Tool-call reply. process_llm_response requests this model's JSON schema and
# validates the reply against it for any response_type other than "text".
class ToolCallResponse(BaseModel):
    tool_calls: List[ToolCall]  # list of tool invocations

# Wrapper returned by process_llm_response: a type tag plus the parsed payload.
# NOTE(review): response_type is a free-form str, so the "text"/"tool_calls"
# convention is not enforced by validation.
class CombinedResponse(BaseModel):
    response_type: str  # Either "text" or "tool_calls"
    content: Union[TextResponse, List[ToolCall]]  # a TextResponse, or a list of ToolCall dicts

# Initialize the Fireworks client (OpenAI-compatible endpoint).
# Index os.environ directly so a missing FIREWORKS_API_KEY fails right here with
# a KeyError. With os.getenv() the value would be None, and the OpenAI client
# then falls back to OPENAI_API_KEY, sending the wrong credential to Fireworks.
client = OpenAI(
    base_url="https://api.fireworks.ai/inference/v1",
    api_key=os.environ["FIREWORKS_API_KEY"],
)

def process_llm_response(user_input: str, response_type: str = "text") -> CombinedResponse:
    """
    Process LLM response with support for both text and tool calls.

    Sends ``user_input`` as a single user message to DeepSeek R1 with a JSON
    schema as ``response_format``, then parses the JSON object that follows the
    ``<think>...</think>`` block in the reply.

    Args:
        user_input: The user's question or command
        response_type: "text" requests and parses a TextResponse; any other
            value requests and parses a ToolCallResponse.

    Returns:
        CombinedResponse object containing either text response or tool calls

    Raises:
        ValueError: If the reply is empty or does not contain a
            ``<think>...</think>`` block followed by a JSON object.
            ``model_validate_json`` additionally raises its own validation
            error when the JSON does not match the requested schema.
    """
    # Pick the response model once; it drives both the requested schema and
    # the validation of the reply.
    wants_text = response_type == "text"
    response_model = TextResponse if wants_text else ToolCallResponse

    # Make the API call to DeepSeek R1
    response = client.chat.completions.create(
        model="accounts/fireworks/models/deepseek-r1",
        messages=[{"role": "user", "content": user_input}],
        response_format={"type": "json_object", "schema": response_model.model_json_schema()},
        max_tokens=1000,
    )

    # `content` can be None; treat that as an empty reply so it is reported as
    # the documented ValueError instead of a TypeError from re.search().
    response_content = response.choices[0].message.content or ""

    # The reply must contain a reasoning block, and the JSON object is whatever
    # follows the closing </think> tag (greedy match up to the last "}").
    has_reasoning = re.search(r"<think>.*?</think>", response_content, re.DOTALL)
    json_match = re.search(r"</think>\s*(\{.*\})", response_content, re.DOTALL)

    if not has_reasoning or not json_match:
        raise ValueError("Invalid response format from LLM")

    parsed = response_model.model_validate_json(json_match.group(1).strip())

    # For tool calls only the list itself is wrapped, not the ToolCallResponse.
    content = parsed if wants_text else parsed.tool_calls

    return CombinedResponse(
        response_type=response_type,
        content=content
    )

# # Example usage
# if __name__ == "__main__":
#     # Example text response
#     text_question = "Who wrote 'Pride and Prejudice'?"
#     text_result = process_llm_response(text_question, "text")
#     print(f"\nText Response: {text_result}")
    
#     # Example tool call response
#     tool_question = "Calculate 15% of 85"
#     tool_result = process_llm_response(tool_question, "tool_calls")
#     print(f"\nTool Call Response: {tool_result}")

In [None]:
# Display the JSON schema that pydantic generates for CombinedResponse.
CombinedResponse.model_json_schema()

In [None]:
# Load the first dataset file from `files` and preview its leading rows.
first_dataset = files[0]
df = pd.read_parquet(first_dataset, engine="fastparquet")
df.head()

In [None]:
# Sample conversation (three user turns, two assistant turns holding tool-call
# strings) used to preview the prompt construction later in this cell.
conversation = [
    {
        "role": "user",
        "content": "I'm about to embark on a road trip adventure and I want my car to be in peak condition. Could you make sure to increase the current fuel level to ensure that my tank is full, so I don't have to keep stopping to refuel along the way?",
    },
    {
        "role": "assistant",
        "content": "['fillFuelTank(fuelAmount=35.0)']",
    },
    {
        "role": "user",
        "content": "Before I hit the open road, I need to get the engine running smoothly. Can you confirm there's enough fuel, and ensure the engine's primed for a seamless start?",
    },
    {
        "role": "assistant",
        "content": '["lockDoors(unlock=False, door=[\'driver\', \'passenger\', \'rear_left\', \'rear_right\'])", \'pressBrakePedal(pedalPosition=1.0)\', "startEngine(ignitionMode=\'START\')"]',
    },
    {
        "role": "user",
        "content": "I want to make certain my tires are roadworthy before setting off. If any of my car's tires are showing pressure below 40, point me in the direction of the closest tire service station, because I definitely don't want to run into tire trouble.",
    },
]
# JSON schema of the expected final answer; embedded in the system prompt below.
schema = CombinedResponse.model_json_schema()

# System prompt. The shorter draft that used to be assigned to `system_msg`
# first was overwritten before any use, so only this version is kept.
system_msg = {
    "role": "system",
    "content": (
        "Your job is to use the given tools to answer the query of human. If there is no relevant tool then reply with 'I cannot answer the question with given tools'. "
        "If tool is available but sufficient information is not available, then ask human to get the same. You can call as many tools as you want. Use multiple tools if needed. "
        "If the tools need to be called in a sequence then just call the first tool. The final response after thinking should **only** be a list of tools or single tool that you would call and or just a text message if not tool call is possible. "
        # json.dumps() so the model is shown real JSON (double-quoted keys)
        # instead of a Python dict repr, which is not valid JSON.
        f"Here is the schema for the output which can be just text response or a list of tool call. \n{json.dumps(schema)}"
    ),
}

def append_system_message(conversation, system_msg, tools):
    """Return a copy of ``conversation`` with the system prompt and tools folded in.

    The system message's content is prepended to the first message (separated
    by a blank line) and ``tools`` is appended to the last message after a
    single space. The incoming message dicts are copied rather than modified,
    so calling this repeatedly on the same conversation does not stack the
    system prompt or the tools more than once.

    Args:
        conversation: Non-empty sequence of message mappings, each with a
            "content" key.
        system_msg: Mapping whose "content" value is the system prompt text.
        tools: Tool definitions; interpolated into the last message via an
            f-string (i.e. formatted with ``str()``).

    Returns:
        A new list of new message dicts.

    Raises:
        IndexError: If ``conversation`` is empty.
    """
    # Shallow-copy each message so the caller's dicts are left untouched.
    prepared = [dict(message) for message in conversation]
    prepared[0]["content"] = f"{system_msg['content']}\n\n{prepared[0]['content']}"
    # attach tools string to the last message at the end
    prepared[-1]["content"] = f"{prepared[-1]['content']} {tools}"
    return prepared
  
row = df.iloc[3]

# Preview the prompt: build it once, from per-message copies, and print the
# first and last messages. Calling append_system_message twice on the shared
# `conversation` list would prepend the system prompt and append the tools a
# second time.
preview_messages = append_system_message(
    [dict(message) for message in conversation], system_msg, row.tools_langchain
)
print(preview_messages[0]["content"])
print(preview_messages[-1]["content"])

llm_handler = LLMHandler()
model = "accounts/fireworks/models/deepseek-r1"
# model = "deepseek-ai/DeepSeek-R1"

llm = llm_handler.get_llm(model, temperature=0.6, max_tokens=4000)

chain = llm
print([x["title"] for x in row.tools_langchain])

# Pass copies of the row's messages so the conversation stored in `df` is not
# modified, which keeps later cells that reuse `row.conversation` unaffected.
prompt_messages = append_system_message(
    [dict(message) for message in row.conversation], system_msg, row.tools_langchain
)
output = chain.invoke(prompt_messages)

print(output.content)

Your job is to use the given tools to answer the query of human. If there is no relevant tool then reply with 'I cannot answer the question with given tools'. If tool is available but sufficient information is not available, then ask human to get the same. You can call as many tools as you want. Use multiple tools if needed. If the tools need to be called in a sequence then just call the first tool. The final response after thinking should **only** be a list of tools or single tool that you would call and or just a text message if not tool call is possible. Here is the schema for the output which can be just text response or a list of tool call. 
{'$defs': {'TextResponse': {'properties': {'text': {'title': 'Text', 'type': 'string'}}, 'required': ['text'], 'title': 'TextResponse', 'type': 'object'}, 'ToolCall': {'properties': {'name': {'title': 'Name', 'type': 'string'}, 'args': {'title': 'Args', 'type': 'object'}}, 'required': ['name', 'args'], 'title': 'ToolCall', 'type': 'object'}}, 

In [None]:
# Same row, this time with a Llama 3.3 model id and the tools attached through
# bind_tools() instead of being pasted into the prompt text.
model = "meta-llama/Llama-3.3-70B-Instruct-Turbo"
llm = llm_handler.get_llm(model, temperature=0.6, max_tokens=4000)

chain = llm.bind_tools(row.tools_langchain)
tool_titles = [tool["title"] for tool in row.tools_langchain]
print(tool_titles)

# System message first, followed by the row's conversation turns.
llama_messages = [system_msg] + list(row.conversation)
output = chain.invoke(llama_messages)
print(output.content)

Using ChatTogether
['activateParkingBrake', 'adjustClimateControl', 'check_tire_pressure', 'displayCarStatus', 'display_log', 'estimate_distance', 'estimate_drive_feasibility_by_mileage', 'fillFuelTank', 'find_nearest_tire_shop', 'gallon_to_liter', 'get_current_speed', 'get_outside_temperature_from_google', 'get_outside_temperature_from_weather_com', 'get_zipcode_based_on_city', 'liter_to_gallon', 'lockDoors', 'pressBrakePedal', 'releaseBrakePedal', 'setCruiseControl', 'setHeadlights', 'set_navigation', 'startEngine', 'absolute_value', 'add', 'divide', 'imperial_si_conversion', 'logarithm', 'max_value', 'mean', 'min_value', 'multiply', 'percentage', 'power', 'round_number', 'si_unit_conversion', 'square_root', 'standard_deviation', 'subtract', 'sum_values']
<function=logarithm{"value": 10, "base": 20, "precision": 10}</function>
