# Imports and Loads

In [None]:
import os 
import pandas as pd 
import numpy as np 
import nest_asyncio
nest_asyncio.apply()
from dotenv import load_dotenv
load_dotenv()

## Export API Keys

In [2]:
def export_api_key(target, source):
    """Copy environment variable *source* into environment variable *target*.

    Assigning ``None`` to ``os.environ`` raises ``TypeError``, so a missing
    source variable is reported and skipped instead of aborting the cell.

    Args:
        target: name of the environment variable to set.
        source: name of the environment variable to read the value from.

    Returns:
        True if *target* was set, False if *source* is not defined.
    """
    value = os.getenv(source)
    if value is None:
        print(f"Warning: environment variable {source} is not set; {target} was not exported")
        return False
    os.environ[target] = value
    return True


# (target, source) pairs: the value of `source` is re-exported under `target`.
for _target, _source in (
    ("WANDB_API_KEY", "WANDB_CASPIA_API_KEY"),
    ("OPENAI_API_KEY", "OPENAI_API_KEY"),
    ("GROQ_API_KEY", "GROQ_API_KEY"),
    ("HF_TOKEN", "HF_CASPIA_API_KEY"),
    ("TOGETHER_API_KEY", "TOGETHER_CASPIA_API_KEY"),
    ("ANTHROPIC_API_KEY", "ANTHROPIC_CASPIA_API_KEY"),
):
    export_api_key(_target, _source)

## Load Data from Huggingface Hub

In [None]:
from datasets import load_dataset

# Load the 'caspia-technologies/benchmarking_rtl_svx' dataset.
data = load_dataset('caspia-technologies/benchmarking_rtl_svx')
# Convert the 'train' split to a pandas DataFrame; later cells read its
# 'parsed_description' and 'code' columns.
df_train = data['train'].to_pandas()

In [None]:
# Dimensions of the training DataFrame as (rows, columns).
df_train.shape

In [None]:
# Preview the first three rows of the raw training data.
df_train.head(3)

In [None]:
import pprint 
import json 

# NOTE: despite the name, this is not a random sample -- it is always row 0.
# 'parsed_description' holds a JSON string, so it is decoded before printing.
random_example = json.loads(df_train['parsed_description'][0])
# Code stored in the same row (not used further in this cell).
random_example_code = df_train['code'][0]
pprint.pprint(random_example)

In [7]:
# Flatten 'parsed_description' into six text columns -- a 'system' and a
# 'prompt' column for each of the three summary levels -- plus the 'code' column.
# Each JSON string is decoded once up front instead of once per derived column.
_parsed_descriptions = [json.loads(raw) for raw in df_train['parsed_description']]

df_train_parsed = pd.DataFrame(
    {
        # Column names follow the pattern '<role>_<summary level>'.
        f"{role}_{level}": [desc[level][role] for desc in _parsed_descriptions]
        for level in ('block_summary', 'detailed_global_summary', 'high_level_global_summary')
        for role in ('system', 'prompt')
    },
    index=df_train.index,  # keep row labels aligned with df_train
)
df_train_parsed['code'] = df_train['code']

In [None]:
# Preview the first three rows of the flattened DataFrame.
df_train_parsed.head(3)

## Do Inference

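Model List:
- Together: meta-llama/Llama-3.3-70B-Instruct-Turbo-Free
- Together: mistralai/Mistral-Small-24B-Instruct-2501
- Huggingface: huggingface:Qwen/Qwen2.5-72B-Instruct (listed for reference; not queried by `ask_llm` below)
- Anthropic: anthropic:claude-3-5-sonnet-20240620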

Let's do it for the first example

In [9]:
def get_example(index):
    """Return one row of ``df_train`` as a ``(description, code)`` pair.

    Args:
        index: label used to look up the row in the 'parsed_description'
            and 'code' columns of ``df_train``.

    Returns:
        A tuple of the row's 'parsed_description' decoded from JSON and the
        row's 'code' value as stored.
    """
    return (
        json.loads(df_train['parsed_description'][index]),
        df_train['code'][index],
    )

AI Suite Inference

In [None]:
import time
import weave
import tiktoken
from datetime import datetime
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type

def count_tokens(text: str) -> int:
    """Return the number of tokens in *text* under the 'gpt-4o' tiktoken encoding.

    The 'gpt-4o' encoding is used for every caller, whichever model produced
    the text.

    Args:
        text: the string to tokenize. ``None`` or an empty string is accepted
            so that a message or response without text content counts as zero
            tokens instead of raising.

    Returns:
        The token count; 0 for empty or ``None`` input.
    """
    if not text:
        return 0
    encoding = tiktoken.encoding_for_model('gpt-4o')
    return len(encoding.encode(text))

class LLMRequestError(Exception):
    """Raised by make_llm_request when a chat-completion request fails."""

# Initialize the Weave project named 'SVx'.
# NOTE(review): presumably this has to run before the @weave.op() functions
# below are called so that their calls are logged -- confirm.
weave.init('SVx')

@weave.op()
def track_token_metrics(messages, response):
    """Compute token counts for one chat-completion exchange.

    Args:
        messages: the prompt messages; each item is indexed with 'content'.
        response: the completion object; its text is read from
            ``response.choices[0].message.content``.

    Returns:
        A dict with 'input_tokens' (sum over all messages), 'output_tokens'
        (the first choice's text) and 'total_tokens' (their sum).
    """
    # Prompt side: add up the token count of every message's content.
    prompt_token_count = 0
    for msg in messages:
        prompt_token_count += count_tokens(msg['content'])

    # Completion side: only the first choice is measured.
    completion_token_count = count_tokens(response.choices[0].message.content)

    return dict(
        input_tokens=prompt_token_count,
        output_tokens=completion_token_count,
        total_tokens=prompt_token_count + completion_token_count,
    )

@weave.op()
@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=2, max=10),  # exponential backoff
    # Every failure of the API call is wrapped in LLMRequestError below, so
    # retrying on that type retries failed requests without re-issuing a
    # request that already succeeded (e.g. when token counting fails).
    retry=retry_if_exception_type(LLMRequestError),
    before_sleep=lambda retry_state: print(f"Attempt {retry_state.attempt_number} failed. Retrying..."),
    # Surface the last LLMRequestError itself rather than tenacity's RetryError,
    # so the caller's error message shows the real cause.
    reraise=True,
)
def make_llm_request(client, model, messages):
    """Send one chat-completion request and return the text with token metrics.

    Args:
        client: object exposing ``chat.completions.create(model=..., messages=...,
            temperature=...)``.
        model: model identifier passed through to the client.
        messages: list of message dicts passed through to the client and to
            ``track_token_metrics``.

    Returns:
        A dict with keys 'response' (text of the first choice), 'metrics'
        (the dict from ``track_token_metrics``), 'provider' ('together' or
        'aisuite') and 'model'.

    Raises:
        LLMRequestError: if the request still fails after three attempts; the
            original exception is attached as ``__cause__``.
    """
    try:
        # Make the API call
        response = client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=0.01
        )
        response_text = response.choices[0].message.content
    except Exception as e:
        print(f"Error in make_llm_request for model {model}: {str(e)}")
        raise LLMRequestError(f"Failed to generate response: {str(e)}") from e

    # Track metrics outside the try block: a failure here is not an API
    # failure and must not trigger another request.
    metrics = track_token_metrics(messages, response)

    # Identify the provider from the module that defines the client class
    # instead of from its string representation.
    provider = 'together' if 'together' in type(client).__module__ else 'aisuite'

    return {
        'response': response_text,
        'metrics': metrics,
        'provider': provider,
        'model': model
    }

In [None]:
import aisuite as ai 
from together import Together 
import pandas as pd


def ask_llm(query, query_code):
    """Send *query* to every configured model using a 2-shot prompt.

    Args:
        query: parsed description dict; ``query['block_summary']['system']``
            and ``query['block_summary']['prompt']`` form the user message.
        query_code: reference code for the query, stored as the ground truth.

    Returns:
        A one-row DataFrame whose first column is 'ground_truth', followed by
        one '<client name>_<model name>' column per model holding the response
        text, or None when the request failed.
    """
    #These are the examples for 2-shot learning, don't use these examples for inference or result benchmarking 
    example1, example_code1 = get_example(1)
    example2, example_code2 = get_example(1234)

    # Clients and the models queried through each of them.
    models = {
        "aisuite": {
            "client": ai.Client(),
            "models": {
                "Claude-3.5-Sonnet": "anthropic:claude-3-5-sonnet-20240620"
            }
        },
        "together": {
            "client": Together(),
            "models": {
                "Llama-3.3-70B-It": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
                "Mistral-Small-3-24B-It": "mistralai/Mistral-Small-24B-Instruct-2501"
            }
        }    
    }

    # Closing tags use '/' (the former '<\Example N>' was an invalid escape
    # sequence and did not match the '</User Query>' tag in the user message).
    system_message = """You are an helpful assistant. You will be given a query along with two example queries and corresponding answers to follow.
        Based on that, you will need to provide a solution to the query. Only answer with Verilog code block, do not generate anything else. Follow the example format for output.
        \n\n <Example 1> ##Instruction: {0} ##Query: {1} ##Answer: {2} \n\n </Example 1> \n\n <Example 2>
        ##Instruction: {3} ##Query: {4} ##Answer: {5} \n\n </Example 2> \n\n <User Query> \n\n"""

    formatted_system_message = system_message.format(
        example1['block_summary']['system'],
        example1['block_summary']['prompt'],
        example_code1,
        example2['block_summary']['system'],
        example2['block_summary']['prompt'],
        example_code2
    )

    # The '<User Query>' tag opened at the end of the system message is closed here.
    user_message = f"{query['block_summary']['system']} {query['block_summary']['prompt']} \n\n </User Query>"

    messages = [
        {
            "role": "system",
            "content": formatted_system_message
        },
        {
            "role": "user",
            "content": user_message
        }
    ]

    # Collect the single result row as a dict; 'ground_truth' goes first so it
    # becomes the first column.
    result_row = {'ground_truth': query_code}

    for client_name, client_data in models.items():
        print(f"Accessing {client_name} client...")
        for model_name, model in client_data['models'].items():
            print(f"Attempting response from {model_name}...")
            column = f"{client_name}_{model_name}"
            try:
                llm_response = make_llm_request(client_data['client'], model, messages)
            except Exception as e:
                # One failing model must not abort the others; record a gap.
                print(f"Error after all retries for {model_name}: {e}")
                result_row[column] = None
            else:
                result_row[column] = llm_response['response']
                print(f"Response from {model_name} generated successfully!")

    return pd.DataFrame([result_row])

In [None]:
# Example usage in Jupyter notebook:

# Fetch row 2 as the query (rows 1 and 1234 are used by ask_llm as few-shot examples):
query, query_code = get_example(2)

# Query every configured model (ask_llm is a regular synchronous function)
results = ask_llm(query, query_code)
#print(results)

In [None]:
# Initialize empty DataFrame with same columns as response
df_result = pd.DataFrame(columns=['ground_truth'])

# Process dataframe rows
for index, row in df_train_parsed.iterrows():
    try:
        # Get query and code
        query, query_code = get_example(index)
        
        # Get LLM response
        response_df = ask_llm(query, query_code)
        
        # Concatenate new response with existing results
        df_result = pd.concat([df_result, response_df], ignore_index=True)
        
        print(f"Processed row {index+1}")
        
        if index == 2:
            break
            
    except Exception as e:
        print(f"Error processing row {index}: {e}")
        continue

# Display results
print(f"Total rows processed: {len(df_result)}")

In [None]:
# Show the collected ground truth and model responses.
df_result

## Save the file

In [16]:
# Write the results to 'result.csv' (relative path), omitting the index column.
df_result.to_csv('result.csv', index=False)