In [1]:
import json
import os
from typing import Optional

import boto3
import pandas as pd
from botocore.exceptions import ClientError
from dotenv import load_dotenv

In [2]:
# Location of the input data, relative to the notebook's working directory.
data_folder = "../data"
test_file_path = os.path.join(data_folder, "merged_instructions.csv")

# Load the instruction set; later cells read its 'instruction' and 'id' columns.
df = pd.read_csv(filepath_or_buffer=test_file_path)

# Preview the first five instructions.
df["instruction"].head(n=5)

0    স্ট্রিং থেকে প্রদত্ত অক্ষরের প্রথম এবং শেষ উপস...
1    একটি প্রদত্ত ম্যাট্রিক্সকে তার সারিগুলির যোগফল...
2    একটি ফাংশন লিখুন যা একটি অভিধানে সবচেয়ে সাধার...
3    একটি ত্রিভুজাকার প্রিজমের আয়তন খুঁজে বের করার...
4    একটি স্ট্রিংকে ছোট অক্ষরে বিভক্ত করার জন্য একট...
Name: instruction, dtype: object

In [3]:
# Test case generator Agent
# Shared Bedrock Runtime client; the cells below call client.converse(...) on it.
# NOTE(review): load_dotenv is imported at the top but never called in the
# visible cells -- confirm AWS credentials are supplied some other way.
client = boto3.client(service_name="bedrock-runtime", region_name="us-west-2")

def generate_test_cases(instruction, model_id: str = 'openai.gpt-oss-120b-1:0') -> Optional[str]:
    """Ask a Bedrock-hosted model for assert-style test cases for an instruction.

    The instruction is sent as the single user message, together with a fixed
    system prompt, through the module-level ``client`` (``client.converse``).

    Args:
        instruction: Task description to generate test cases for.
        model_id: Bedrock model identifier passed to ``client.converse``.

    Returns:
        The text of the first non-empty text content block in the reply, or
        ``None`` when the instruction is blank or not a string, when the call
        fails, or when the reply contains no text block.
    """
    # A missing value (e.g. NaN read from the CSV) or a blank string cannot
    # yield useful test cases, so skip the network call entirely.
    if not isinstance(instruction, str) or not instruction.strip():
        return None

    # Plain string (no placeholders are interpolated); text kept verbatim.
    prompt = """
    You are a test case generator. Your task is to generate test cases for the given method signature.\n
    - The test cases should be in the form of input-output pairs with assert statements.\n
    - The input should be a valid input for the instruction, and the output should be the expected output for that input.\n
    - Each test case should be on a separate line.\n
    - The test cases should be in python.\n
    - Consider edge cases and typical use cases.\n
    - Generate upto 5 test cases.\n
    - Do not include any explanations or additional text.\n
    """
    messages = [{"role": "user", "content": [{"text": instruction}]}]
    system = [{"text": prompt}]

    try:
        response = client.converse(
            modelId=model_id,
            messages=messages,
            system=system,
            inferenceConfig={
                "maxTokens": 1024,
                "temperature": 0.5
            },
        )

        # Only blocks carrying a non-empty "text" entry are considered;
        # any other kind of content block is skipped.
        for content_block in response["output"]["message"]["content"]:
            text = content_block.get("text")
            if text:
                return text

    # Deliberately best-effort: test cases are optional for the caller, so any
    # failure (ClientError is itself an Exception) is reported and yields None.
    except Exception as e:
        print(f"ERROR: Can't invoke '{model_id}'. Reason: {e}")

    return None

In [None]:

# For every instruction: optionally enrich it with generated test cases, ask
# the code model for a solution, and record exactly one {"id", "response"}
# entry per row (an empty response when the model call fails or returns no text).

# Loop-invariant request settings, built once instead of on every iteration.
model_id = 'openai.gpt-oss-20b-1:0'

prompt = (
    "You are a helpful assistant that generates code snippets based on user instructions. "
    "The method signature should be according to the given in the example part.\n"
    "Use the following guidlines in generation: \n"
    " - Provide the code in the response only\n"
    " - Do not output any other texts or test cases results.\n"
    " - Include the necessary import statements on the top even if they are usually not needed like - re, typing, itertools, Split etc. \n"
    " - Do not include any comments in the code\n"
    " - Do not forget to consider the edge cases\n"
    " - Avoid recursion if possible\n"
    " - Lastly, verify the code has no compilations error and contains the necessary imports.\n"
)

system = [{"text": prompt}]

result_data = []

for index, row in df.iterrows():
    message = row['instruction']

    test_case_assert_statements = generate_test_cases(message)

    if test_case_assert_statements:
        message += "\n Your goal is to pass the following test cases:\n" + test_case_assert_statements # add test cases if generated

    messages = [{"role": "user", "content": [{"text": message}]}]

    # Default used when the call fails or the reply has no text, so the id is
    # never missing from the results.
    code_response = ""

    try:
        # Send the message to the model, using a basic inference configuration.
        response = client.converse(
            modelId=model_id,
            messages=messages,
            system=system,
            inferenceConfig={
                "maxTokens": 1024,
                "temperature": 0.5
            },
        )

        # Collect every non-empty text block and merge them into one response,
        # so a multi-block reply cannot create duplicate ids.
        text_blocks = [
            content_block["text"]
            for content_block in response["output"]["message"]["content"]
            if content_block.get("text")
        ]
        code_response = "".join(text_blocks)

    # Best-effort: report the failure and keep going with an empty response
    # (ClientError is itself an Exception).
    except Exception as e:
        print(f"ERROR: Can't invoke '{model_id}'. Reason: {e}")

    result_data.append({
        "id": row['id'],
        "response": code_response
    })

    print("Completed:", row['id'])

# Explicit columns keep the frame well-formed even when df has no rows.
result_df = pd.DataFrame(result_data, columns=["id", "response"])

Completed: 1
Completed: 2


In [None]:
# Serialise result_df as a pretty-printed JSON array with one object per row.
result_json = result_df.to_json(indent=4, orient="records")

# The submission file is written to the current working directory
# (data_folder is not used here).
with open(file="gpt_oss_20B_submission.json", mode="w") as json_file:
    json_file.write(result_json)