In [1]:
from dotenv import load_dotenv
import pandas as pd
import os
import boto3
from botocore.exceptions import ClientError
import json
from bedrock_agentcore.tools.code_interpreter_client import CodeInterpreter
import uuid

# Folder holding the input dataset, given relative to the notebook's working directory.
data_folder = "../../data"
# CSV with the tasks to solve; besides 'instruction', later cells read its
# 'test_list' and 'id' columns.
test_file_path = os.path.join(data_folder, "merged_instructions.csv")
df = pd.read_csv(test_file_path)
# Bare final expression so the notebook displays a preview of the instructions.
df['instruction'].head()

0    স্ট্রিং থেকে প্রদত্ত অক্ষরের প্রথম এবং শেষ উপস...
1    একটি প্রদত্ত ম্যাট্রিক্সকে তার সারিগুলির যোগফল...
2    একটি ফাংশন লিখুন যা একটি অভিধানে সবচেয়ে সাধার...
3    একটি ত্রিভুজাকার প্রিজমের আয়তন খুঁজে বের করার...
4    একটি স্ট্রিংকে ছোট অক্ষরে বিভক্ত করার জন্য একট...
Name: instruction, dtype: object

In [2]:
def strip_code_fences(s: str) -> str:
    """Return the code inside a Markdown fence, without the fence markers.

    The text is stripped of surrounding whitespace first. If it contains an
    opening fence (three backticks) at the start of the text or at the start
    of a later line, everything up to and including the fence line (which may
    carry a language tag) is dropped, and everything from the last closing
    fence onwards is dropped as well. Text without a fence is returned
    stripped but otherwise unchanged.

    Args:
        s: Text to clean. ``None`` is treated as "no code" and yields ``""``;
            any other non-string value is converted with ``str()``.

    Returns:
        The extracted code, stripped of leading/trailing whitespace.
    """
    if s is None:
        # Avoid turning a missing response into the literal source text "None".
        return ""
    if not isinstance(s, str):
        s = str(s)
    s = s.strip()

    if s.startswith("```"):
        fence_at = 0
    else:
        # Models often put a sentence before the fence; look for a fence that
        # opens at the start of a later line.
        newline_before_fence = s.find("\n```")
        if newline_before_fence == -1:
            return s
        fence_at = newline_before_fence + 1

    body = s[fence_at + 3:]
    if "\n" in body:
        # Drop the remainder of the opening fence line (e.g. "python").
        body = body.split("\n", 1)[1]
    if "```" in body:
        body = body.rsplit("```", 1)[0]
    body = body.strip()

    if not body and fence_at != 0:
        # The only fence found was a trailing one, so the text before it is
        # the actual content; keep it rather than returning nothing.
        return s
    return body

In [None]:
def interpreter_result(code: str):
    """Execute ``code`` in a fresh code-interpreter session and collect the results.

    A new ``CodeInterpreter`` session is started in ``us-west-2`` for every
    call and is always stopped again, including when execution fails.

    Args:
        code: Python source to run in the interpreter session.

    Returns:
        A list containing the ``"result"`` entry of every event in the
        response stream, or ``None`` if creating/starting the session,
        invoking the code, or reading the stream raised an exception.
    """
    code_client = None
    try:
        # Configure and start the code interpreter session
        code_client = CodeInterpreter('us-west-2')
        code_client.start(session_timeout_seconds=10)

        # Execute the supplied code
        response = code_client.invoke("executeCode", {
            "language": "python",
            "code": code
        })

        # Capture the streamed results instead of printing them
        return [event["result"] for event in response["stream"]]
    except Exception:
        # Deliberate best-effort: any failure is reported to the caller as None.
        return None
    finally:
        # Stop the session on every path so a failed run does not leave it open.
        if code_client is not None:
            try:
                code_client.stop()
            except Exception:
                # Cleanup is best-effort; a failing stop() must not hide the
                # results (or the None) already being returned.
                pass

def generate_code(instruction: str):
    """Ask the Bedrock-hosted Llama model for code that satisfies ``instruction``.

    The instruction is sent as the single user message of a Converse call,
    together with a fixed system prompt listing the generation guidelines.

    Args:
        instruction: The task description forwarded to the model.

    Returns:
        The text of the first non-empty text block in the model's reply, or
        ``None`` when the reply has no such block or the call raised (in which
        case the error is printed).
    """
    model_id = 'us.meta.llama3-2-90b-instruct-v1:0'
    bedrock = boto3.client("bedrock-runtime", region_name="us-west-2")

    # NOTE(review): the first two sentences are joined without a separator;
    # the text is kept exactly as it was.
    system_prompt = (
        "You are a helpful assistant that generates code snippets based on user instructions."
        "The method signature should be according to the given in the example part.\n"
        "Use the following guidlines in generation: \n"
        " - Provide the code in the response only\n"
        " - Do not output any other texts or test cases results.\n"
        " - Include the necessary import statements on the top even if they are usually not needed like - re, typing, itertools, Split etc. \n"
        " - Do not include any comments in the code\n"
        " - Do not forget to consider the edge cases\n"
        " - Avoid recursion if possible\n"
        " - Lastly, verify the code has no compilations error and contains the necessary imports.\n"
    )

    user_turn = {"role": "user", "content": [{"text": instruction}]}

    try:
        # Single-turn request with a basic inference configuration.
        reply = bedrock.converse(
            modelId=model_id,
            messages=[user_turn],
            system=[{"text": system_prompt}],
            inferenceConfig={"maxTokens": 1024, "temperature": 0.5},
        )
        blocks = reply["output"]["message"]["content"]
        # First block carrying non-empty text wins; None if there is none.
        return next(
            (block.get("text") for block in blocks if block.get("text")),
            None,
        )
    except Exception as e:
        print(f"ERROR: Can't invoke '{model_id}'. Reason: {e}")
        return None

import ast
def parse_tests(raw) -> list:
    """Turn a stored test-list value into a list of test strings.

    ``raw`` is converted to text and evaluated as a Python literal. If that
    yields a string (the list was stored quoted), the string is evaluated
    once more.

    Args:
        raw: The stored value, e.g. the text of a list literal.

    Returns:
        The parsed items, each converted with ``str()``.

    Raises:
        ValueError: If the parsed value is neither a list nor a tuple.
            ``ast.literal_eval`` may also raise for text that is not a
            valid literal.
    """
    parsed = ast.literal_eval(str(raw))
    if isinstance(parsed, str):
        # Double-encoded value: unwrap the inner literal.
        parsed = ast.literal_eval(parsed)
    if isinstance(parsed, (list, tuple)):
        return list(map(str, parsed))
    raise ValueError("test_list parsed to non-list")

In [None]:

# Number of regeneration attempts allowed after the first failed run.
MAX_RETRIES = 5


def _execution_failed(execution_result) -> bool:
    """Tell whether an ``interpreter_result`` value counts as a failed run.

    ``None`` (the interpreter call itself failed) and an empty result list
    (nothing to inspect, so success cannot be confirmed) both count as
    failures; otherwise the first result's ``isError`` flag decides.
    """
    if not execution_result:
        return True
    return bool(execution_result[0]['isError'])


def _failure_message(execution_result):
    """Pick the error text that is fed back to the model for a failed run."""
    if execution_result is None:
        return "Time limit exceeded"
    if not execution_result:
        return "The code interpreter returned no output"
    first_result = execution_result[0]
    if "structuredContent" in first_result:
        return first_result["structuredContent"]["stderr"]
    return first_result["content"][0]["text"]


def _generate_and_run(prompt: str, test_case: str):
    """Generate code for ``prompt`` and run it followed by ``test_case``.

    Returns the cleaned code and the interpreter result for code + test.
    """
    generated = strip_code_fences(generate_code(prompt))
    return generated, interpreter_result(generated + "\n" + test_case)


result_data = []

for index, row in df.iterrows():

    # Only the first test of each row is shown to the model and executed.
    given_test = parse_tests(row['test_list'])[0]
    instruction = row['instruction'] + "\n Here is a sample given test case:\n" + given_test

    code, execution_result = _generate_and_run(instruction, given_test)

    retry_count = 0
    while _execution_failed(execution_result) and retry_count < MAX_RETRIES:
        print(f"Retrying for id {row['id']}, attempt {retry_count + 1}")

        error_message = _failure_message(execution_result)
        # f-string formatting keeps this working if the error payload is not a str.
        retry_message = (
            f"{instruction}\nCurrent code:\n{code}"
            f"\nError Message:\n{error_message}\nPlease fix the code."
        )
        code, execution_result = _generate_and_run(retry_message, given_test)
        retry_count += 1

    # The last generated code is recorded even if it still fails after all retries.
    result_data.append({
        "id": row['id'],
        "response": code
    })
    print("Completed:", row['id'])

result_df = pd.DataFrame(result_data)

Retrying for id 1, attempt 1
Retrying for id 1, attempt 2
Retrying for id 1, attempt 3
Retrying for id 1, attempt 4
Retrying for id 1, attempt 5
Completed: 1
Retrying for id 2, attempt 1
Retrying for id 2, attempt 2
Completed: 2
Retrying for id 3, attempt 1
Retrying for id 3, attempt 2
Retrying for id 3, attempt 3
Retrying for id 3, attempt 4
Retrying for id 3, attempt 5
Completed: 3
Retrying for id 4, attempt 1
Retrying for id 4, attempt 2
Retrying for id 4, attempt 3
Retrying for id 4, attempt 4
Retrying for id 4, attempt 5
Completed: 4
Retrying for id 5, attempt 1
Retrying for id 5, attempt 2
Retrying for id 5, attempt 3
Retrying for id 5, attempt 4
Retrying for id 5, attempt 5
Completed: 5
Completed: 6
Completed: 7
Retrying for id 8, attempt 1
Retrying for id 8, attempt 2
Retrying for id 8, attempt 3
Retrying for id 8, attempt 4


In [None]:
# Serialise result_df as a JSON array with one object per row, indented by 4 spaces.
result_json = result_df.to_json(orient="records", indent=4)
# Write the submission file. NOTE(review): the path is relative to the current
# working directory, not data_folder -- confirm this is the intended location.
with open("llama_3.2_90B_submission.json", "w") as json_file:
    json_file.write(result_json)