In [5]:
import autogen
import jupyter_client
import base64
from base64 import b64decode

from autogen.agentchat.contrib.multimodal_conversable_agent import MultimodalConversableAgent
from autogen.agentchat.contrib.capabilities.vision_capability import VisionCapability
from autogen import Agent, AssistantAgent
from typing import List, Dict, Union, Optional

# Entries from OAI_CONFIG_LIST for both the vision and the text model.
# NOTE(review): this name is not referenced again in the visible code.
config_list = autogen.config_list_from_json(
    "OAI_CONFIG_LIST",
    filter_dict={"model": ["gpt-4-vision-preview", "gpt-4-1106-preview"]},
)

# Vision model only; used by the image critic and the vision capability.
config_list_4v = autogen.config_list_from_json(
    "OAI_CONFIG_LIST",
    filter_dict={"model": ["gpt-4-vision-preview"]},
)

# Same model filter as config_list; used by the coder and the chat manager.
config_list_gpt4 = autogen.config_list_from_json(
    "OAI_CONFIG_LIST",
    filter_dict={"model": ["gpt-4-vision-preview", "gpt-4-1106-preview"]},
)

# LLM settings handed to the group chat manager.
gpt4_llm_config = dict(config_list=config_list_gpt4, cache_seed=42)


# System prompt for the image critic: lists the scoring criteria and the exact
# row format the critic must answer in.  The criterion labels (Q1, Q2) must
# match the labels used in the output template so each score can be tied back
# to its question.
criteria_prompt = """Evaluate the image against the provided criteria. For each criterion, assign a score on a scale from 1 to 5, where 1 indicates the worst performance and 5 indicates the best.
Criteria:
Q1. How helpful is the image?
Q2. How easy is it to follow the image?

use the following format in your output:
Q1 | Q1 score | your reason 1
Q2 | Q2 score | your reason 2
...
"""

# Sample matplotlib script used as the starting point of the demo: it is
# interpolated verbatim into the first chat message below.  The script draws
# five subplots (cot(x), sin(cot(x)) and the chain-rule derivative terms) on a
# 2x3 grid and ends with plt.show(); it is kept as a plain string because it is
# meant to be executed elsewhere, not in this cell.
py_code = """
import matplotlib.pyplot as plt
import numpy as np

# Create a figure with high dpi
plt.figure(dpi=400)

# Define the x values
x = np.linspace(0.1, 2*np.pi, 400)
x = x[x != np.pi]  # Remove pi to avoid division by zero in cot(x) and csc(x)

# Calculate the functions
u = 1/np.tan(x)  # g(x) = cot(x)
y = np.sin(u)  # f(g(x)) = sin(cot(x))
g_prime = -1/(np.sin(x)**2)  # g'(x) = -csc^2(x)
f_prime_of_g = np.cos(u)  # f'(g(x)) = cos(cot(x))
dy_dx = f_prime_of_g * g_prime  # The derivative

# Plotting
plt.subplot(2, 3, 1)
plt.plot(x, u, label='$g(x) = cot(x)$')
plt.title('Inner Function $g(x)$')
plt.xlabel('$x$')
plt.ylabel('$g(x)$')
plt.legend()

plt.subplot(2, 3, 2)
plt.plot(x, y, label='$f(g(x)) = sin(cot(x))$')
plt.title('Composite Function $f(g(x))$')
plt.xlabel('$x$')
plt.ylabel('$f(g(x))$')
plt.legend()

plt.subplot(2, 3, 3)
plt.plot(x, g_prime, label="$g'(x) = -csc^2(x)$")
plt.title("Derivative of Inner Function $g'(x)$")
plt.xlabel('$x$')
plt.ylabel("$g'(x)$")
plt.legend()

plt.subplot(2, 3, 4)
plt.plot(x, f_prime_of_g, label="$f'(g(x)) = cos(cot(x))$")
plt.title("Derivative of Outer Function $f'(u)$")
plt.xlabel('$x$')
plt.ylabel("$f'(g(x))$")
plt.legend()

plt.subplot(2, 3, 5)
plt.plot(x, dy_dx, label="$dy/dx = -cos(cot(x)) \\cdot csc^2(x)$")
plt.title("Derivative of Composite Function $dy/dx$")
plt.xlabel('$x$')
plt.ylabel("$dy/dx$")
plt.legend()

# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()
"""


# Run code in a throwaway Jupyter kernel and return (image bytes, error message).
def call_python(code: str, timeout: float = 90):
    """Execute ``code`` in a fresh Jupyter kernel and capture the first PNG it displays.

    A new kernel is started for every call and always shut down afterwards.

    Args:
        code: Python source to run in the kernel.
        timeout: Seconds to wait for the kernel to become ready, and for each
            subsequent IOPub message.

    Returns:
        A ``(image_data, error_message)`` tuple:

        * ``(bytes, None)`` -- decoded bytes of the first ``image/png`` output;
        * ``(None, str)`` -- the code raised, the kernel never became ready,
          or waiting for output failed or timed out;
        * ``(None, None)`` -- the code finished without displaying a PNG.
    """
    import queue  # local import: get_iopub_msg raises queue.Empty on timeout

    # Create a new kernel manager
    km = jupyter_client.KernelManager()
    km.start_kernel()

    kc = None
    image_data, error_message = None, None
    try:
        # Create the client inside the try block so the kernel is still shut
        # down if channel setup fails.
        kc = km.client()
        kc.start_channels()

        # Without this the loop below can see the kernel's start-up
        # "status: idle" message and stop before the code has even run.
        try:
            kc.wait_for_ready(timeout=timeout)
        except RuntimeError as e:
            return None, f"Exception while waiting for kernel to start: {e}"

        # Remember the request id so unrelated IOPub traffic can be skipped.
        msg_id = kc.execute(code)

        # Wait for and process the execution result
        while True:
            try:
                msg = kc.get_iopub_msg(timeout=timeout)

                # Only messages that answer our execute request are relevant.
                if msg.get('parent_header', {}).get('msg_id') != msg_id:
                    continue

                msg_type = msg['header']['msg_type']
                content = msg['content']

                if msg_type in ('execute_result', 'display_data'):
                    data = content['data']
                    if 'image/png' in data:
                        # The kernel sends the PNG base64-encoded.
                        image_data = b64decode(data['image/png'])
                        break
                elif msg_type == 'error':
                    error_message = f"Error: {content['ename']}: {content['evalue']}\n"
                    error_message += "\n".join(content['traceback'])
                    break
                elif msg_type == 'status' and content['execution_state'] == 'idle':
                    # Execution finished without an image or an error.
                    break
            except queue.Empty:
                # str(queue.Empty()) is empty, so spell the timeout out.
                error_message = (
                    "Exception while waiting for execute result: "
                    f"no output received within {timeout} seconds"
                )
                break
            except Exception as e:
                error_message = f"Exception while waiting for execute result: {e}"
                break
    finally:
        # Stop the channels and kernel
        if kc is not None:
            kc.stop_channels()
        km.shutdown_kernel()

    if error_message:
        return None, error_message

    return image_data, None

def code_to_image_bytes_or_error(code: str):
    """Run ``code`` and return its plot as an ``<img ...>`` tag, or an error text.

    Args:
        code: Python source passed to ``call_python``.

    Returns:
        A string: either ``<img data:image/png;base64,...>`` wrapping the PNG
        returned by ``call_python``, the error message it reported, or a
        message saying that the code produced no image.
    """
    image_data, error = call_python(code)
    if error:
        return error

    # call_python returns (None, None) when the code ran cleanly but displayed
    # nothing; base64.b64encode(None) would raise TypeError.
    if image_data is None:
        return "Error: the code ran without errors but did not produce an image/png output."

    image_bytes = base64.b64encode(image_data).decode('utf-8')
    # The bytes come from the kernel's image/png output, so label them as PNG.
    return f"""<img data:image/png;base64,{image_bytes}>"""
        

# Critic agent: answers with scores in the format laid out in criteria_prompt.
image_critic = MultimodalConversableAgent(
    name="image-explainer",
    system_message=criteria_prompt,
    max_consecutive_auto_reply=10,
    llm_config={
        "config_list": config_list_4v,
        "temperature": 0,
        "max_tokens": 4096,
    },
)

# Proxy agent that opens the conversation.  No function_map is given here:
# the executor function is registered on the Coder agent below.
user_proxy = autogen.UserProxyAgent(
    name="User_proxy",
    system_message="A human admin.",
    human_input_mode="TERMINATE",
    code_execution_config={"use_docker": False},
)

# Coder agent: advertises code_to_image_bytes_or_error as a callable function
# and maps that name to the local implementation.
coder = MultimodalConversableAgent(
    name="Coder",
    system_message="your goal is to trigger the function call for code",
    llm_config={
        "config_list": config_list_gpt4,
        "temperature": 0,
        "max_tokens": 4096,
        "functions": [
            {
                "name": "code_to_image_bytes_or_error",
                "description": "python code executor",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "code": {
                            "type": "string",
                            "description": "python code str",
                        },
                    },
                    "required": ["code"],
                },
            },
        ],
    },
    function_map={"code_to_image_bytes_or_error": code_to_image_bytes_or_error},
)

# Group chat over the three agents, capped at 12 rounds.
groupchat = autogen.GroupChat(
    agents=[user_proxy, image_critic, coder],
    messages=[],
    max_round=12,
)

# Attach the vision capability to the manager that runs the group chat.
vision_capability = VisionCapability(
    lmm_config={
        "config_list": config_list_4v,
        "temperature": 0.5,
        "max_tokens": 300,
    }
)
group_chat_manager = autogen.GroupChatManager(
    groupchat=groupchat,
    llm_config=gpt4_llm_config,
)
vision_capability.add_to_agent(group_chat_manager)

# Kick off the chat; the sample script is embedded in the first message.
user_proxy.initiate_chat(
    group_chat_manager,
    verbose=True,
    message=f""" let's improve the code by using the following flow: 
step 1. trigger the function call for given code (I will execute the code and provide image to you)
step 2. send the image to critics for feedback 
step 3. use the critics feedback to update the code 
continue step 1-3 until all feedback has been addressed.

let's start with the the initial python code with no function call in `user_proxy` agent,  

```python
{py_code}
```
""",
)

[33mUser_proxy[0m (to chat_manager):

 let's improve the code by using the following flow: 
step 1. trigger the function call for given code (I will execute the code and provide image to you)
step 2. send the image to critics for feedback 
step 3. use the critics feedback to update the code 
continue step 1-3 until all feedback has been addressed.

let's start with the the initial python code with no function call in `user_proxy` agent,  

```python

import matplotlib.pyplot as plt
import numpy as np

# Create a figure with high dpi
plt.figure(dpi=400)

# Define the x values
x = np.linspace(0.1, 2*np.pi, 400)
x = x[x != np.pi]  # Remove pi to avoid division by zero in cot(x) and csc(x)

# Calculate the functions
u = 1/np.tan(x)  # g(x) = cot(x)
y = np.sin(u)  # f(g(x)) = sin(cot(x))
g_prime = -1/(np.sin(x)**2)  # g'(x) = -csc^2(x)
f_prime_of_g = np.cos(u)  # f'(g(x)) = cos(cot(x))
dy_dx = f_prime_of_g * g_prime  # The derivative

# Plotting
plt.subplot(2, 3, 1)
plt.plot(x, u, label='

ChatResult(chat_id=None, chat_history=[{'content': ' let\'s improve the code by using the following flow: \nstep 1. trigger the function call for given code (I will execute the code and provide image to you)\nstep 2. send the image to critics for feedback \nstep 3. use the critics feedback to update the code \ncontinue step 1-3 until all feedback has been addressed.\n\nlet\'s start with the the initial python code with no function call in `user_proxy` agent,  \n\n```python\n\nimport matplotlib.pyplot as plt\nimport numpy as np\n\n# Create a figure with high dpi\nplt.figure(dpi=400)\n\n# Define the x values\nx = np.linspace(0.1, 2*np.pi, 400)\nx = x[x != np.pi]  # Remove pi to avoid division by zero in cot(x) and csc(x)\n\n# Calculate the functions\nu = 1/np.tan(x)  # g(x) = cot(x)\ny = np.sin(u)  # f(g(x)) = sin(cot(x))\ng_prime = -1/(np.sin(x)**2)  # g\'(x) = -csc^2(x)\nf_prime_of_g = np.cos(u)  # f\'(g(x)) = cos(cot(x))\ndy_dx = f_prime_of_g * g_prime  # The derivative\n\n# Plotting\