In [1]:
import os
from taskgen import Agent, Function
from dotenv import load_dotenv
import cv2
import numpy as np

In [2]:
# Specify the path to your .env file
dotenv_path = '../.env'

# Load environment variables from a .env file
load_dotenv()

# Now you can safely use the environment variable
api_key = os.getenv('OPENAI_API_KEY')

In [3]:
# Create your agent
my_agent = Agent(agent_name='Computer Vision Assistant', 
                 agent_description='You are a computer vision agent that can perform various image processing tasks')

In [4]:
# Example External Function for image resizing
def resize_image(shared_variables, image_path: str, width: int, height: int):
    '''Resizes the image using the <image_path>, <width>, and <height>'''
    image = cv2.imread(image_path)
    resized_image = cv2.resize(image, (width, height))
    shared_variables["resized_image_path"] = "resized_image.jpg"
    cv2.imwrite(shared_variables["resized_image_path"], resized_image)
    return {"resized_image_path": shared_variables["resized_image_path"]}

# Function for image resizing
resize_fn = Function(
    fn_description='Resize the image at <image_path> to a width of <width> and height of <height>',
    external_fn=resize_image,                  
    output_format={'resized_image_path': 'Path to the resized image'}
)

In [5]:
# Example External Function for edge detection
def detect_edges(shared_variables, image_path: str, threshold1: int, threshold2: int):
    '''Performs edge detection on the image using the <image_path>, <threshold1>, and <threshold2>'''
    image = cv2.imread(image_path)
    edges = cv2.Canny(image, threshold1, threshold2)
    shared_variables["edge_image_path"] = "edge_image.jpg"
    cv2.imwrite(shared_variables["edge_image_path"], edges)
    return {"edge_image_path": shared_variables["edge_image_path"]}

# Function for edge detection
edge_detection_fn = Function(
    fn_description='Perform edge detection on the image at <image_path> with a lower threshold of <threshold1> and an upper threshold of <threshold2>',
    external_fn=detect_edges, 
    output_format={'edge_image_path': 'Path to the image with detected edges'}
)

In [6]:
# Assign functions to the agent
my_agent.assign_functions(function_list=[resize_fn, edge_detection_fn])

<taskgen.agent.Agent at 0x14044d3f0>

In [7]:
# Show the functions the agent has
my_agent.print_functions()

Name: use_llm
Description: Used only when no other function can do the task
Input: []
Output: {'Output': 'Output of LLM'}

Name: end_task
Description: Use only when task is completed
Input: []
Output: {}

Name: resize_image
Description: Resize the image at <image_path> to a width of <width> and height of <height>
Input: ['image_path', 'width', 'height']
Output: {'resized_image_path': 'Path to the resized image'}

Name: detect_edges
Description: Perform edge detection on the image at <image_path> with a lower threshold of <threshold1> and an upper threshold of <threshold2>
Input: ['image_path', 'threshold1', 'threshold2']
Output: {'edge_image_path': 'Path to the image with detected edges'}



In [8]:
# Example usage
image_path = 'example_image.jpg'
output = my_agent.run(f'Resize the image at {image_path} to a width of 800 and height of 600')
print(output)

output = my_agent.run(f'Perform edge detection on the image at {image_path} with a lower threshold of 100 and an upper threshold of 200')
print(output)

Subtask identified: Resize the image at example_image.jpg to a width of 800 and height of 600
Calling function resize_image with parameters {'image_path': 'example_image.jpg', 'width': 800, 'height': 600}
> {'resized_image_path': {'resized_image_path': 'resized_image.jpg'}}

Task completed successfully!

[{'resized_image_path': {'resized_image_path': 'resized_image.jpg'}}]
Subtask identified: Perform edge detection on the image at example_image.jpg with a lower threshold of 100 and an upper threshold of 200
Calling function detect_edges with parameters {'image_path': 'resized_image.jpg', 'threshold1': 100, 'threshold2': 200}
> {'edge_image_path': {'edge_image_path': 'edge_image.jpg'}}

Task completed successfully!

[{'resized_image_path': {'resized_image_path': 'resized_image.jpg'}}, {'edge_image_path': {'edge_image_path': 'edge_image.jpg'}}]


In [9]:
# Give a response to the user
output = my_agent.reply_user('The image processing tasks have been completed successfully.')

The image processing tasks have been completed successfully.


# Computer Vision Agent

This code demonstrates a computer vision agent that can perform various image processing tasks using the `taskgen` library and OpenCV.

## Prerequisites

To run this code, you need to have the following installed:

1. Python (version 3.6 or higher)
2. `taskgen` library
   - Install using `pip install taskgen`
3. OpenCV (cv2)
   - Install using `pip install opencv-python`
4. NumPy
   - Install using `pip install numpy`
5. python-dotenv
   - Install using `pip install python-dotenv`

## Setup

1. Make sure you have the required libraries installed as mentioned in the prerequisites.

2. Create a `.env` file in the parent directory of your project and add your OpenAI API key:
```
OPENAI_API_KEY=your-api-key
```

Replace `your_api_key_here` with your actual OpenAI API key.

3. Place the example image file (`example_image.jpg`) in the same directory as the code file.

## Usage

1. Run the code file using a Python interpreter.

2. The code will perform the following tasks:
- Resize the example image to a width of 800 and a height of 600.
- Perform edge detection on the example image with a lower threshold of 100 and an upper threshold of 200.

3. The processed images will be saved as `resized_image.jpg` and `edge_image.jpg` in the same directory as the code file.

4. The file paths of the processed images will be printed as the output of the agent's `run` method.

Note: Make sure you have the necessary permissions to read the example image file and write the processed image files in the directory.

## Additional Notes

- The code uses the `dotenv` library to load the OpenAI API key from the `.env` file. Make sure the `.env` file is properly configured with your API key.

- The `taskgen` library is used to create an agent and define functions for image processing tasks. Refer to the `taskgen` documentation for more information on its usage and capabilities.

- The OpenCV library (cv2) is used for image processing operations such as resizing and edge detection. You can explore more OpenCV functionalities to extend the code as needed.

```requirements.txt
annotated-types==0.6.0
anyio==3.7.1
appnope==0.1.4
asttokens==2.4.1
certifi==2024.2.2
comm==0.2.1
debugpy==1.8.1
decorator==5.1.1
distro==1.9.0
exceptiongroup==1.2.0
executing==2.0.1
h11==0.14.0
httpcore==1.0.4
httpx==0.27.0
idna==3.6
ipykernel==6.29.3
ipython==8.22.2
jedi==0.19.1
jupyter_client==8.6.0
jupyter_core==5.7.1
matplotlib-inline==0.1.6
nest-asyncio==1.6.0
numpy==1.26.4
openai==1.3.6
opencv-python==4.9.0.80
packaging==23.2
parso==0.8.3
pexpect==4.9.0
platformdirs==4.2.0
prompt-toolkit==3.0.43
psutil==5.9.8
ptyprocess==0.7.0
pure-eval==0.2.2
pydantic==2.6.3
pydantic_core==2.16.3
Pygments==2.17.2
python-dateutil==2.9.0.post0
python-dotenv==1.0.1
pyzmq==25.1.2
six==1.16.0
sniffio==1.3.1
stack-data==0.6.3
taskgen-ai==0.0.8
tornado==6.4
tqdm==4.66.2
traitlets==5.14.1
typing_extensions==4.10.0
wcwidth==0.2.13
```