In [1]:
%load_ext autoreload
%matplotlib inline

In [2]:
# Install hint kept for reference (a shell command, not executed by this cell):
# python -m pip install 'git+https://github.com/MaureenZOU/detectron2-xyz.git'

import numpy as np

from PIL import Image
import matplotlib.pyplot as plt

import sys
# Add the current working directory to the import path before the
# project-local imports below (utils, agents, executor).
# NOTE(review): assumes the notebook is launched from the repository root — confirm.
sys.path.append(".")
import os

# NOTE: `logging` below is the project's utils.logging module, not the
# standard-library `logging`; it shadows that name for the rest of the notebook.
from utils.logging import CustomLogger
from utils import logging
from agents import agent_factory

from executor import SimpleExecutor, LineWiseExecutor

from utils.image_utils import visualize_image


In [3]:
# Create the notebook's logger and hand it to utils.logging.set_logger.
# NOTE(review): presumably this makes other project modules log through it — confirm.
# The same `logger` object is used in the last cell to write the HTML log.
logger = CustomLogger()
logging.set_logger(logger)

In [None]:
# Build the environment object and run its setup step. `env` is later passed
# to run_experiment, which reads images and the execution context from it.
# NOTE(review): this import sits outside the main import cell; move it there
# unless importing environments.real_world has side effects that must wait
# until this point — confirm.
from environments.real_world import RealWorldEnv
env = RealWorldEnv()
env.setup()

In [5]:
def run_experiment(env, agent, prompt, debug=True):
    """Plan a task from the environment's current image and execute the plan.

    Steps:
      1. Fetch an image with ``env.get_image()`` and display it.
      2. Ask ``agent.try_plan(prompt, image)`` for a plan; print the result
         and display its ``annotated_image``.
      3. If the plan succeeded, run ``plan_result.plan_code`` through a
         ``LineWiseExecutor`` and display the image after execution.

    Args:
        env: Object providing ``get_image()`` and ``get_execution_context()``.
        agent: Object providing ``try_plan(prompt, image)``. The returned
            result must expose ``success``, ``annotated_image``, ``masks``
            (an iterable of mappings with a ``"segmentation"`` entry) and
            ``plan_code``.
        prompt: Task description forwarded unchanged to the agent.
        debug: Currently unused; kept so existing callers that pass it
            keep working.

    Returns:
        The plan result produced by the agent. It is returned on failure as
        well, so the caller can inspect why planning did not succeed;
        check ``plan_result.success`` to tell the two cases apart.
    """
    print("Image before planning: ")
    image = env.get_image()
    visualize_image(image)

    plan_result = agent.try_plan(prompt, image)
    print(plan_result)
    print("Annotated image: ")
    visualize_image(plan_result.annotated_image)

    if not plan_result.success:
        # Nothing to execute, but hand the failed result back instead of
        # None so the caller does not lose it.
        return plan_result

    # Alternative construction kept for reference:
    # executor = LineWiseExecutor(env.get_execution_context(), pause_every_line=True)
    executor = LineWiseExecutor(env.get_execution_context())
    # Expose the segmentation of every mask to the plan code as `regions`.
    context = {"regions": [mask["segmentation"] for mask in plan_result.masks]}
    executor.execute_plan(plan_result.plan_code, additional_context=context)

    print("After manipulation: ")
    visualize_image(env.get_image())

    return plan_result

Specify your task here by editing the `prompt` string in the next cell.

In [6]:
# Natural-language task description passed to the agent.
prompt = "Task: Put the red block into the green bowl."

In [None]:
# Build the agent registered under the name "SegVLM" and override its
# "img_size" config entry.
# NOTE(review): 640 is presumably an image dimension in pixels — confirm the
# unit and which side it applies to before changing it.
agent = agent_factory("SegVLM")
agent.configs["img_size"] = 640
# Plan and, if planning succeeds, execute the task against `env`; keep the
# returned value for later inspection.
plan_result = run_experiment(env, agent, prompt)

In [9]:
# Ask the logger to save its logs as HTML. The path is relative, so the file
# is written to the kernel's current working directory.
logger.save_logs_to_html_file("result.html")