Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Lint workflow: runs the Ruff linter and the Ruff formatter check on every
# push to main and on every pull request.
name: Lint

on:
  push:
    branches: [main]
  pull_request:

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: astral-sh/setup-uv@v5
      - run: uv python install 3.10
      # Run Ruff through `uvx` (an isolated, ephemeral tool environment) instead
      # of `uv pip install ruff` + `uv run ruff`: `uv pip install` refuses to
      # install when no virtual environment exists (and none is created here),
      # and `uv run` may try to sync the whole project just to invoke a linter.
      # The version is pinned to the same release as the ruff-pre-commit `rev`
      # in .pre-commit-config.yaml so CI and local hooks report the same
      # results; bump both together.
      - run: uvx ruff@0.12.2 check .
      - run: uvx ruff@0.12.2 format --check .
7 changes: 7 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# pre-commit configuration: runs Ruff's linter and formatter hooks.
repos:
# Hooks come from the ruff-pre-commit repository.
- repo: https://github.com/astral-sh/ruff-pre-commit
# Pinned release tag of the hook repository.
rev: v0.12.2
hooks:
# Linter hook; `--fix` is passed through to Ruff as an argument.
- id: ruff
args: [--fix]
# Formatter hook.
- id: ruff-format
10 changes: 5 additions & 5 deletions agents/game_agent/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@
# Jianwei Yang (jianwyan@microsoft.com)
# --------------------------------------------------------

import pygame
import numpy as np
import random
import re

import gradio as gr
import time
import numpy as np
import pygame
import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor
import re
import random

pygame.mixer.quit() # Disable sound

Expand Down
21 changes: 6 additions & 15 deletions agents/libero/eval_magma_libero.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,13 @@
import os
import numpy as np
import draccus
from dataclasses import dataclass
from typing import Optional, Tuple
from typing import Tuple

import draccus
import tqdm
from libero.libero import benchmark
from libero_env_utils import (
get_libero_env,
get_libero_dummy_action,
get_libero_obs,
get_max_steps,
set_seed_everywhere
)
from libero_magma_utils import (
get_magma_model,
get_magma_prompt,
get_magma_action
)
from libero_env_utils import get_libero_dummy_action, get_libero_env, get_libero_obs, get_max_steps, set_seed_everywhere
from libero_magma_utils import get_magma_action, get_magma_model, get_magma_prompt


@dataclass
class LiberoConfig:
Expand Down
8 changes: 5 additions & 3 deletions agents/libero/libero_env_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,16 @@

import math
import os
import torch
import random
from PIL import Image

import imageio
import numpy as np
import tensorflow as tf
import torch
from libero.libero import get_libero_path
from libero.libero.envs import OffScreenRenderEnv
from PIL import Image


def resize_image(img, resize_size):
"""
Expand Down Expand Up @@ -91,7 +93,7 @@ def quat2axisangle(quat):

def save_rollout_video(replay_images, success, task_description):
"""Saves a video replay of a rollout in libero."""
save_dir = f"./libero_videos"
save_dir = "./libero_videos"
os.makedirs(save_dir, exist_ok=True)
processed_task_description = task_description.lower().replace(" ", "_").replace("\n", "_").replace(".", "_")[:50]
video_path = f"{save_dir}/quick_eval-success={success}--task={processed_task_description}.mp4"
Expand Down
10 changes: 6 additions & 4 deletions agents/libero/libero_magma_utils.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import os
import json
import torch
import os

import numpy as np
from magma.image_processing_magma import MagmaImageProcessor
from magma.processing_magma import MagmaProcessor
import torch

from magma.modeling_magma import MagmaForConditionalGeneration
from magma.processing_magma import MagmaProcessor


def get_magma_model(model_name):
processor = MagmaProcessor.from_pretrained(model_name, trust_remote_code=True)
Expand Down
9 changes: 3 additions & 6 deletions agents/robot_traj/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,14 @@
# Jianwei Yang (jianwyan@microsoft.com)
# --------------------------------------------------------

import os
import warnings
from utils.visualizer import Visualizer
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple
import ast
import random
import gradio as gr
import ast, re

import gradio as gr
import torch
import torchvision
from transformers import AutoModelForCausalLM, AutoProcessor
from utils.visualizer import Visualizer

'''
build model
Expand Down
17 changes: 8 additions & 9 deletions agents/robot_traj/app.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,15 @@
# Jianwei Yang (jianwyan@microsoft.com)
# --------------------------------------------------------

import os
import warnings
from utils.visualizer import Visualizer
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple
import ast
import random
import gradio as gr
import ast, re
from typing import TYPE_CHECKING

import gradio as gr
import torch
import torchvision
from transformers import AutoModelForCausalLM, AutoProcessor
from utils.visualizer import Visualizer

'''
build model
Expand Down Expand Up @@ -132,7 +130,6 @@ def inference(image, task, *args, **kwargs):
except Exception as e:
print(e)
return None
from gradio.events import Dependency

class ImageMask(gr.components.Image):
"""
Expand All @@ -146,7 +143,8 @@ class ImageMask(gr.components.Image):

def preprocess(self, x):
return super().preprocess(x)
from typing import Callable, Literal, Sequence, Any, TYPE_CHECKING
from typing import Literal, Sequence

from gradio.blocks import Block
if TYPE_CHECKING:
from gradio.components import Timer
Expand All @@ -163,7 +161,8 @@ class Video(gr.components.Video):

def preprocess(self, x):
return super().preprocess(x)
from typing import Callable, Literal, Sequence, Any, TYPE_CHECKING
from typing import Literal, Sequence

from gradio.blocks import Block
if TYPE_CHECKING:
from gradio.components import Timer
Expand Down
8 changes: 4 additions & 4 deletions agents/robot_traj/utils/visualizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import os
import numpy as np

import imageio
import matplotlib.pyplot as plt
import numpy as np
import torch

from matplotlib import cm
import torch.nn.functional as F
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from matplotlib import cm
from PIL import Image, ImageDraw


Expand Down
24 changes: 8 additions & 16 deletions agents/ui_agent/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,27 +5,19 @@
# Jianwei Yang (jianwyan@microsoft.com)
# --------------------------------------------------------

import base64
import io
from typing import Optional
import spaces

import gradio as gr
import numpy as np
import spaces
import torch
from huggingface_hub import snapshot_download
from PIL import Image
import io
import re

import base64, os
from util.utils import check_ocr_box, get_yolo_model, get_caption_model_processor, get_som_labeled_img
from transformers import AutoModelForCausalLM, AutoProcessor
from util.process_utils import extract_bbox, extract_mark_id, pred_2_point
from util.som import MarkHelper, plot_boxes_with_marks, plot_circles_with_marks
from util.process_utils import pred_2_point, extract_bbox, extract_mark_id

import torch
from PIL import Image

from huggingface_hub import snapshot_download
import torch
from transformers import AutoModelForCausalLM
from transformers import AutoProcessor
from util.utils import check_ocr_box, get_caption_model_processor, get_som_labeled_img, get_yolo_model

# Define repository and local directory
repo_id = "microsoft/OmniParser-v2.0" # HF repo
Expand Down
3 changes: 1 addition & 2 deletions agents/ui_agent/util/box_annotator.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
from typing import List, Optional, Union, Tuple
from typing import List, Optional, Tuple, Union

import cv2
import numpy as np

from supervision.detection.core import Detections
from supervision.draw.color import Color, ColorPalette

Expand Down
12 changes: 8 additions & 4 deletions agents/ui_agent/util/omniparser.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
from util.utils import get_som_labeled_img, get_caption_model_processor, get_yolo_model, check_ocr_box
import torch
from PIL import Image
import io
import base64
import io
from typing import Dict

import torch
from PIL import Image

from util.utils import check_ocr_box, get_caption_model_processor, get_som_labeled_img, get_yolo_model


class Omniparser(object):
def __init__(self, config: Dict):
self.config = config
Expand Down
1 change: 1 addition & 0 deletions agents/ui_agent/util/process_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import re


# is instruction English
def is_english_simple(text):
try:
Expand Down
10 changes: 4 additions & 6 deletions agents/ui_agent/util/som.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
import torch
from ultralytics import YOLO
from PIL import Image
import io
import base64

device = 'cuda'

from PIL import Image, ImageDraw, ImageFont
import numpy as np
import networkx as nx
import numpy as np
from PIL import ImageDraw, ImageFont

# import cv2

font_path = "agents/ui_agent/util/arial.ttf"
Expand Down
36 changes: 14 additions & 22 deletions agents/ui_agent/util/utils.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,18 @@
# from ultralytics import YOLO
import os
import io
import base64
import io
import time
from PIL import Image, ImageDraw, ImageFont
import json
import requests
# utility function
import os

import json
import sys
import os
# utility function
import cv2
import easyocr
import numpy as np

# %matplotlib inline
from matplotlib import pyplot as plt
import easyocr
from paddleocr import PaddleOCR
from PIL import Image

reader = easyocr.Reader(['en'])
paddle_ocr = PaddleOCR(
lang='en', # other lang also available
Expand All @@ -28,26 +23,23 @@
use_dilation=True, # improves accuracy
det_db_score_mode='slow', # improves accuracy
rec_batch_num=1024)
import time
import base64

import os
import ast
from typing import List, Tuple, Union

import supervision as sv
import torch
from typing import Tuple, List, Union
import torchvision.transforms as T
from torchvision.ops import box_convert
import re
from torchvision.transforms import ToPILImage
import supervision as sv
import torchvision.transforms as T
from util.box_annotator import BoxAnnotator

from util.box_annotator import BoxAnnotator


def get_caption_model_processor(model_name, model_name_or_path="Salesforce/blip2-opt-2.7b", device=None):
if not device:
device = "cuda" if torch.cuda.is_available() else "cpu"
if model_name == "blip2":
from transformers import Blip2Processor, Blip2ForConditionalGeneration
from transformers import Blip2ForConditionalGeneration, Blip2Processor
processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
if device == 'cpu':
model = Blip2ForConditionalGeneration.from_pretrained(
Expand All @@ -58,7 +50,7 @@ def get_caption_model_processor(model_name, model_name_or_path="Salesforce/blip2
model_name_or_path, device_map=None, torch_dtype=torch.float16
).to(device)
elif model_name == "florence2":
from transformers import AutoProcessor, AutoModelForCausalLM
from transformers import AutoModelForCausalLM, AutoProcessor
processor = AutoProcessor.from_pretrained("microsoft/Florence-2-base", trust_remote_code=True)
if device == 'cpu':
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, torch_dtype=torch.float32, trust_remote_code=True)
Expand Down
18 changes: 8 additions & 10 deletions data/__init__.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
# datasets
from .epic import epic
# data collators
from .data_collator import DataCollatorForHFDataset, DataCollatorForSupervisedDataset

# (joint) datasets
from .dataset import build_joint_dataset
from .ego4d import ego4d
from .epic import epic
from .llava import llava
from .magma import magma
from .openx import openx
from .openx_magma import openx_magma
from .magma import magma
from .llava import llava
from .seeclick import seeclick

# (joint) datasets
from .dataset import build_joint_dataset

# data collators
from .data_collator import DataCollatorForSupervisedDataset
from .data_collator import DataCollatorForHFDataset
Loading