In [1]:
import re
import json
from eval_utils import *
import numpy as np
import os

import BboxToolkit as bt


In [2]:
import re

import inflect

# Create an inflect engine instance.
# NOTE(review): `convert` is not referenced by any of the cells visible here;
# confirm whether it (and the `inflect` import) is still needed.
convert = inflect.engine()

def find_numbers(text):
    """Collect every number mentioned in ``text`` as a list of digit strings.

    Standalone digit runs are gathered first (in order of appearance),
    followed by the spelled-out words "one" through "ten" (matched
    case-insensitively) translated to their digit form.

    Args:
        text: The string to scan.

    Returns:
        A list of strings, e.g. ``["12", "3"]``; empty when nothing matches.
    """
    # Spelled-out numbers that are recognised, with their numeric value.
    spelled_values = {
        "one": 1, "two": 2, "three": 3, "four": 4, "five": 5,
        "six": 6, "seven": 7, "eight": 8, "nine": 9, "ten": 10
    }

    # Whole-word digit sequences such as "12" (but not the "12" in "a12").
    found = re.findall(r"\b\d+\b", text)

    # Whole-word number names; the single capture group makes findall
    # return the matched word itself.
    spelled_hits = re.findall(
        r"\b(one|two|three|four|five|six|seven|eight|nine|ten)\b",
        text,
        flags=re.IGNORECASE,
    )

    # Append the word matches after the digit matches, in digit-string form.
    for hit in spelled_hits:
        found.append(str(spelled_values[hit.lower()]))

    return found

In [3]:
import numpy as np
import shapely.geometry as shgeo

from BboxToolkit import bbox2type
from BboxToolkit import get_bbox_type

def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False, eps=1e-6):
    """Compute overlap ratios between two sets of boxes.

    Args:
        bboxes1: Array with one box per row, in a layout that
            ``get_bbox_type`` recognises (anything but 'notype').
        bboxes2: Second array of boxes, same requirement.
        mode: 'iou' for intersection over union, or 'iof' for intersection
            over the area of the boxes in ``bboxes1``.
        is_aligned: If True, row i of ``bboxes1`` is compared only with row i
            of ``bboxes2`` (both must have the same number of rows).
            Otherwise every box of ``bboxes1`` is compared with every box of
            ``bboxes2``.
        eps: Lower bound applied to the denominator to avoid division by zero.

    Returns:
        A tuple ``(outputs, overlaps, unions)``: the overlap ratio, the
        intersection area and the (clipped) denominator used for the ratio.
        ``outputs`` is always 2-D: ``(rows, cols)``, or ``(rows, 1)`` when
        ``is_aligned`` is True.

    Raises:
        AssertionError: If ``mode`` is unknown, a box type is not recognised,
            or ``is_aligned`` is True and the row counts differ.
    """
    assert mode in ['iou', 'iof']
    assert get_bbox_type(bboxes1) != 'notype'
    assert get_bbox_type(bboxes2) != 'notype'
    rows = bboxes1.shape[0]
    cols = bboxes2.shape[0]
    if is_aligned:
        assert rows == cols

    if rows * cols == 0:
        # Nothing to compare. Return the same 3-tuple as the regular path so
        # callers can always unpack (outputs, overlaps, unions); previously a
        # single array was returned here, which broke tuple unpacking.
        empty_shape = (rows, 1) if is_aligned else (rows, cols)
        return (np.zeros(empty_shape, dtype=np.float32),
                np.zeros(empty_shape, dtype=np.float32),
                np.zeros(empty_shape, dtype=np.float32))

    # Intersection of the horizontal envelopes. For non-horizontal boxes this
    # is only used as a cheap pre-filter for the exact polygon computation.
    hbboxes1 = bbox2type(bboxes1, 'hbb')
    hbboxes2 = bbox2type(bboxes2, 'hbb')
    if not is_aligned:
        # Add an axis so the comparison broadcasts to (rows, cols).
        hbboxes1 = hbboxes1[:, None, :]
    lt = np.maximum(hbboxes1[..., :2], hbboxes2[..., :2])
    rb = np.minimum(hbboxes1[..., 2:], hbboxes2[..., 2:])
    wh = np.clip(rb - lt, 0, np.inf)
    h_overlaps = wh[..., 0] * wh[..., 1]

    if get_bbox_type(bboxes1) == 'hbb' and get_bbox_type(bboxes2) == 'hbb':
        # Both inputs are horizontal boxes: the envelope intersection is exact.
        overlaps = h_overlaps
        areas1 = (hbboxes1[..., 2] - hbboxes1[..., 0]) * (
            hbboxes1[..., 3] - hbboxes1[..., 1])

        if mode == 'iou':
            areas2 = (hbboxes2[..., 2] - hbboxes2[..., 0]) * (
                hbboxes2[..., 3] - hbboxes2[..., 1])
            unions = areas1 + areas2 - overlaps
        else:
            unions = areas1

    else:
        # At least one input is not a horizontal box: fall back to exact
        # polygon geometry via shapely.
        polys1 = bbox2type(bboxes1, 'poly')
        polys2 = bbox2type(bboxes2, 'poly')
        sg_polys1 = [shgeo.Polygon(p) for p in polys1.reshape(rows, -1, 2)]
        sg_polys2 = [shgeo.Polygon(p) for p in polys2.reshape(cols, -1, 2)]

        # Only pairs whose horizontal envelopes intersect can overlap, so the
        # polygon intersection is evaluated for those index tuples only.
        # p[0] indexes bboxes1 and p[-1] indexes bboxes2 (identical in the
        # aligned, 1-D case).
        overlaps = np.zeros(h_overlaps.shape)
        for p in zip(*np.nonzero(h_overlaps)):
            overlaps[p] = sg_polys1[p[0]].intersection(sg_polys2[p[-1]]).area

        if mode == 'iou':
            unions = np.zeros(h_overlaps.shape, dtype=np.float32)
            for p in zip(*np.nonzero(h_overlaps)):
                unions[p] = sg_polys1[p[0]].union(sg_polys2[p[-1]]).area
        else:
            unions = np.array([p.area for p in sg_polys1], dtype=np.float32)
            if not is_aligned:
                unions = unions[..., None]

    # Clip the denominator so pairs with zero union/area yield 0 instead of NaN.
    unions = np.clip(unions, eps, np.inf)
    outputs = overlaps / unions
    if outputs.ndim == 1:
        outputs = outputs[..., None]
    return outputs, overlaps, unions

def corner_to_obb(corners):
    """Convert rows of ``(x1, y1, x2, y2, angle)`` to ``(cx, cy, w, h, angle)``.

    Args:
        corners: 2-D array whose first five columns are two corner points
            ``(x1, y1)``, ``(x2, y2)`` and an angle in degrees.

    Returns:
        2-D array with one row per input row: the midpoint of the two points,
        the longer and shorter projected side lengths, and the angle folded
        into ``[0, 180)``.
    """
    xa, ya = corners[:, 0], corners[:, 1]
    xb, yb = corners[:, 2], corners[:, 3]

    # Angles are given in degrees; fold them into [0, 180) before use.
    theta_deg = corners[:, 4] % 180
    theta = np.radians(theta_deg)

    # The box centre is the midpoint between the two given points.
    center_x = (xa + xb) / 2.0
    center_y = (ya + yb) / 2.0

    # Length of the segment joining the two points, projected with the
    # cosine / sine of the angle to obtain the two side lengths.
    diag = np.sqrt((xb - xa) ** 2 + (yb - ya) ** 2)
    side_cos = np.abs(diag * np.cos(theta))
    side_sin = np.abs(diag * np.sin(theta))

    # The longer side is always reported as the width; the angle is left
    # untouched when the two sides are swapped.
    long_side = np.maximum(side_cos, side_sin)
    short_side = np.minimum(side_cos, side_sin)

    # Stack the per-box quantities as columns of one (N, 5) array.
    theta_deg = theta_deg % 180
    return np.vstack(
        (center_x, center_y, long_side, short_side, theta_deg)
    ).T


In [14]:

# Directory holding the model output files to evaluate. The referring
# evaluation cell checks this path for the substrings 'obb_v1' / 'obb_v2' to
# decide which ground-truth field and which IoU routine to use.
output_dir = '../../outputs/outputs_RSBench_new/'
# output_dir = '../../outputs/outputs_RSBench_obb_v2_new/'

# Suffix inserted into the result file names read by the evaluation cells
# (e.g. f'rsbench_referring{output_version}.json').
# output_version =  '_v1' 
output_version =  '_v2'

In [None]:
# Referring-expression grounding evaluation.
# Reads one JSON record per line from `data_path`, extracts the integer box
# coordinates from the ground-truth and predicted strings, and accumulates
# accuracy at the IoU thresholds, mean IoU, cumulative IoU and (for OBB
# outputs) the rotation error.

def _safe_div(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero."""
    if np.all(np.asarray(denominator) == 0):
        return float('nan')
    return numerator / denominator


data_path= os.path.join(output_dir, f'rsbench_referring{output_version}.json')
# data_path= os.path.join(output_dir, f'rsvgd.json')

thres_list = [0.5, 0.7]
count = np.zeros(len(thres_list))  # hits per IoU threshold

cumI = 0      # accumulated intersection
cumU = 0      # accumulated union
mean_IoU = 0  # accumulated per-sample IoU (divided by total_count at the end)

total_count = 0  # samples that pass the unique/size filter
valid_count = 0  # samples whose prediction could be scored

use_size_group = False
size_list = ['small', 'medium', 'large']
unique_check = [1,0]
rot_errs = []

# The box format is implied by the output directory name; pick the matching
# ground-truth field once instead of on every record.
if 'obb_v1' in output_dir:
    gt_key = 'obb_v1'
elif 'obb_v2' in output_dir:
    gt_key = 'obb_v2'
else:
    gt_key = 'ground_truth'

with open(data_path, 'r') as file:
    for line in file:
        line = line.strip()
        if not line:
            # Blank lines (e.g. a trailing newline) carry no record.
            continue

        # Convert JSON string to Python dictionary
        item = json.loads(line)
        img_id = item['image_id']
        is_unique =  item['is_unique']
        if use_size_group:
            obj_size = item['size_group']
            obj_size = 'medium' if obj_size == '' else obj_size
        else:
            obj_size = 'medium'

        if not (is_unique in unique_check and (obj_size in size_list)):
            continue

        # Counted before scoring, so unparsable predictions count as misses.
        total_count += 1

        integers = re.findall(r'\d+', item[gt_key])
        gt_bbox = np.array([int(num) for num in integers])[np.newaxis,:]

        # From here on `integers` refers to the numbers found in the prediction.
        integers = re.findall(r'\d+', item['answer'])
        pred_bbox = np.array([int(num) for num in integers])[np.newaxis,:]

        try:
            if 'obb_v1' in output_dir and len(integers) > 4:
                # NOTE(review): the difference is not wrapped around the
                # 180-degree period (e.g. 179 vs 1 yields 178) - confirm
                # whether that is intended.
                rot_errs.append(pred_bbox[0,4] % 180 - gt_bbox[0,4] % 180)
                iou_score, I, U = bbox_overlaps(gt_bbox, pred_bbox)
            elif 'obb_v2' in output_dir and len(integers) > 4:
                gt_bbox = corner_to_obb(gt_bbox)
                pred_bbox = corner_to_obb(pred_bbox)
                rot_errs.append(pred_bbox[0,4] - gt_bbox[0,4])
                iou_score, I, U = bbox_overlaps(gt_bbox, pred_bbox)
            elif 'obb' not in output_dir:
                # Horizontal boxes: only the first four numbers are used.
                gt_bbox = gt_bbox[0,:4].tolist()
                pred_bbox = pred_bbox[0,:4].tolist()
                iou_score, I, U = computeIoU(gt_bbox, pred_bbox, return_iou=True)
                # Wrap in lists so the [0] indexing below works for both paths.
                iou_score = [iou_score]
                I = [I]
                U = [U]
            else:
                # OBB run but the prediction has too few numbers.
                print('invalid output', img_id, pred_bbox, gt_bbox, flush=True)
                continue

            iou_score = iou_score[0]
            I = I[0]
            U = U[0]

            mean_IoU += iou_score
            cumI += I
            cumU += U

            # print(img_id, iou_score, flush=True)
            for ii, thres in enumerate(thres_list):
                if iou_score >= thres:
                    count[ii]+=1
            valid_count += 1

        except Exception as e:
            # Best effort: report the malformed sample and keep going; it
            # stays in total_count and therefore lowers the reported scores.
            print(e, flush=True)
            print('invalid output', img_id, pred_bbox, gt_bbox, flush=True)


print('number of total/valid samples:', total_count, valid_count)
for ii, thres in enumerate(thres_list):
    print(f'Acc at iou_{thres}:', _safe_div(count[ii], total_count) * 100, flush=True)
# rot_errs stays empty for non-OBB runs; report NaN without a NumPy warning.
mean_rot_err = np.mean(np.abs(rot_errs)) if rot_errs else float('nan')
print(f'meanIoU: {_safe_div(mean_IoU, total_count) * 100}, cumIoU: {_safe_div(cumI, cumU) * 100}, rot_err: {mean_rot_err}', flush=True)

# Captioning

In [None]:
# Captioning export.
# Reads one JSON record per line from `data_path` and writes the ground-truth
# and predicted captions as tab-separated "<image_id>\t<caption>" text files
# for the external caption evaluation scripts.
data_path = os.path.join(output_dir, f'rsbench_cap{output_version}.json')
# data_path = '../../outputs/GPT4_eval/gpt4_cap.json'

gt_answers= []
pred_answers = []
with open(data_path, 'r') as file:
    for line in file:
        line = line.strip()
        if not line:
            # Blank lines (e.g. a trailing newline) carry no record.
            continue

        item = json.loads(line)
        img_id = item['image_id']

        # Captions are flattened to a single line because the output files
        # hold one record per line.
        gt_ans = item['ground_truth'].strip().replace('\n', ' ')
        raw_pred = item['answer']
        pred_ans = '' if raw_pred is None else raw_pred.strip().replace('\n', ' ')

        # Skip records without an id or without a prediction. The check runs
        # on the normalised prediction: after strip() it can never equal
        # '\n', so the previous comparison against '\n' never fired, and a
        # None answer crashed on .strip() before it was reached.
        if img_id is None or img_id=='\n' or not pred_ans:
            print('empty', img_id, pred_ans)
            continue

        gt_answers.append([img_id, gt_ans])
        pred_answers.append([img_id, pred_ans])

print('number of captions', len(gt_answers))

np.savetxt('pred_cap.txt',  pred_answers, fmt='%s', delimiter='\t')
np.savetxt('gt_cap.txt',  gt_answers, fmt='%s', delimiter='\t')

In [None]:
# Number of whitespace-separated words in each predicted caption.
cap_len = [len(caption.split()) for _img_id, caption in pred_answers]
avg_len = np.mean(cap_len)
std_len = np.std(cap_len)
print('avg len', avg_len, std_len)

In [None]:
# conda activate YOUR_PYTHON_ENV
# cd geochat/eval/caption_eval/
# python create_json_references.py -i ../gt_cap.txt -o ../gt_cap.json
# python run_evaluations.py -i ../pred_cap.txt  -r ../gt_cap.json