In [1]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import os
import json
import math
from scipy.cluster.hierarchy import fcluster, linkage
from collections import defaultdict
from tqdm import tqdm

In [2]:
data = [json.loads(l) for l in open('/mnt/ssd1/hang/MaskDINO/notebooks/demo_mask_images/ebay_luggages-wheel_masks_data_1.json').readlines()]
print(len(data))

12690


In [3]:
data[1]

{'img_path': '/MaskDINO/total_data/thang/crawl/SIA_scratch/ebay_crawl_used_luggage_images/images/Plastic/Plastic--155450733300--https_~~i.ebayimg.com~images~g~zEkAAOSwlmNkDgOe~s-l1600.webp',
 'wheel_box': [[127.0594711303711,
   1177.810546875,
   284.2583312988281,
   1205.316650390625]],
 'wheel_scores': [0.7841631174087524],
 'wheel_mask': [[[253, 1149],
   [252, 1150],
   [246, 1150],
   [245, 1151],
   [234, 1151],
   [233, 1152],
   [228, 1152],
   [227, 1153],
   [221, 1153],
   [220, 1154],
   [217, 1154],
   [216, 1155],
   [213, 1155],
   [212, 1156],
   [210, 1156],
   [209, 1157],
   [207, 1157],
   [206, 1158],
   [204, 1158],
   [203, 1159],
   [200, 1159],
   [199, 1160],
   [197, 1160],
   [196, 1161],
   [194, 1161],
   [193, 1162],
   [192, 1162],
   [191, 1163],
   [189, 1163],
   [188, 1164],
   [186, 1164],
   [185, 1165],
   [183, 1165],
   [182, 1166],
   [180, 1166],
   [179, 1167],
   [178, 1167],
   [177, 1168],
   [176, 1168],
   [175, 1169],
   [174, 1169],


In [4]:
def plot_box(img, box):
#     if isinstance(img, str):
#         img = cv2.imread(img.replace('/MaskDINO/total_data/', '/data2/'))
#         img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    img = cv2.rectangle(img, box[0:2], box[2:], (0,255,255), 2)
    return img

def plot_body_wheel(dpoint):
    img_path = dpoint['img_path'].replace('/MaskDINO/total_data/', '/data2/')
    img = cv2.imread(img_path)
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    wheel_boxes = np.array(dpoint['wheel_box']).astype('i')
    wheel_scores = dpoint['wheel_scores']
    
    body_boxes = np.array(dpoint['body_box']).astype('i')
#     colors = np.random.randint(0, 255, (len(wheel_boxes), 3))
    for i, (w_b, w_s) in enumerate(zip(wheel_boxes, wheel_scores)):
        c = (255,255,0)
        img = cv2.rectangle(img, w_b[0:2], w_b[2:], c, 2)
        img = cv2.putText(img, str(round(w_s,2)), w_b[0:2], cv2.FONT_HERSHEY_SIMPLEX, 1, c, 2, cv2.LINE_AA)
    for b_b in body_boxes:
        img = cv2.rectangle(img, b_b[0:2], b_b[2:], (255,255,0), 2)
    return img
    
def distance_between_2_points(p1, p2):  
    x1, y1 = p1
    x2, y2 = p2
    distance = math.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)
    return distance
def pick_nearest_point(box, body_box):
    body_center = (body_box[0]+(body_box[2]-body_box[0])/2, body_box[1]+(body_box[3]-body_box[1])/2)
    
    p1 = (box[0], box[1])
    p2 = (box[2], box[1])
    p3 = (box[2], box[3])
    p4 = (box[0], box[3])
    points = [p1,p2,p3,p4]
    
    d = [distance_between_2_points(p, body_center) for p in points]
    nearest_point = d.index(min(d))
    farest_point = d.index(max(d))
    return nearest_point, farest_point

def expand_box(box, near, far, bodybox, expand_box_thres):
    bodyW = bodybox[2]-bodybox[0]
    
    p1 = np.array((box[0], box[1]))
    p2 = np.array((box[2], box[1]))
    p3 = np.array((box[2], box[3]))
    p4 = np.array((box[0], box[3]))
    points = [p1,p2,p3,p4]
    w = box[2]-box[0]
    h = box[3]-box[1]
    
    n_p = points[near]
    f_p = points[far]
    
    if near == 0:
        n_coef = (-1,-1)
    elif near == 1:
        n_coef = (1,-1)
    elif near == 2:
        n_coef = (1,1)
    elif near == 3:
        n_coef = (-1,1)
        
    n_coef = np.array(n_coef)
    f_coef = -1*n_coef
    
     
    
    near_ratio = min(expand_box_thres[0], 2/3*((expand_box_thres[1]*bodyW/w)-1))
    far_ratio = near_ratio/2
#     print('far_ratio:', far_ratio, 'near_ratio:', near_ratio)
    
    near_del_xy = (near_ratio*n_coef*np.array(w,h))
    far_del_xy = far_ratio*f_coef*np.array(w,h)
    new_near = np.add(n_p, near_del_xy)
    new_far = np.add(f_p, far_del_xy)
    new_box = [min(new_near[0], new_far[0]), min(new_near[1], new_far[1]), 
               max(new_near[0], new_far[0]), max(new_near[1], new_far[1])]
    new_box=np.array(new_box, dtype='i')
    
    return new_box
def refine_box_coord(boxes, image):#box: xyxy
    img_h, img_w = image.shape[:2]
    boxes = np.array(boxes)
    
    boxes[:,0][boxes[:,0] < 0] = 0
    boxes[:,1][boxes[:,1] < 0] = 0
    boxes[:,2][boxes[:,2] > img_w] = img_w
    boxes[:,3][boxes[:,3] > img_h] = img_h
    return boxes
    
    
def check_box_on_img_boundary(image, box):
    img_h, img_w = image.shape[0:2]
    p1 = [box[0], box[1]]
    p2 = [box[2], box[1]]
    p3 = [box[2], box[3]]
    p4 = [box[0], box[3]]
    points = np.array([p1,p2,p3,p4])
#     print(points[:,0])
#     print('---',np.any(points[:,0]<=0))
#     print('---',np.any(points[:,0]>=img_w))
#     print('---',np.any(points[:,1]<=0))
#     print('---',np.any(points[:,1]>=img_h))
    sum_bound = np.sum(np.array([np.any(points[:,0]<=0),
                np.any(points[:,0]>=img_w),
                np.any(points[:,1]<=0),
                np.any(points[:,1]>=img_h)]))
#     print('sum bound:', sum_bound)
    return sum_bound
    
def cluster_boxes_func(boxes): # xywh
#     dis = 
    box_centers = np.array([(x1/2 + x2 / 2, y1/2 + y2 / 2) for x1, y1, x2, y2 in boxes])

    distance_threshold = max([b[2]-b[0] for b in boxes] + [b[3]-b[1] for b in boxes])  # Maximum distance to consider boxes in the same cluster
    Z = linkage(box_centers, method='single', metric='euclidean')
    cluster_labels = fcluster(Z, t=distance_threshold, criterion='distance')

    # Group box indices by cluster labels
    clusters = {}
    for idx, label in enumerate(cluster_labels):
        if label not in clusters:
            clusters[label] = []
        clusters[label].append(idx)  # Append index of the box
    
    return clusters

def get_bounding_box(boxes):
    """
    Calculate the bounding box that encloses two boxes.

    Parameters:
    box1 (tuple): Coordinates of the first box (x1, y1, x2, y2).
    box2 (tuple): Coordinates of the second box (x1, y1, x2, y2).

    Returns:
    tuple: Coordinates of the bounding box (x_min, y_min, x_max, y_max).
    """
    boxes = np.array(boxes)
#     print('boxes:',boxes)
    x_min = np.min(boxes[:,0])
    y_min = np.min(boxes[:,1])
    x_max = np.max(boxes[:,2])
    y_max = np.max(boxes[:,3])

    return [x_min, y_min, x_max, y_max]
class NumpyEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)

In [74]:
img_w = 50
img_h = 50
bbb = np.array([[-1,2,100,10],[10,10,70,30]])
bbb = np.array(bbb)
bbb[:,0][bbb[:,0] < 0] = 0
bbb[:,1][bbb[:,1] < 0] = 0
bbb[:,2][bbb[:,2] > img_w] = img_w
bbb[:,3][bbb[:,3] > img_h] = img_h

print(bbb)

# bbb[:,0] = np.max(bbb[:,0],axis=-1, initial=0)
# print(bbb[:,0])


# bbb[:,2] = np.min(bbb[:,2], img_w)

# bbb[:,1] = np.max(0, bbb[:,1])
# bbb[:,3] = np.min(boxbbbebbbs[:,3], img_h)
# bbb

[[ 0  2 50 10]
 [10 10 50 30]]


In [37]:
# for id in range(30,35,1):
#     plot_body_wheel(data[id])

In [None]:
id = 32
data = [json.loads(l) for l in open('/mnt/ssd1/hang/MaskDINO/notebooks/demo_mask_images/ebay_luggages-wheel_masks_data_1.json').readlines()]

In [8]:
new_data = []
# new_data = defaultdict(list)

for id in tqdm(range(0,len(data),1)):
#     print('----------------', id, '---------------')
    wheel_boxes = np.array(data[id]['wheel_box']).astype('i')
    body_boxes = np.array(data[id]['body_box']).astype('i')
    wheel_scores = np.array(data[id]['wheel_scores'])
#     print('wheel_boxes',wheel_boxes)
#     print('body_boxes',body_boxes)
#     image = cv2.imread(data[id]['img_path'].replace('/MaskDINO/total_data/', '/data2/'))
    image = plot_body_wheel(data[id])
    try:
        main_body_id = max(range(len(body_boxes)), key=lambda i: (body_boxes[i][2]-body_boxes[i][0])*(body_boxes[i][3]-body_boxes[i][1]))
    except:
        continue
    main_body = body_boxes[main_body_id]
    main_body_mask = np.array(data[id]['body_mask'][main_body_id]).astype('i')
#     print('main_body',main_body)
    # cluster double wheel
    if len(wheel_boxes) > 1:
        clusters = cluster_boxes_func(wheel_boxes)
    else:
        clusters = {1: [0]}
#     print('clusters:',clusters)
    group_wheel_boxes = []
    group_scores = []
    for grp, ids in clusters.items():
        bbox = get_bounding_box(wheel_boxes[ids])
#         print('bbox:',bbox)
        score = np.max(wheel_scores[ids])
        
        group_wheel_boxes.append(bbox)
        group_scores.append(score)
#     data[id]['wheel_box'] = group_wheel_boxes
#     data[id]['wheel_scores'] = group_scores
#     print('group_wheel_boxes:', group_wheel_boxes)
    group_wheel_boxes = refine_box_coord(group_wheel_boxes, image)
    
    data_line = {}
    data_line['id']=id
    data_line['img_path'] = data[id]['img_path'].replace('/MaskDINO/total_data/', '/data2/')
    data_line['pos_wheel_context_box'] = []
    data_line['reliable_wheel_box'] = []
    data_line['all_wheel_box'] = group_wheel_boxes
    
    sign = 0
    for wheel_box, w_s in zip(group_wheel_boxes, group_scores):
        new_image = plot_box(image, wheel_box)
        box_on_boundary = check_box_on_img_boundary(image, wheel_box)
#         print('box_on_boundary:',box_on_boundary)
        if w_s > 0.5 and box_on_boundary <2:
            sign= 1
            near, far = pick_nearest_point(wheel_box, main_body)
#             print('near', near,'far', far)
            expand_box_thres = (6,2/3) if box_on_boundary==1 else (4,1/2)
            new_box = expand_box(wheel_box, near,far, main_body,expand_box_thres)
            new_box = refine_box_coord([new_box], image)[0]
            
            data_line['pos_wheel_context_box'].append(new_box)
            data_line['reliable_wheel_box'].append(wheel_box)
#             print('new_box',new_box, image.shape[:2])
#             new_image = plot_box(image, new_box)
    
#         plt.imshow(new_image)
#         plt.show()
#     print(sign)
    if sign==1:
        
        data_line['body_box'] = main_body
        data_line['body_mask'] = main_body_mask
        
        with open('ebay_luggages_pos_wheel_context_box.json', 'a', encoding='utf-8') as f:
            json.dump(data_line, f, ensure_ascii=False, cls = NumpyEncoder)
            f.write('\n')

  near_ratio = min(expand_box_thres[0], 2/3*((expand_box_thres[1]*bodyW/w)-1))
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 12690/12690 [06:32<00:00, 32.36it/s]


In [108]:
new_data

[{'id': 357,
  'img_path': '/data2/thang/crawl/SIA_scratch/ebay_crawl_used_luggage_images/images/Plastic/Plastic--185657723259--https_~~i.ebayimg.com~images~g~PuMAAOSwQFtjcls6~s-l1600.webp',
  'pos_box': [array([ 628, 1170, 1146, 1600], dtype=int32),
   array([ 669,   66, 1200,  718], dtype=int32)],
  'body_box': array([ 123,  223, 1160, 1560], dtype=int32),
  'body_mask': array([[434, 191],
         [433, 192],
         [376, 192],
         ...,
         [456, 192],
         [437, 192],
         [436, 191]], dtype=int32),
  'wheel_box': array([[ 833, 1375, 1044, 1580],
         [ 991,  227, 1200,  397]], dtype=int32)}]

In [106]:
data[0]

{'img_path': '/MaskDINO/total_data/thang/crawl/SIA_scratch/ebay_crawl_used_luggage_images/images/Plastic/Plastic--175968754497--https_~~i.ebayimg.com~images~g~vC4AAOSwgIRjsSMv~s-l1600.webp',
 'wheel_box': [[146.24392700195312,
   1276.997314453125,
   187.77134704589844,
   1307.756103515625]],
 'wheel_scores': [0.24606408178806305],
 'wheel_mask': [[[150, 1248],
   [148, 1250],
   [148, 1262],
   [147, 1263],
   [147, 1266],
   [146, 1267],
   [146, 1268],
   [143, 1271],
   [143, 1272],
   [142, 1273],
   [142, 1276],
   [144, 1278],
   [153, 1278],
   [154, 1277],
   [155, 1277],
   [156, 1276],
   [159, 1276],
   [160, 1275],
   [165, 1275],
   [166, 1274],
   [168, 1274],
   [171, 1271],
   [172, 1271],
   [173, 1270],
   [174, 1270],
   [175, 1269],
   [176, 1269],
   [176, 1266],
   [175, 1266],
   [174, 1267],
   [172, 1267],
   [171, 1266],
   [160, 1266],
   [159, 1267],
   [158, 1267],
   [158, 1269],
   [156, 1271],
   [154, 1271],
   [153, 1270],
   [153, 1268],
   [156, 1

In [19]:
new_near[1]

array([ 664.75, 1083.75])

In [8]:
n_coef = np.array((-1,-1))
f_coef = -1*n_coef
print(n_coef, f_coef)

[-1 -1] [1 1]


In [10]:
1.5*n_coef*(np.array((20,30)))

array([-30., -45.])