首先筛选所有predTag为1，即需要保留的box，存储在新的表中，简化列的信息

In [43]:
import pandas as pd
from shapely import box
from shapely import unary_union

df = pd.read_csv('merged.csv')
df = df[df['predTag'] == 1]
# columns_to_drop = ['Unnamed: 0', 'Tag', 'Type', 'Train Comparison', 'Test Comparison']
# df = df.drop(columns=columns_to_drop)

计算同一个ImageID下的box是否有IOU

In [44]:
data = df
# 用于存储结果的数据框
result = pd.DataFrame(columns=data.columns)

In [45]:
# 计算矩形的左上角和右下角坐标
def get_corners(row):
    x1 = row['x center'] - row['width'] / 2
    y1 = row['y center'] - row['height'] / 2
    x2 = row['x center'] + row['width'] / 2
    y2 = row['y center'] + row['height'] / 2
    return x1, y1, x2, y2

合并矩形

In [46]:
# 计算两个矩形的重叠面积
def compute_iou(row1, row2):
    x1_1, y1_1, x2_1, y2_1 = get_corners(row1)
    x1_2, y1_2, x2_2, y2_2 = get_corners(row2)

    xi1 = max(x1_1, x1_2)
    yi1 = max(y1_1, y1_2)
    xi2 = min(x2_1, x2_2)
    yi2 = min(y2_1, y2_2)

    inter_width = max(0, xi2 - xi1)
    inter_height = max(0, yi2 - yi1)
    inter_area = inter_width * inter_height

    area1 = (x2_1 - x1_1) * (y2_1 - y1_1)
    area2 = (x2_2 - x1_2) * (y2_2 - y1_2)

    union_area = area1 + area2 - inter_area

    iou = inter_area / union_area if union_area != 0 else 0
    return inter_area, union_area, iou

In [47]:
# 计算最小外接矩形
def get_minimum_bounding_rect(rows):
    x1 = min(row['x center'] - row['width'] / 2 for _, row in rows.iterrows())
    y1 = min(row['y center'] - row['height'] / 2 for _, row in rows.iterrows())
    x2 = max(row['x center'] + row['width'] / 2 for _, row in rows.iterrows())
    y2 = max(row['y center'] + row['height'] / 2 for _, row in rows.iterrows())

    new_width = x2 - x1
    new_height = y2 - y1
    new_x_center = x1 + new_width / 2
    new_y_center = y1 + new_height / 2

    return new_x_center, new_y_center, new_width, new_height

In [48]:
# @title 默认标题文本
# 使用深度优先搜索来查找重叠的矩形群组
def find_overlapping_groups(group):
    n = len(group)
    visited = [False] * n
    adj_list = {i: [] for i in range(n)}

    # 构建邻接表
    for i in range(n):
        for j in range(i + 1, n):
            _, _, iou = compute_iou(group.loc[i], group.loc[j])
            if iou > 0.7:
                adj_list[i].append(j)
                adj_list[j].append(i)

    # 使用DFS找到所有重叠的群组
    def dfs(node):
        stack = [node]
        component = []
        while stack:
            v = stack.pop()
            if not visited[v]:
                visited[v] = True
                component.append(v)
                for neighbor in adj_list[v]:
                    if not visited[neighbor]:
                        stack.append(neighbor)
        return component

    groups = []
    for i in range(n):
        if not visited[i]:
            component = dfs(i)
            groups.append(component)

    return groups

In [49]:
# 按ImageID进行分组处理
for image_id, group in data.groupby('ImageID'):
    group = group.reset_index(drop=True)
    overlapping_groups = find_overlapping_groups(group)

    for component_indices in overlapping_groups:
        component = group.loc[component_indices]
        if len(component) == 1:
            # 没有重叠，直接添加到结果中
            result = pd.concat([result, component], ignore_index=True)
        else:
            # 处理重叠的矩形
            label_ids = component['labelID'].tolist()
            if all(label_id == 9 for label_id in label_ids):
                # 所有labelID都为9
                new_x_center, new_y_center, new_width, new_height = get_minimum_bounding_rect(component)
                new_confidence = max(component['confidence'])
                new_area = new_width * new_height
                new_row = pd.Series({
                    'x center': new_x_center,
                    'y center': new_y_center,
                    'width': new_width,
                    'height': new_height,
                    'confidence': new_confidence,
                    'ImageID': image_id,
                    'labelID': 9,
                    'Area': new_area,
                    'boxID': min(component['boxID']),
                    'predTag': component.iloc[0]['predTag']
                })
                result = pd.concat([result, new_row.to_frame().T], ignore_index=True)

            elif all(label_id != 9 for label_id in label_ids):
                # 所有labelID都不为9
                new_x_center, new_y_center, new_width, new_height = get_minimum_bounding_rect(component)
                max_confidence_row = component.loc[component['confidence'].idxmax()]
                new_labelID = max_confidence_row['labelID']
                new_confidence = max_confidence_row['confidence']
                new_area = new_width * new_height
                new_row = pd.Series({
                    'x center': new_x_center,
                    'y center': new_y_center,
                    'width': new_width,
                    'height': new_height,
                    'confidence': new_confidence,
                    'ImageID': image_id,
                    'labelID': new_labelID,
                    'Area': new_area,
                    'boxID': min(component['boxID']),
                    # 'predTag': component.iloc[0]['predTag']
                })
                result = pd.concat([result, new_row.to_frame().T], ignore_index=True)

            else:
                # 混合labelID的情况
                label9_rows = component[component['labelID'] == 9]
                not_label9_rows = component[component['labelID'] != 9]
                label9_confidences = label9_rows['confidence'].tolist()
                not_label9_confidences = not_label9_rows['confidence'].tolist()

                if max(label9_confidences) - max(not_label9_confidences) > 0.6:
                    new_labelID = 9
                else:
                    new_labelID = not_label9_rows.loc[not_label9_rows['confidence'].idxmax()]['labelID']

                new_x_center, new_y_center, new_width, new_height = get_minimum_bounding_rect(component)
                new_confidence = max(component['confidence'])
                new_area = new_width * new_height
                new_row = pd.Series({
                    'x center': new_x_center,
                    'y center': new_y_center,
                    'width': new_width,
                    'height': new_height,
                    'confidence': new_confidence,
                    'ImageID': image_id,
                    'labelID': new_labelID,
                    'Area': new_area,
                    'boxID': min(component['boxID']),
                    # 'predTag': component.iloc[0]['predTag']
                })
                result = pd.concat([result, new_row.to_frame().T], ignore_index=True)

# 保存结果
# result.to_csv('merged2.csv', index=False)

  result = pd.concat([result, component], ignore_index=True)


In [50]:
# 读取CSV文件
data = pd.read_csv('filtered_predTag_1.csv')

# 定义一个函数来判断两个矩形是否重叠
def is_overlapping(box1, box2):
    return box1.intersects(box2)
def calculate_iou(rect1, rect2):
    """
    Calculate Intersection over Union (IoU) of two shapely Polygon objects.

    Parameters:
    rect1 (Polygon): First shapely Polygon object.
    rect2 (Polygon): Second shapely Polygon object.

    Returns:
    float: IoU value.
    """
    
    # Calculate the intersection area
    inter_area = rect1.intersection(rect2).area
    
    # Calculate the union area
    union_area = rect1.union(rect2).area
    
    # Calculate the IoU
    iou = inter_area / union_area if union_area > 0 else 0
    
    return iou>=0.7
# 定义一个函数来合并两个矩形
def merge_boxes(*boxes):
    return unary_union([*boxes])

# 处理每个ImageID的数据
def process_image_data(group):
    rects = []
    for idx, row in group.iterrows():
        x_center, y_center, width, height = row['x center'], row['y center'], row['width'], row['height']
        x1 = x_center - width / 2
        y1 = y_center - height / 2
        x2 = x_center + width / 2
        y2 = y_center + height / 2
        rect = box(x1, y1, x2, y2)
        print(rect)
        print("rect")
        rects.append((rect, row))

    # 合并重叠的矩形
    merged_rects = []
    while rects:
        base_rect, base_row = rects.pop(0)
        to_merge = [base_rect]
        to_merge_rows = [base_row]
        for rect, row in rects[:]:
            if any(calculate_iou(base_rect, r) for r in to_merge):
                to_merge.append(rect)
                to_merge_rows.append(row)
                rects.remove((rect, row))

        if len(to_merge) > 1:
            merged_rect = merge_boxes(*to_merge)
            x1, y1, x2, y2 = merged_rect.bounds
            x_center = (x1 + x2) / 2
            y_center = (y1 + y2) / 2
            width = x2 - x1
            height = y2 - y1
            label_ids = [row['labelID'] for row in to_merge_rows]
            confidences = [row['confidence'] for row in to_merge_rows]

            # 分类匹配和合并规则
            if all(label_id == 9 for label_id in label_ids):
                label_id = 9
            elif all(label_id != 9 for label_id in label_ids):
                max_conf_idx = confidences.index(max(confidences))
                label_id = label_ids[max_conf_idx]
            else:
                label_9_conf = max(confidences[i] for i in range(len(label_ids)) if label_ids[i] == 9)
                not_label_9_conf = max(confidences[i] for i in range(len(label_ids)) if label_ids[i] != 9)
                if label_9_conf - not_label_9_conf > 0.6:
                    label_id = 9
                elif not_label_9_conf - label_9_conf > 0.6:
                    max_conf_idx = confidences.index(max(confidences))
                    label_id = label_ids[max_conf_idx]
                else:
                    label_id = label_ids[confidences.index(max(confidences))]

            confidence = max(confidences)
            merged_rects.append([x_center, y_center, width, height, confidence, base_row['ImageID'], label_id, 0, 0, base_row['predTag']])
        else:
            merged_rects.append([base_row['x center'], base_row['y center'], base_row['width'], base_row['height'], base_row['confidence'], base_row['ImageID'], base_row['labelID'], base_row['Area'], base_row['boxID'], base_row['predTag']])

    return merged_rects

# 对所有数据按ImageID进行处理
grouped = data.groupby('ImageID')
result = []

for name, group in grouped:
    result.extend(process_image_data(group))

# 转换为DataFrame并保存为CSV
result_df = pd.DataFrame(result, columns=['x center', 'y center', 'width', 'height', 'confidence', 'ImageID', 'labelID', 'Area', 'boxID', 'predTag'])
result_df.to_csv('after_pre.csv', index=False)

POLYGON ((0.95635 0.5227999999999999, 0.95635 0.649, 0.8028500000000001 0.649, 0.8028500000000001 0.5227999999999999, 0.95635 0.5227999999999999))
rect
POLYGON ((0.2178 0.34365, 0.2178 0.47315, 0.062 0.47315, 0.062 0.34365, 0.2178 0.34365))
rect
POLYGON ((0.21545 0.3441, 0.21545 0.4687, 0.06135 0.4687, 0.06135 0.3441, 0.21545 0.3441))
rect
POLYGON ((0.21755 0.3456, 0.21755 0.4712, 0.06085 0.4712, 0.06085 0.3456, 0.21755 0.3456))
rect
POLYGON ((0.2181 0.3702, 0.2181 0.5016, 0.0593 0.5016, 0.0593 0.3702, 0.2181 0.3702))
rect
POLYGON ((0.2173 0.37185, 0.2173 0.5015499999999999, 0.0599 0.5015499999999999, 0.0599 0.37185, 0.2173 0.37185))
rect
POLYGON ((0.35805 0.3681, 0.35805 0.5609000000000001, 0.11435 0.5609000000000001, 0.11435 0.3681, 0.35805 0.3681))
rect
POLYGON ((0.9027999999999999 0.4327, 0.9027999999999999 0.6021, 0.6682 0.6021, 0.6682 0.4327, 0.9027999999999999 0.4327))
rect
POLYGON ((0.9037 0.3968, 0.9037 0.6172, 0.6595 0.6172, 0.6595 0.3968, 0.9037 0.3968))
rect
POLYGON ((0.900