In [4]:
import albumentations as A
import cv2
import matplotlib.pyplot as plt
import numpy as np
import os

In [7]:

def apply_perspective_transform(image_path, label_path, output_image_path, output_label_path, scale=(0,0.05)):
    # 读取图像
    image = cv2.imread(image_path)
    height, width = image.shape[:2]

    # 读取标注并转换为 Albumentations 格式
    bboxes = []
    with open(label_path, 'r') as file:
        for line in file:
            class_id, x_center, y_center, bbox_width, bbox_height = map(float, line.split())
            # x_min = (x_center - bbox_width / 2) * width
            # y_min = (y_center - bbox_height / 2) * height
            # x_max = (x_center + bbox_width / 2) * width
            # y_max = (y_center + bbox_height / 2) * height
            # cv2.rectangle(image, (int(x_min), int(y_min)), (int(x_max), int(y_max)), (0, 255, 0), 2)
            # bboxes.append([x_min, y_min, x_max, y_max, class_id])
            bboxes.append([x_center, y_center, bbox_width, bbox_height, class_id])
    # plt.imshow(image)
    # plt.show()

    # 定义透视变换
    transform = A.Compose(
    [A.Perspective(p=1, scale=scale)],
    bbox_params=A.BboxParams(format='yolo'),
    )

    # 应用变换
    transformed = transform(image=image, bboxes=bboxes, class_labels=[bbox[4] for bbox in bboxes])
    transformed_image = transformed['image']
    transformed_bboxes = transformed['bboxes']
    
    # show image
    # plt.imshow(transformed_image)
    # plt.show()
    
    # 绘制变换后的图像和边界框
    # for bbox in transformed_bboxes:
    #     x_center, y_center, bbox_width, bbox_height, class_id = bbox
    #     x_min = (x_center - bbox_width / 2) * width
    #     y_min = (y_center - bbox_height / 2) * height
    #     x_max = (x_center + bbox_width / 2) * width
    #     y_max = (y_center + bbox_height / 2) * height
    #     bboxes.append([x_min, y_min, x_max, y_max, class_id])
        # cv2.rectangle(transformed_image, (int(x_min), int(y_min)), (int(x_max), int(y_max)), (0, 255, 0), 2)

    # plt.imshow(transformed_image)
    # plt.show()
    
    # 保存变换后的图像
    cv2.imwrite(output_image_path, transformed_image)

    # 保存变换后的标注
    with open(output_label_path, 'w') as file:
        for bbox in transformed_bboxes:
            x_center, y_center, bbox_width, bbox_height, class_id = bbox
            file.write(f'{int(class_id)} {x_center} {y_center} {bbox_width} {bbox_height}\n')

import sys
import os
def create_dataset(scale, base="GC10-DET"):
    # mkdir
    base_path = os.path.join('/Data4/student_zhihan_data/data', base)
    name = f"{base}_Transform_Scale_{scale[0]}:{scale[1]}"
    path = os.path.join('/Data4/student_zhihan_data/data', name)
    if not os.path.exists(path):
        os.mkdir(path)
        # create train, val, test directory
        os.mkdir(os.path.join(path, 'train'))
        os.mkdir(os.path.join(path, 'valid'))
        os.mkdir(os.path.join(path, 'test'))
        # copy images
        os.system(f"cp -r /Data4/student_zhihan_data/data/{base}/train/* {path}/train")
        os.system(f"cp -r /Data4/student_zhihan_data/data/{base}/valid/* {path}/valid")
        os.system(f"cp -r /Data4/student_zhihan_data/data/{base}/test/* {path}/test")
        # copy configure file
        os.system(f"cp /Data4/student_zhihan_data/data/{base}/data.yaml {path}")

    # adjust brightness of images in train, val, test directory
    for dir in ['train', 'valid', 'test']:
        for img in os.listdir(os.path.join(path, dir, "images")):
            out_img_path = os.path.join(path, dir, "images", img)
            out_label_path = os.path.join(path, dir, "labels", img[:-4]+".txt")
            in_img_path = os.path.join(base_path, dir, "images", img)
            in_label_path = os.path.join(base_path, dir, "labels", img[:-4]+".txt")
            
            apply_perspective_transform(in_img_path, in_label_path, out_img_path, out_label_path, scale)


# 使用示例
start = np.arange(0,0.3,0.05)
stop = np.arange(0.05, 0.35, 0.05)
# for i in zip(start, stop):
#     print(i)
#     apply_perspective_transform('/Data4/student_zhihan_data/data/GC10-DET/test/images/img_01_425000300_00630_jpg.rf.12001adc8b86faf88a47b6aa6f321b91.jpg', '/Data4/student_zhihan_data/data/GC10-DET/test/labels/img_01_425000300_00630_jpg.rf.12001adc8b86faf88a47b6aa6f321b91.txt', 'path/to/output_image.jpg', 'path/to/output_label.txt',i)

In [8]:
for i in zip(start, stop):
    print(i)
    create_dataset(i)

(0.0, 0.05)
(0.05, 0.1)
(0.1, 0.15000000000000002)
(0.15000000000000002, 0.2)
(0.2, 0.25)
(0.25, 0.3)
