# 将原图处理为Crop后的图像

更新：
- 210607: 图像shape为`(height, width, channel)`，但labelme格式的points中坐标存储为`(x, y)`，其中x对应宽边，y对应高边，需要注意。
- 210608: 训练报错: ERROR - The testing results of the whole dataset is empty.
  - 参考 <https://www.pythonf.cn/read/171511>：可能是边框不能和图像边界重叠。
  - 也可能是被 crop 边界截断的标注的点阵顺序有问题，采用每次处理三个点（若三点处于同一条直线则删除中间点）的方式处理一下。
- 210611: 注意circle标注本就只有两个点
- 210611: 注意每次处理点的时候要进行`deepcopy`，`.copy()`只能复制一层，无法复制子对象

In [10]:

import os
import json
import numpy as np
import glob
import shutil
from sklearn.model_selection import train_test_split
np.random.seed(41)

import math
from PIL import Image
import matplotlib.pyplot as plt
import cv2
import copy

import labelme.utils as lu


def originImageLoader(image_path):
    """Load the image embedded in a labelme JSON file as a uint8 array.

    Args:
        image_path: Path to a labelme ``.json`` file; its ``imageData``
            entry is decoded with ``labelme.utils.img_b64_to_arr``.

    Returns:
        The decoded image cast to ``np.uint8``.
    """
    with open(image_path, encoding='utf-8') as json_file:
        encoded_image = json.load(json_file)['imageData']
        decoded = lu.img_b64_to_arr(encoded_image)
        return decoded.astype(np.uint8)

# Category id 0 is reserved for the background, so real classes start at 1.
dataset = 'skin'  # one of: 'face', 'skin'

# Per-dataset settings: label name -> COCO category id, the folder holding the
# labelme JSON files, and the root folder the COCO output is written under.
if dataset == 'face':
    classname_to_id = {"brow": 1, "nose": 2, "mouth": 3}
    labelme_path = "E:/dataset/face/labelme/"
    saved_coco_path = "E:/dataset/face/"
elif dataset == 'skin':
    classname_to_id = {"closed_comedo": 1, "open_comedo": 2, "papule": 3, "pustule": 4, "nodule": 5,
        "atrophic_scar": 6, "hypertrophic_scar": 7, "melasma": 8, "nevus": 9, "other": 10}
    labelme_path = "E:/data_raw/skin/labelme"
    saved_coco_path = "E:/dataset/skin/"
else:
    # Fail here instead of with a confusing NameError further down.
    raise ValueError(f"unsupported dataset {dataset!r}; expected 'face' or 'skin'")
    
class Lableme2CoCo:

    def __init__(self):
        self.images = []
        self.annotations = []
        self.categories = []
        self.img_id = 0
        self.ann_id = 0

    def save_coco_json(self, instance, save_path):
        json.dump(instance, open(save_path, 'w', encoding='utf-8'), ensure_ascii=False, indent=1)  # indent=2 更加美观显示

    def to_coco(self, json_path_list):
        '''
        由json文件构建COCO，不包括数据强化augment
        '''
        self._init_categories()
        for json_path in json_path_list:
            obj = self.read_jsonfile(json_path)
            # 处理图像信息
            self.images.append(self._image(obj, json_path))
            # 处理标注信息
            shapes = obj['shapes']
            for shape in shapes:
                annotation = self._annotation(shape)
                self.annotations.append(annotation)
                self.ann_id += 1
            self.img_id += 1
        instance = {}
        instance['info'] = 'spytensor created'
        instance['license'] = ['license']
        instance['images'] = self.images
        instance['annotations'] = self.annotations
        instance['categories'] = self.categories
        return instance
    
    def to_aug_coco(self, json_path_list, img_save_pre):
        '''
        由json文件构建COCO，不包括数据强化augment
        '''
        print('to_aug_coco start')
        self._init_categories()
        for json_path in json_path_list:
            print('handling: ', json_path)
            obj = self.read_jsonfile(json_path)
            sub_image_list, sub_annotation_list = self._relative_crop(obj = obj, 
                                                                      json_path = json_path, 
                                                                      img_save_pre = img_save_pre)
            self.images += sub_image_list
            self.annotations += sub_annotation_list
            
        instance = {}
        instance['info'] = 'spytensor created'
        instance['license'] = ['license']
        instance['images'] = self.images
        instance['annotations'] = self.annotations
        instance['categories'] = self.categories
        return instance
    
    def _relative_crop(self, obj, json_path, img_save_pre, img_scale=(2048,2048)):
        """Cut one labelled image into a grid of tiles and remap its shapes.

        The image is covered by ``num_h x num_w`` tiles whose top-left corners
        are spread evenly over the image, so neighbouring tiles overlap when
        the image size is not a multiple of the tile size.  Every tile is
        saved under a ``<json base name>_crop<k>.jpg`` file name, and every
        shape is copied into each tile its bounding box touches, with its
        points shifted into the tile frame and clamped to ``[1, size - 1]``
        so no annotation lies on the tile border.  Annotations whose clamped
        bounding box is under 5 px wide or high are dropped.

        ``self.img_id`` is advanced past all tiles of this image and
        ``self.ann_id`` past all kept annotations.

        Args:
            obj: Parsed labelme JSON; only ``obj['shapes']`` is read and it is
                not modified.
            json_path: Path of the labelme file; the image is loaded from it
                and its base name prefixes the tile file names.
            img_save_pre: String prefix concatenated with each tile file name
                (a directory path ending with a separator).
            img_scale: Requested tile size as ``(height, width)``.  A side is
                reduced to the image size when the image is smaller.

        Returns:
            Tuple ``(sub_image_list, sub_annotation_list)`` with the COCO
            image records of the tiles (each carrying an extra ``crop_xy``
            entry, the ``[row, column]`` of the tile's top-left corner in the
            source image) and the remapped annotation records.
        """
        sub_image_list = []
        sub_annotation_list = []
        tile_origins = []  # (top, left) of every tile, in tile-id order

        # Decode the image once and take its size from the array itself.
        img_numpy = originImageLoader(json_path)
        img_height, img_width = img_numpy.shape[:2]

        # A tile can never be larger than the image it is cut from.
        tile_h = min(img_scale[0], img_height)
        tile_w = min(img_scale[1], img_width)
        num_h = math.ceil(img_height / tile_h)
        num_w = math.ceil(img_width / tile_w)
        # Distance between neighbouring tile origins; with a single tile along
        # an axis there is nothing to space out (and nothing to divide by).
        space_h = (img_height - tile_h) / (num_h - 1) if num_h > 1 else 0
        space_w = (img_width - tile_w) / (num_w - 1) if num_w > 1 else 0
        start_img_id = self.img_id

        print('image_info',img_height,',',img_width,'img_numpy: ',img_numpy.shape)

        base_name = os.path.basename(json_path).replace(".json", "")
        for i in range(num_h):
            for j in range(num_w):
                tile_index = i * num_w + j
                # min() guards against float rounding pushing the last tile
                # past the image edge.
                top = min(math.ceil(i * space_h), img_height - tile_h)
                left = min(math.ceil(j * space_w), img_width - tile_w)
                image = {
                    'height': tile_h,
                    'width': tile_w,
                    'id': start_img_id + tile_index,
                    'file_name': base_name + '_crop' + str(tile_index) + ".jpg",
                    # Extra, non-COCO field: tile origin in the source image.
                    'crop_xy': [top, left],
                }
                tile = Image.fromarray(img_numpy[top:top + tile_h, left:left + tile_w])
                tile.save(img_save_pre + image['file_name'])

                sub_image_list.append(image)
                tile_origins.append((top, left))

        for shape in obj['shapes']:
            points = shape['points']
            if shape['shape_type'] == "circle":
                # labelme stores a circle as two points (centre, rim point).
                points = self._circle_to_polygon(points)
            # labelme points are (x, y): x runs along the width, y the height.
            x, y, w, h = self._get_box(points)
            for k, (top, left) in enumerate(tile_origins):
                overlaps = (top - h <= y < top + tile_h
                            and left - w <= x < left + tile_w)
                if not overlaps:
                    continue
                # Fresh point lists per tile, so neither the source shape nor
                # the copies made for other tiles are touched.
                shifted = [
                    [min(max(1, px - left), tile_w - 1),
                     min(max(1, py - top), tile_h - 1)]
                    for px, py in points
                ]
                shapes_item = {**shape, 'points': shifted}
                # _annotation reads self.img_id, so point it at this tile.
                self.img_id = start_img_id + k
                annotation = self._annotation(shapes_item)
                if annotation['bbox'][2] < 5 or annotation['bbox'][3] < 5:
                    continue
                sub_annotation_list.append(annotation)
                self.ann_id += 1

        self.img_id = start_img_id + num_h * num_w
        print(self.ann_id)
        return sub_image_list, sub_annotation_list

    @staticmethod
    def _circle_to_polygon(points):
        """Return the 8-vertex polygon approximating a (centre, rim point) circle."""
        (cx, cy), (px, py) = points
        d = math.sqrt((cx - px) ** 2 + (cy - py) ** 2)  # radius
        d1 = math.sqrt((d ** 2) / 2)  # x/y offset of the diagonal vertices
        return [[cx, cy-d], [cx-d1, cy-d1], [cx-d, cy], [cx-d1, cy+d1],
                [cx, cy+d], [cx+d1, cy+d1], [cx+d, cy], [cx+d1, cy-d1]]
    
    def _show_image(self, img_numpy, sub_image_list, sub_annotation_list, block_num, img_scale=(1024,1024)):
        """Plot every tile with the bounding boxes of its annotations (debug aid).

        Args:
            img_numpy: Source image array, indexed ``[row, column, channel]``.
            sub_image_list: Tile records carrying ``crop_xy`` and ``id``.
            sub_annotation_list: Annotation records carrying ``image_id`` and
                ``segmentation``.
            block_num: ``(num_w, num_h)`` — number of tiles per row / column,
                used as the subplot grid.
            img_scale: Tile size ``(height, width)`` used to slice the image.
        """
        num_w, num_h = block_num
        tile_h, tile_w = img_scale[0], img_scale[1]
        plt.figure(figsize=(16,12))
        for subplot_index, image in enumerate(sub_image_list, start=1):
            top, left = image['crop_xy'][0], image['crop_xy'][1]
            # NOTE(review): this is a plain slice of img_numpy, so the
            # rectangles below are presumably drawn into the caller's array.
            im = img_numpy[top:top+tile_h, left:left+tile_w, :]
            for annotation in sub_annotation_list:
                if annotation['image_id'] != image['id']:
                    continue
                # Flat [x0, y0, x1, y1, ...] list back to (x, y) pairs.
                points = np.array(annotation['segmentation']).reshape(-1,2).tolist()
                x, y, w, h = self._get_box(points)
                cv2.rectangle(im, (int(x), int(y)), (int(x+w), int(y+h)), (0,255,0), thickness=3)
            plt.subplot(num_h, num_w, subplot_index)
            plt.imshow(im)
            # Hide the axis tick values.
            plt.xticks([])
            plt.yticks([])
        plt.subplots_adjust(wspace=-0.6, hspace=0.1)
        plt.show()
    
    # 构建类别
    def _init_categories(self):
        '''
        根据全局变量classname_to_id构建类别
        '''
        for k, v in classname_to_id.items():
            category = {}
            category['id'] = v
            category['name'] = k
            self.categories.append(category)

    # 构建COCO的image字段
    def _image(self, obj, path):
        """Build the COCO ``images`` record for one labelme file.

        Args:
            obj: Parsed labelme JSON; its ``imageData`` entry is decoded to
                obtain the image size.
            path: Path of the labelme ``.json`` file; its base name with
                ``.json`` replaced by ``.jpg`` becomes ``file_name``.

        Returns:
            Dict with ``height``, ``width``, ``id`` (the current
            ``self.img_id``) and ``file_name``.
        """
        from labelme import utils
        img_x = utils.img_b64_to_arr(obj['imageData'])
        # Only the first two axes are rows/columns; shape[:2] also works when
        # the decoded array is 2-D (no channel axis), where shape[:-1] would
        # leave a single value and fail to unpack.
        h, w = img_x.shape[:2]
        return {
            'height': h,
            'width': w,
            'id': self.img_id,
            'file_name': os.path.basename(path).replace(".json", ".jpg"),
        }

    # 构建COCO的annotation字段
    def _annotation(self, shape):
        label = shape['label']
        points = shape['points']
        annotation = {}
        if shape['shape_type'] != "circle" and shape['shape_type'] != "polygon":
            print(f'==================== ', shape['shape_type'], ' =====================')
        # TODO: to_coco与to_aug_coco关于点阵的处理位置不同
#         if shape['shape_type'] == "circle":
#             xy = [tuple(point) for point in points]
#             (cx, cy), (px, py) = xy # 圆心和第二点
#             # TODO：将圆形转换为多边形
#             d = math.sqrt((cx - px) ** 2 + (cy - py) ** 2)
#             d1 = math.sqrt((d ** 2) / 2)
#             points = [[cx, cy-d], [cx-d1, cy-d1], [cx-d, cy], [cx-d1, cy+d1],
#                      [cx, cy+d], [cx+d1, cy+d1], [cx+d, cy], [cx+d1, cy-d1]]
        
        annotation['id'] = self.ann_id
        annotation['image_id'] = self.img_id
#         if label == 'closecomedo':
#             annotation['category_id'] = 2
#         elif label == 'comedo w':
#             annotation['category_id'] = 2
#         elif label == 'comedo b':
#             annotation['category_id'] = 1
#         elif label == 'pigment':
#             annotation['category_id'] = 3
#         elif label == 'undifined':
#             annotation['category_id'] = 6
#         elif label == 'nodule':
#             annotation['category_id'] = 4
#         elif label == 'scar hyper':
#             annotation['category_id'] = 5
#         else :
#             annotation['category_id'] = int(classname_to_id[label])
        annotation['category_id'] = int(classname_to_id[label])
        annotation['segmentation'] = [np.asarray(points).flatten().tolist()]
        annotation['bbox'] = self._get_box(points)
        annotation['iscrowd'] = 0
        annotation['area'] = 1.0
        return annotation

    # 读取json文件，返回一个json对象
    def read_jsonfile(self, path):
        with open(path, "r", encoding='utf-8') as f:
            return json.load(f)

    # COCO的格式： [x1,y1,w,h] 对应COCO的bbox格式
    def _get_box(self, points):
        min_x = min_y = np.inf
        max_x = max_y = 0
        for x, y in points:
            min_x = min(min_x, x)
            min_y = min(min_y, y)
            max_x = max(max_x, x)
            max_y = max(max_y, y)
        return [min_x, min_y, max_x - min_x, max_y - min_y]


In [11]:
if __name__ == '__main__':

    # Create the COCO output folders that do not exist yet.  Each pair is
    # (path that is checked, path that is created).
    for probe_fmt, create_fmt in (
        ("%scoco1/annotations/", "%scoco1/annotations/"),
        ("%scoco1/images/train2017/", "%scoco1/images/train2017"),
        ("%scoco1/images/val2017/", "%scoco1/images/val2017"),
    ):
        if not os.path.exists(probe_fmt % saved_coco_path):
            os.makedirs(create_fmt % saved_coco_path)

    # Collect every labelme JSON file and split the list 80 / 20.
    json_list_path = glob.glob(labelme_path + "/*.json")
    train_path, val_path = train_test_split(json_list_path, test_size=0.2)

    # The face dataset uses every file for training and has no validation set.
    if dataset == 'face':
        train_path = train_path + val_path
        val_path = []

    print(train_path)
    print(val_path)
    print("train_n:", len(train_path), 'val_n:', len(val_path))

    # Training split: crops are saved under train2017, then the COCO JSON.
    l2c_train = Lableme2CoCo()
    train_instance = l2c_train.to_aug_coco(train_path, "%scoco1/images/train2017/" % saved_coco_path)
    l2c_train.save_coco_json(train_instance, '%scoco1/annotations/instances_train2017.json' % saved_coco_path)

    # Validation split: crops are saved under val2017, then the COCO JSON.
    l2c_val = Lableme2CoCo()
    val_instance = l2c_val.to_aug_coco(val_path, "%scoco1/images/val2017/" % saved_coco_path)
    l2c_val.save_coco_json(val_instance, '%scoco1/annotations/instances_val2017.json' % saved_coco_path)


['E:/data_raw/skin/labelme\\J__任科宇_痤疮_20190130143112000_斑点.json', 'E:/data_raw/skin/labelme\\J__安妮_痤疮_20190226083127000_斑点.json', 'E:/data_raw/skin/labelme\\J__夏梦旭_痤疮_20191230111741000_斑点.json', 'E:/data_raw/skin/labelme\\J__李亚玲_痤疮_20190220131753000_斑点.json', 'E:/data_raw/skin/labelme\\CXM__严仁杰_痤疮_20190816103212000_斑点.json', 'E:/data_raw/skin/labelme\\J__康禾_痤疮PIH_20190319091700000_斑点.json', 'E:/data_raw/skin/labelme\\J__余泓颖_痤疮_20190220103433000_斑点.json', 'E:/data_raw/skin/labelme\\J__康禾_痤疮_20190416094801000_斑点.json', 'E:/data_raw/skin/labelme\\CXM__高美玲_痤疮_20190821102141000_斑点.json', 'E:/data_raw/skin/labelme\\J__侯玥颖_痤疮_20191030103240000_斑点.json', 'E:/data_raw/skin/labelme\\J__蔡晓舟_痤疮_20190220104205000_斑点.json', 'E:/data_raw/skin/labelme\\J__尹洁_痤疮_20190227094716000_斑点.json', 'E:/data_raw/skin/labelme\\J__陈乙_痤疮_20191030102140000_斑点.json', 'E:/data_raw/skin/labelme\\CXM__周琦雨_痤疮_20200910154351000_斑点.json', 'E:/data_raw/skin/labelme\\J__蔡炜_痤疮_20190130134832000_斑点.json', 'E:/data_raw/skin/lab


['E:/data_raw/skin/labelme\\J__伍正伟痤疮_20190910105713000_标准照片.json', 'E:/data_raw/skin/labelme\\J__孙艺术_痤疮_20190416084457000_斑点.json', 'E:/data_raw/skin/labelme\\J__孙小艳_痤疮_20191113111348000_斑点.json', 'E:/data_raw/skin/labelme\\CXM__王江_痤疮_20190802083140000_斑点.json', 'E:/data_raw/skin/labelme\\J__屈艳_痤疮_20190319103901000_斑点.json', 'E:/data_raw/skin/labelme\\J__刘淑楠_痤疮_20190102133549000_斑点.json', 'E:/data_raw/skin/labelme\\CXM__陈荣荣_痤疮_20191011081708000_标准照片.json', 'E:/data_raw/skin/labelme\\CXM__余浩涓_痤疮_20200820135647000_斑点.json', 'E:/data_raw/skin/labelme\\J__宋诗睿_痤疮_20191126083125000_斑点.json', 'E:/data_raw/skin/labelme\\CXM__祁雄伟_痤疮_20190926131328000_斑点.json', 'E:/data_raw/skin/labelme\\J__李陈_痤疮_20190122112959000_斑点.json', 'E:/data_raw/skin/labelme\\J__孟怡洁_痤疮_20191030095016000_斑点.json', 'E:/data_raw/skin/labelme\\J__谢柠枍_痤疮_20190130132512000_斑点.json', 'E:/data_raw/skin/labelme\\CXM__黎晓明_痤疮_20201008134406000_斑点.json', 'E:/data_raw/skin/labelme\\GZP__蒲睿琪_痤疮_20200512082250000_斑点.json', 'E:/data_ra

handling:  E:/data_raw/skin/labelme\CXM__何小清_痤疮_20190816143201000_斑点.json
image_info 4600 , 3448 img_numpy:  (4600, 3448, 3)
14961
handling:  E:/data_raw/skin/labelme\J__王秋妮痤疮_20190710143347000_斑点.json
image_info 4600 , 3448 img_numpy:  (4600, 3448, 3)
15094
handling:  E:/data_raw/skin/labelme\J__杨凡_痤疮_20190107100950000_斑点.json
image_info 5184 , 3456 img_numpy:  (5184, 3456, 3)
15465
handling:  E:/data_raw/skin/labelme\CXM__王坤腾_痤疮_20190927092832000_斑点.json
image_info 5184 , 3456 img_numpy:  (5184, 3456, 3)
15643
handling:  E:/data_raw/skin/labelme\J__任春晓_痤疮_20201021091425000_斑点.json
image_info 4600 , 3448 img_numpy:  (4600, 3448, 3)
15922
handling:  E:/data_raw/skin/labelme\CXM__陈彦西_痤疮_20200917134609000_斑点.json
image_info 4600 , 3448 img_numpy:  (4600, 3448, 3)
16086
handling:  E:/data_raw/skin/labelme\J__左宇_痤疮_20200914103835000_斑点.json
image_info 4600 , 3448 img_numpy:  (4600, 3448, 3)
16134
handling:  E:/data_raw/skin/labelme\J__江梓菡_痤疮_20190123142230000_斑点.json
image_info 5184 , 3456

In [12]:
# Report how many labelme files are in the training and validation lists.
print(len(train_path))
print(len(val_path))

132
33


In [6]:
# Scratch check: decode the image embedded in one labelme JSON file.
test_path = '/home/wangjy/research/maskrcnn/facedata/CXM, #U5218#U5201#U5201-#U75e4#U75ae 20200917144814000 #U6591#U70b9.json'
test_image = originImageLoader(test_path)

In [4]:
import os
# Count the entries in the train / val image folders of a cropped dataset.
check_path_img_train = '/home/wangjy/data/facedata_croped/coco/images/train2017/'
check_path_img_val = '/home/wangjy/data/facedata_croped/coco/images/val2017/'
check_list_img_train = os.listdir(check_path_img_train)
check_list_img_val = os.listdir(check_path_img_val)
print(len(check_list_img_train))
print(len(check_list_img_val))

# Annotation folder of the same dataset; not used further in this cell.
check_path_label_train = '/home/wangjy/data/facedata_croped/coco/annotations/'

2496
340
