Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Feature] Add tool for converting labelme data to coco format #2041

Open
wants to merge 1 commit into
base: dev-1.x
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
322 changes: 322 additions & 0 deletions tools/misc/labelme2coco.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,322 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import glob
import json
import os
import shutil

import numpy as np
from sklearn.model_selection import train_test_split
from tqdm import tqdm

# import sys
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please remove the unnecessary code.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I thought some of the commented-out code might be useful, so I kept it.



class Labelme2coco_keypoints():

def __init__(self, args):
"""Lableme 关键点数据集转 COCO 数据集的构造函数:
LareinaM marked this conversation as resolved.
Show resolved Hide resolved

Args
args:命令行输入的参数
- class_name 根类名字
"""

self.classname_to_id = {args.class_name: 1}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible to have multiple classes?

self.images = []
self.annotations = []
self.categories = []
self.ann_id = 0
self.img_id = 0

def save_coco_json(self, instance, save_path):
    """Serialize ``instance`` as JSON and write it to ``save_path``.

    Args:
        instance: JSON-serializable object (the COCO dictionary).
        save_path (str): Destination file; overwritten if it exists.
    """
    # The context manager closes (and therefore flushes) the handle even
    # when serialization fails; the inline ``open`` never closed it.
    with open(save_path, 'w', encoding='utf-8') as f:
        json.dump(instance, f, ensure_ascii=False, indent=1)

def read_jsonfile(self, path):
    """Load and return the JSON document stored at ``path`` (UTF-8)."""
    with open(path, mode='r', encoding='utf-8') as handle:
        content = json.load(handle)
    return content

def _get_box(self, points):
min_x = min_y = np.inf
max_x = max_y = 0
for x, y in points:
min_x = min(min_x, x)
min_y = min(min_y, y)
max_x = max(max_x, x)
max_y = max(max_y, y)
return [min_x, min_y, max_x - min_x, max_y - min_y]

def _get_keypoints(self, points, keypoints, num_keypoints, label):
"""解析 labelme 的原始数据, 生成 coco 标注的 关键点对象.

例如:
"keypoints": [
67.06149888292556, # x 的值
122.5043507571318, # y 的值
1, # 相当于 Z 值,2D关键点 v = 0表示不可见,
v = 1表示标记但不可见,v = 2表示标记且可见
82.42582269256718,
109.95672933232304,
1,
...,
],
"""
labels = ['wrist', 'thumb1', 'thumb2', ...]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should not be hard-coded.

flag = label.split('_')[-1]
LareinaM marked this conversation as resolved.
Show resolved Hide resolved
x = label.split('_')[0]
visible = 0
if flag == 'occluded':
visible = 1
else:
visible = 2
x = labels.index(x)
keypoints[x * 3] = points[0]
keypoints[x * 3 + 1] = points[1]
keypoints[x * 3 + 2] = visible
num_keypoints += 1

return num_keypoints

def _image(self, obj, path):
"""解析 labelme 的 obj 对象,生成 coco 的 image 对象.

生成包括:id,file_name,height,width 4个属性

示例:
{
"file_name": "training/rgb/00031426.jpg",
"height": 224,
"width": 224,
"id": 31426
}
"""

image = {}

# 此处通过imageData获得数据
# 获得原始 labelme 标签的 imageData 属性,并通过 labelme 的工具方法转成 array
# img_x = utils.img_b64_to_arr(obj['imageData'])
# image['height'], image['width'] = img_x.shape[:-1] # 获得图片的宽高

# 此处直接通过imageHeight,imageWidth得到,避免labelme中的imageData问题
image['height'], image['width'] = obj['imageHeight'], obj[
'imageWidth'] # 获得图片的宽高
# self.img_id = int(os.path.basename(path).split(".json")[0])
self.img_id = self.img_id + 1
image['id'] = self.img_id

image['file_name'] = os.path.basename(path).replace('.json', '.jpg')

return image

def _annotation(self, bboxes_list, keypoints_list, json_path):
"""生成coco标注.

Args: bboxes_list: 矩形标注框 keypoints_list: 关键点 json_path:json文件路径
"""
# 核对一个bbox里有n个keypoints; 然而本人不要求每个bbox里都要有n个点
# if len(keypoints_list) != args.join_num * len(bboxes_list):
# print(
# 'you loss {} keypoint(s) with file {}'\
# .format(args.join_num * len(bboxes_list) -\
# len(keypoints_list), json_path)
# )
# print('Please check !!!')
# sys.exit()

i = 0
# 对每个bbox分别保存keypoints
for object in bboxes_list:
annotation = {}
keypoints = [0 for i in range(36)
] # 每个keypoint数组初始化为[0,..] len = 36 对应12个点(x,y,v)
num_keypoints = 0

label = object['label']
bbox = object['points']
annotation['id'] = self.ann_id
annotation['image_id'] = self.img_id
annotation['category_id'] = int(self.classname_to_id[label])
annotation['iscrowd'] = 0
annotation['area'] = 1.0
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

area should be calculated, not 1.0

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I forgot to change this code when I noticed it. Thank you!

annotation['segmentation'] = [np.asarray(bbox).flatten().tolist()
] # 两个坐标点
annotation['bbox'] = self._get_box(bbox) # 矩形框左上角的坐标和矩形框的长宽

# 生成keypoint的list
for keypoint in keypoints_list:
point = keypoint['points']
label = keypoint['label'] # 点的名字
num_keypoints = self._get_keypoints(point[0], keypoints,
num_keypoints, label)
annotation['keypoints'] = keypoints
annotation['num_keypoints'] = num_keypoints

i += 1
self.ann_id += 1
self.annotations.append(annotation)

def _init_categories(self):
"""初始化 COCO 的 标注类别.

例如:
"categories": [
{
"supercategory": "hand",
"id": 1,
"name": "hand",
"keypoints": [
"wrist",
"thumb1",
"thumb2",
...,
],
"skeleton": [
]
}
]
"""

for name, id in self.classname_to_id.items():
category = {}

category['supercategory'] = name
category['id'] = id
category['name'] = name
# n个关键点数据
category['keypoint'] = [
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should be another input instead of hard-coded.

'wrist',
'thumb1',
'thumb2',
...,
]
# category['keypoint'] = [str(i + 1) for i in range(args.join_num)]

self.categories.append(category)

def to_coco(self, json_path_list):
    """Convert Labelme annotation files into one COCO-format dictionary.

    Args:
        json_path_list (list): Paths of the Labelme ``.json`` files.

    Returns:
        dict: Dictionary with the keys ``info``, ``licenses``, ``images``,
        ``annotations`` and ``categories``.
    """
    self._init_categories()
    # Process the annotation files one by one.
    for json_path in tqdm(json_path_list):
        obj = self.read_jsonfile(json_path)
        self.images.append(self._image(obj, json_path))

        # Rectangles are instance boxes, points are keypoints; any other
        # shape type is ignored.
        bboxes_list, keypoints_list = [], []
        for shape in obj['shapes']:
            if shape['shape_type'] == 'rectangle':
                bboxes_list.append(shape)
            elif shape['shape_type'] == 'point':
                keypoints_list.append(shape)
        # One call per file, i.e. per image.
        self._annotation(bboxes_list, keypoints_list, json_path)

    return {
        'info': {
            'description': 'Air Dataset',
            'version': 1.0,
            'year': 2022
        },
        # COCO names this top-level key "licenses"; it was misspelled
        # "license" before.
        'licenses': ['BUAA'],
        'images': self.images,
        'annotations': self.annotations,
        'categories': self.categories,
    }


def init_dir(base_path):
    """Create the COCO dataset folder layout below ``base_path``.

    The layout is::

        coco/annotations   # annotation files
        coco/train         # training images
        coco/val           # validation images

    Args:
        base_path (str): Root folder in which the ``coco`` tree is created.
    """
    for sub_dir in ('annotations', 'train', 'val'):
        # ``exist_ok`` replaces the separate exists-then-create check, which
        # could fail if the folder appeared between the two calls.
        os.makedirs(os.path.join(base_path, 'coco', sub_dir), exist_ok=True)


def convert(path, target):
    """Convert a folder of Labelme keypoint files into a COCO dataset.

    Command-line options are parsed from ``sys.argv``. The Labelme files are
    split into a training and a validation subset, one COCO annotation file
    is written per subset, and the matching ``.jpg`` images are copied next
    to them.

    Args:
        path (str): Default for ``--input`` (folder of Labelme JSON files).
        target (str): Default for ``--output`` (folder that receives the
            ``coco`` tree).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--class_name', '--n', help='class name', type=str, default='airplane')
    parser.add_argument(
        '--input',
        '--i',
        help='json file path (labelme)',
        type=str,
        default=path)
    # ``target`` used to be ignored: the output default was ``path``, so the
    # dataset was silently written into the input folder.
    parser.add_argument(
        '--output',
        '--o',
        help='output file path (coco format)',
        type=str,
        default=target)
    parser.add_argument(
        '--join_num', '--j', help='number of join', type=int, default=12)
    parser.add_argument(
        '--ratio',
        '--r',
        help='train and test split ratio',
        type=float,
        default=0.25)
    args = parser.parse_args()

    labelme_path = args.input
    saved_coco_path = args.output

    init_dir(saved_coco_path)  # create the COCO folder layout

    json_list_path = glob.glob(labelme_path + '/*.json')
    train_path, val_path = train_test_split(
        json_list_path, test_size=args.ratio)
    print('{} for training'.format(len(train_path)),
          '\n{} for testing'.format(len(val_path)))
    print('Start transform please wait ...')

    splits = (
        ('train', train_path, 'keypoints_train.json'),
        ('val', val_path, 'keypoints_val.json'),
    )

    # A fresh converter per split keeps image and annotation ids independent.
    for _, json_paths, ann_name in splits:
        converter = Labelme2coco_keypoints(args)
        coco_dict = converter.to_coco(json_paths)
        converter.save_coco_json(
            coco_dict,
            os.path.join(saved_coco_path, 'coco', 'annotations', ann_name))

    # Copy the source images into the train / val folders.
    for split_name, json_paths, _ in splits:
        for file in json_paths:
            # Swap only the extension; ``replace('json', 'jpg')`` also
            # rewrote "json" inside folder or file names.
            shutil.copy(
                os.path.splitext(file)[0] + '.jpg',
                os.path.join(saved_coco_path, 'coco', split_name))


# Script entry point.
if __name__ == '__main__':
    # Placeholder folder names handed to ``convert``; replace them with
    # real locations before running.
    source = 'your labelme path'
    target = 'your coco path'
    convert(source, target)
Comment on lines +1 to +322
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import glob
import json
import os
import shutil
import numpy as np
from sklearn.model_selection import train_test_split
from tqdm import tqdm
# import sys
class Labelme2coco_keypoints():
    """Convert Labelme keypoint annotations into a COCO-format dictionary.

    Args:
        args: Parsed command-line namespace. ``class_name`` (name of the
            single object category) is required. Two optional attributes
            are honoured when present: ``join_num`` (keypoint slots per
            instance, 12 when absent) and ``keypoint_names`` (ordered
            keypoint labels, ``DEFAULT_KEYPOINT_NAMES`` when absent).
    """

    # Fallback keypoint labels. This list used to be repeated in two methods
    # and ended with a literal ``...`` placeholder, which ``json.dump``
    # cannot serialize.
    DEFAULT_KEYPOINT_NAMES = ('wrist', 'thumb1', 'thumb2')

    def __init__(self, args):
        self.classname_to_id = {args.class_name: 1}
        self.images = []
        self.annotations = []
        self.categories = []
        self.ann_id = 0
        self.img_id = 0
        names = getattr(args, 'keypoint_names', None)
        self.keypoint_names = list(names or self.DEFAULT_KEYPOINT_NAMES)
        # Never allocate fewer slots than there are named keypoints.
        self.num_joints = max(
            getattr(args, 'join_num', None) or 12, len(self.keypoint_names))

    def save_coco_json(self, instance, save_path):
        """Serialize ``instance`` as JSON and write it to ``save_path``."""
        # ``with`` closes the handle even if serialization fails.
        with open(save_path, 'w', encoding='utf-8') as f:
            json.dump(instance, f, ensure_ascii=False, indent=1)

    def read_jsonfile(self, path):
        """Load and return the JSON document stored at ``path``."""
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)

    def _get_box(self, points):
        """Return ``[min_x, min_y, width, height]`` enclosing ``points``.

        Raises:
            ValueError: If ``points`` is empty.
        """
        xs, ys = [], []
        for x, y in points:
            xs.append(x)
            ys.append(y)
        if not xs:
            raise ValueError('cannot compute a bounding box from no points')
        min_x, min_y = min(xs), min(ys)
        # True maxima: a maximum seeded with 0 was wrong for boxes whose
        # coordinates are all negative.
        return [min_x, min_y, max(xs) - min_x, max(ys) - min_y]

    def _get_keypoints(self, points, keypoints, num_keypoints, label):
        """Write one Labelme point into the flat COCO ``keypoints`` list.

        The list holds ``x, y, v`` triples. ``v`` is 0 for a slot that was
        never labelled, 1 for "labelled but not visible" and 2 for "labelled
        and visible". A label ending in ``_occluded`` gets ``v = 1``, every
        other label ``v = 2``.

        Args:
            points: ``(x, y)`` of the keypoint.
            keypoints (list): Flat triple list, modified in place.
            num_keypoints (int): Running count of keypoints written so far.
            label (str): Labelme label, ``<name>`` or ``<name>_occluded``.

        Returns:
            int: ``num_keypoints + 1``.

        Raises:
            ValueError: If ``<name>`` is not a known keypoint name.
        """
        parts = label.split('_')
        name, flag = parts[0], parts[-1]
        visible = 1 if flag == 'occluded' else 2
        idx = self.keypoint_names.index(name)
        keypoints[idx * 3] = points[0]
        keypoints[idx * 3 + 1] = points[1]
        keypoints[idx * 3 + 2] = visible
        return num_keypoints + 1

    def _image(self, obj, path):
        """Build the COCO ``image`` record for one Labelme file.

        Increments ``self.img_id`` and uses the new value as the image id.

        Args:
            obj (dict): Parsed Labelme JSON (``imageHeight`` and
                ``imageWidth`` are read).
            path (str): Path of the Labelme ``.json`` file.

        Returns:
            dict: Record with ``height``, ``width``, ``id``, ``file_name``.
        """
        image = {}
        # Use the size stored in the JSON header instead of decoding the
        # embedded ``imageData``.
        image['height'] = obj['imageHeight']
        image['width'] = obj['imageWidth']
        self.img_id = self.img_id + 1
        image['id'] = self.img_id
        # Swap only the trailing extension, not every ".json" in the name.
        base_name = os.path.basename(path)
        stem, ext = os.path.splitext(base_name)
        image['file_name'] = stem + '.jpg' if ext == '.json' else base_name
        return image

    def _annotation(self, bboxes_list, keypoints_list, json_path):
        """Append one COCO annotation per rectangle of the current image.

        Every keypoint of the image is written into every rectangle's
        annotation; keypoints are not assigned to individual boxes.

        Args:
            bboxes_list (list): Labelme ``rectangle`` shapes.
            keypoints_list (list): Labelme ``point`` shapes.
            json_path (str): Path of the source file (currently unused).
        """
        for box_shape in bboxes_list:
            corners = box_shape['points']
            bbox = self._get_box(corners)

            keypoints = [0] * (3 * self.num_joints)
            num_keypoints = 0
            for keypoint in keypoints_list:
                num_keypoints = self._get_keypoints(
                    keypoint['points'][0], keypoints, num_keypoints,
                    keypoint['label'])

            self.annotations.append({
                'id': self.ann_id,
                'image_id': self.img_id,
                'category_id': int(self.classname_to_id[box_shape['label']]),
                'iscrowd': 0,
                # Box area; this used to be the constant 1.0.
                'area': float(bbox[2] * bbox[3]),
                # The two rectangle corners, flattened.
                'segmentation': [np.asarray(corners).flatten().tolist()],
                'bbox': bbox,
                'keypoints': keypoints,
                'num_keypoints': num_keypoints,
            })
            self.ann_id += 1

    def _init_categories(self):
        """Fill ``self.categories`` with one COCO category per class."""
        for name, cat_id in self.classname_to_id.items():
            self.categories.append({
                'supercategory': name,
                'id': cat_id,
                'name': name,
                # COCO spells this key "keypoints" (it was "keypoint").
                'keypoints': list(self.keypoint_names),
            })

    def to_coco(self, json_path_list):
        """Convert Labelme annotation files into a COCO-format dictionary.

        Args:
            json_path_list (list): Paths of the Labelme ``.json`` files.

        Returns:
            dict: Dictionary with the keys ``info``, ``licenses``,
            ``images``, ``annotations`` and ``categories``.
        """
        self._init_categories()
        for json_path in tqdm(json_path_list):
            obj = self.read_jsonfile(json_path)
            self.images.append(self._image(obj, json_path))

            # Rectangles are instance boxes, points are keypoints.
            bboxes_list, keypoints_list = [], []
            for shape in obj['shapes']:
                if shape['shape_type'] == 'rectangle':
                    bboxes_list.append(shape)
                elif shape['shape_type'] == 'point':
                    keypoints_list.append(shape)
            self._annotation(bboxes_list, keypoints_list, json_path)

        return {
            'info': {
                'description': 'Air Dataset',
                'version': 1.0,
                'year': 2022
            },
            # COCO names this key "licenses" (it was "license").
            'licenses': ['BUAA'],
            'images': self.images,
            'annotations': self.annotations,
            'categories': self.categories,
        }
def init_dir(base_path):
    """Create the COCO dataset folder layout below ``base_path``.

    The layout is::

        coco/annotations   # annotation files
        coco/train         # training images
        coco/val           # validation images

    Args:
        base_path (str): Root folder in which the ``coco`` tree is created.
    """
    for sub_dir in ('annotations', 'train', 'val'):
        # ``exist_ok`` replaces the separate exists-then-create check, which
        # could fail if the folder appeared between the two calls.
        os.makedirs(os.path.join(base_path, 'coco', sub_dir), exist_ok=True)
def convert(path, target):
    """Convert a folder of Labelme keypoint files into a COCO dataset.

    Command-line options are parsed from ``sys.argv``. The Labelme files are
    split into a training and a validation subset, one COCO annotation file
    is written per subset, and the matching ``.jpg`` images are copied next
    to them.

    Args:
        path (str): Default for ``--input`` (folder of Labelme JSON files).
        target (str): Default for ``--output`` (folder that receives the
            ``coco`` tree).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--class_name', '--n', help='class name', type=str, default='airplane')
    parser.add_argument(
        '--input',
        '--i',
        help='json file path (labelme)',
        type=str,
        default=path)
    # ``target`` used to be ignored: the output default was ``path``, so the
    # dataset was silently written into the input folder.
    parser.add_argument(
        '--output',
        '--o',
        help='output file path (coco format)',
        type=str,
        default=target)
    parser.add_argument(
        '--join_num', '--j', help='number of join', type=int, default=12)
    parser.add_argument(
        '--ratio',
        '--r',
        help='train and test split ratio',
        type=float,
        default=0.25)
    args = parser.parse_args()

    labelme_path = args.input
    saved_coco_path = args.output

    init_dir(saved_coco_path)  # create the COCO folder layout

    json_list_path = glob.glob(labelme_path + '/*.json')
    train_path, val_path = train_test_split(
        json_list_path, test_size=args.ratio)
    print('{} for training'.format(len(train_path)),
          '\n{} for testing'.format(len(val_path)))
    print('Start transform please wait ...')

    splits = (
        ('train', train_path, 'keypoints_train.json'),
        ('val', val_path, 'keypoints_val.json'),
    )

    # A fresh converter per split keeps image and annotation ids independent.
    for _, json_paths, ann_name in splits:
        converter = Labelme2coco_keypoints(args)
        coco_dict = converter.to_coco(json_paths)
        converter.save_coco_json(
            coco_dict,
            os.path.join(saved_coco_path, 'coco', 'annotations', ann_name))

    # Copy the source images into the train / val folders.
    for split_name, json_paths, _ in splits:
        for file in json_paths:
            # Swap only the extension; ``replace('json', 'jpg')`` also
            # rewrote "json" inside folder or file names.
            shutil.copy(
                os.path.splitext(file)[0] + '.jpg',
                os.path.join(saved_coco_path, 'coco', split_name))
# Script entry point.
if __name__ == '__main__':
    # Placeholder folder names handed to ``convert``; replace them with
    # real locations before running.
    source = 'your labelme path'
    target = 'your coco path'
    convert(source, target)
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import collections
import datetime
import glob
import json
import os
import shutil
import uuid
import numpy as np
import PIL.Image
import PIL.ImageDraw
from mmengine import Config
from sklearn.model_selection import train_test_split
from tqdm import tqdm
try:
import pycocotools.mask
except ImportError:
import sys
print('Please install pycocotools:\n\n pip install pycocotools\n')
sys.exit(1)
class Labelme2coco_keypoints():
    """Convert Labelme annotated keypoints into COCO format.

    Args:
        category_names (list): A list of category names in this dataset.
        keypoints_names (list): A list of keypoint names in this dataset.
        skeleton (list): A list of skeleton information in this dataset.
    """

    def __init__(self, category_names, keypoints_names, skeleton):
        # Category ids start at 1, in the order the names are given.
        self.classname_to_id = {
            name: idx
            for idx, name in enumerate(category_names, start=1)
        }
        self.images = []
        self.annotations = []
        self.categories = []
        self.ann_id = 0
        self.img_id = 0
        self.keypoints_names = keypoints_names
        self.skeleton = skeleton

    def save_coco_json(self, instance, save_path):
        """Serialize ``instance`` as JSON and write it to ``save_path``."""
        # ``with`` closes the handle even if serialization fails; the inline
        # ``open`` never closed it.
        with open(save_path, 'w', encoding='utf-8') as f:
            json.dump(instance, f, ensure_ascii=False, indent=1)

    def read_jsonfile(self, path):
        """Load and return the JSON document stored at ``path``."""
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)

    def shape_to_mask(self,
                      img_shape,
                      points,
                      shape_type=None,
                      point_size=5):
        """Rasterize one Labelme shape into a boolean mask.

        Args:
            img_shape (sequence): Image shape; only the first two entries
                are used as the mask size.
            points (list): The shape's ``(x, y)`` points.
            shape_type (str, optional): ``'rectangle'``, ``'point'`` or
                anything else, which is drawn as a polygon.
            point_size (int): Radius of the disc drawn for a point shape.

        Returns:
            numpy.ndarray: Boolean mask of size ``img_shape[:2]``.
        """
        mask = np.zeros(img_shape[:2], dtype=np.uint8)
        mask = PIL.Image.fromarray(mask)
        draw = PIL.ImageDraw.Draw(mask)
        xy = [tuple(point) for point in points]
        if shape_type == 'rectangle':
            assert len(
                xy) == 2, 'Shape of shape_type=rectangle must have 2 points'
            draw.rectangle(xy, outline=1, fill=1)
        elif shape_type == 'point':
            assert len(xy) == 1, 'Shape of shape_type=point must have 1 points'
            cx, cy = xy[0]
            r = point_size
            draw.ellipse([cx - r, cy - r, cx + r, cy + r], outline=1, fill=1)
        else:
            assert len(xy) > 2, 'Polygon must have points more than 2'
            draw.polygon(xy=xy, outline=1, fill=1)
        return np.array(mask, dtype=bool)

    def _image(self, path, obj):
        """Parse the obj object of Labelme to generate the image object of
        COCO.

        Increments ``self.img_id`` and uses the new value as the image id.

        Args:
            path (str): Path to the Labelme json file.
            obj (dict): The JSON object corresponding to ``path``.

        Returns:
            Dict: A dictionary representing the image, the keys include
            ``'id'``, ``'file_name'``, ``'license'``, ``'height'`` and
            ``'width'``.
        """
        image = {}
        # Read the size from imageHeight / imageWidth instead of decoding
        # the imageData embedded by Labelme.
        image['height'] = obj['imageHeight']
        image['width'] = obj['imageWidth']
        self.img_id += 1
        image['id'] = self.img_id
        # Swap only the trailing extension; ``str.replace`` also rewrote any
        # earlier ".json" inside the file name.
        base_name = os.path.basename(path)
        stem, ext = os.path.splitext(base_name)
        image['file_name'] = stem + '.jpg' if ext == '.json' else base_name
        image['license'] = 1
        return image

    def _annotation(self, shapes, img):
        """Generate COCO annotations for one image.

        Non-point shapes sharing ``(label, group_id)`` are merged into one
        instance; point shapes are attached to the instance with the same
        ``group_id``. A shape without a ``group_id`` receives a fresh unique
        id, so ungrouped keypoints are not attached to any instance.

        Args:
            shapes (list): The list of shapes in Labelme annotation.
            img (dict): The image record produced by ``_image``.
        """
        groupId_keypoints = collections.defaultdict(list)
        masks = {}  # per-instance mask, used for area and bbox
        segmentations = collections.defaultdict(list)
        for shape in shapes:
            group_id = shape.get('group_id')
            if group_id is None:
                group_id = uuid.uuid1()
            # A missing ``shape_type`` is treated as a polygon throughout;
            # the first check used to raise ``KeyError`` instead.
            shape_type = shape.get('shape_type', 'polygon')
            if shape_type == 'point':
                groupId_keypoints[group_id].append(shape)
                continue

            points = shape['points']
            instance = (shape['label'], group_id)
            mask = self.shape_to_mask([img['height'], img['width']], points,
                                      shape_type)
            if instance in masks:
                masks[instance] = masks[instance] | mask
            else:
                masks[instance] = mask

            if shape_type == 'rectangle':
                # Expand the two corners into the four polygon vertices.
                (x1, y1), (x2, y2) = points
                x1, x2 = sorted([x1, x2])
                y1, y2 = sorted([y1, y2])
                points = [x1, y1, x2, y1, x2, y2, x1, y2]
            segmentations[instance].append(
                np.asarray(points).flatten().tolist())

        for instance, mask in masks.items():
            cls_name, group_id = instance
            if cls_name not in self.classname_to_id:
                continue  # label is not one of the requested categories
            cls_id = self.classname_to_id[cls_name]

            mask = np.asfortranarray(mask.astype(np.uint8))
            mask = pycocotools.mask.encode(mask)
            area = float(pycocotools.mask.area(mask))
            bbox = pycocotools.mask.toBbox(mask).flatten().tolist()

            keypoints = [0] * (3 * len(self.keypoints_names))
            for keypoint in groupId_keypoints[group_id]:
                idx = self.keypoints_names.index(keypoint['label'])
                point = keypoint['points'][0]
                # Tolerate shapes that carry no ``flags`` entry.
                flags = keypoint.get('flags') or {}
                visible = 1 if flags.get('occluded') else 2
                keypoints[idx * 3] = point[0]
                keypoints[idx * 3 + 1] = point[1]
                keypoints[idx * 3 + 2] = visible

            self.annotations.append(
                dict(
                    id=len(self.annotations),
                    image_id=img['id'],
                    category_id=cls_id,
                    segmentation=segmentations[instance],
                    area=area,
                    bbox=bbox,
                    iscrowd=0,
                    # Count the filled slots so that a label annotated twice
                    # is not counted twice.
                    num_keypoints=sum(1 for v in keypoints[2::3] if v > 0),
                    keypoints=keypoints,
                ))

    def _init_categories(self):
        """Initialize the COCO labeling category."""
        for name, cat_id in self.classname_to_id.items():
            self.categories.append({
                'supercategory': name,
                'id': cat_id,
                'name': name,
                'keypoints': self.keypoints_names,
                'skeleton': self.skeleton,
            })

    def to_coco(self, json_path_list):
        """Convert Labelme raw labels into COCO dataset format.

        Args:
            json_path_list (list): Paths of the Labelme json files.

        Returns:
            Dict: A dictionary in COCO annotation format.
        """
        self._init_categories()
        # One Labelme json file describes one image.
        for json_path in tqdm(json_path_list):
            obj = self.read_jsonfile(json_path)
            img = self._image(json_path, obj)
            self.images.append(img)
            self._annotation(obj['shapes'], img)

        now = datetime.datetime.today()
        coco_json = {}
        coco_json['info'] = dict(
            description='Converted COCO dataset',
            version=None,
            contributor=None,
            url=None,
            year=now.year,
            date_created=now.strftime('%Y/%m/%d'))
        coco_json['licenses'] = [dict(
            url=None,
            id=1,
            name=None,
        )]
        coco_json['images'] = self.images
        coco_json['annotations'] = self.annotations
        coco_json['categories'] = self.categories
        return coco_json
def init_dir(base_path):
    """Initializing the folder structure of the COCO dataset.

    Creates ``coco/annotations``, ``coco/train`` and ``coco/val`` below
    ``base_path``.

    Args:
        base_path (str): Root path for placing the COCO dataset.
    """
    for sub_dir in ('annotations', 'train', 'val'):
        # ``exist_ok`` replaces the separate exists-then-create check, which
        # could fail if the folder appeared between the two calls.
        os.makedirs(os.path.join(base_path, 'coco', sub_dir), exist_ok=True)
def parse_args(argv=None):
    """Parse the command-line options of the converter.

    Args:
        argv (list, optional): Argument strings to parse. Defaults to
            ``None``, in which case ``sys.argv[1:]`` is parsed (the
            previous behaviour); passing a list makes the function usable
            from other code.

    Returns:
        argparse.Namespace: Namespace with ``input``, ``output``,
        ``categories``, ``dataset_cfg`` and ``ratio``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--input',
        '--i',
        help='input json file folder',
        type=str,
        required=True)
    parser.add_argument(
        '--output',
        '--o',
        help='output data folder (for COCO data)',
        type=str,
        required=True)
    parser.add_argument(
        '--categories',
        '--c',
        type=str,
        help='category names file',
        default=None)
    parser.add_argument(
        '--dataset_cfg',
        '--d',
        type=str,
        help='dataset config file',
        default='configs/_base_/datasets/coco.py')
    parser.add_argument(
        '--ratio',
        '--r',
        help='train and test split ratio',
        type=float,
        default=0.25)
    return parser.parse_args(argv)
def parse_dataset_config(dataset_cfg='configs/_base_/datasets/coco.py'):
    """Read keypoint names and skeleton links from a dataset config file.

    Args:
        dataset_cfg (str, optional): Path to the dataset configuration
            file. The file must define ``dataset_info`` containing

            - keypoint_info (dict): entries with ``name`` and ``id``
            - skeleton_info (dict): entries with ``link`` (two keypoint
              names)

            Defaults to 'configs/_base_/datasets/coco.py'.

    Returns:
        Tuple[list]: The keypoint names, placed at the index given by each
        keypoint's ``id``, and the skeleton as pairs of keypoint ids.
    """
    dataset_info = Config.fromfile(dataset_cfg).dataset_info
    keypoint_entries = dataset_info['keypoint_info']

    keypoints_names = [''] * len(keypoint_entries)
    keypoint2id = {}
    for entry in keypoint_entries.values():
        kpt_name, kpt_id = entry['name'], entry['id']
        keypoints_names[kpt_id] = kpt_name
        keypoint2id[kpt_name] = kpt_id

    # Translate every link from a pair of names into a pair of ids.
    skeleton = []
    for entry in dataset_info['skeleton_info'].values():
        start_name, end_name = entry['link'][0], entry['link'][1]
        skeleton.append([keypoint2id[start_name], keypoint2id[end_name]])

    return keypoints_names, skeleton
def convert(category_names, keypoints_names, skeleton, labelme_paths,
            coco_file_path):
    """Convert Labelme annotated files into COCO format and save it.

    Args:
        category_names (list): The list of category names of this dataset.
        keypoints_names (list): The list of keypoint names of this dataset.
        skeleton (list): The list of skeleton info of this dataset.
        labelme_paths (list): The list of path of Labelme files.
        coco_file_path (str): The path to save the COCO annotation file.
    """
    converter = Labelme2coco_keypoints(category_names, keypoints_names,
                                       skeleton)
    # Build the COCO dictionary for the given files, then write it out.
    coco_dict = converter.to_coco(labelme_paths)
    converter.save_coco_json(coco_dict, coco_file_path)
def main():
    """Run the Labelme-to-COCO conversion from the command line.

    Splits the Labelme files of ``--input`` into a training and a validation
    subset, writes one COCO annotation file per subset below ``--output``
    and copies the matching ``.jpg`` images next to them.
    """
    args = parse_args()
    labelme_path = args.input
    saved_coco_path = args.output
    init_dir(saved_coco_path)  # create the COCO folder layout

    json_list_path = glob.glob(labelme_path + '/*.json')
    train_path, val_path = train_test_split(
        json_list_path, test_size=args.ratio)
    print('{} for training'.format(len(train_path)),
          '\n{} for testing'.format(len(val_path)))
    print('Start transform please wait ...')

    keypoints_names, skeleton = parse_dataset_config(args.dataset_cfg)

    if args.categories:
        # One category name per line. The file is now closed after reading,
        # and blank lines no longer become empty category names.
        with open(args.categories) as f:
            category_names = [line.strip() for line in f if line.strip()]
    else:
        category_names = ['person']

    splits = (
        ('train', train_path, 'keypoints_train.json'),
        ('val', val_path, 'keypoints_val.json'),
    )

    # Generate the annotation file of each split.
    for _, json_paths, ann_name in splits:
        convert(
            category_names, keypoints_names, skeleton, json_paths,
            os.path.join(saved_coco_path, 'coco', 'annotations', ann_name))

    # Copy the original images of Labelme into the training and validation
    # sets.
    for split_name, json_paths, _ in splits:
        for file in json_paths:
            # Swap only the extension; ``replace('json', 'jpg')`` also
            # rewrote "json" inside folder or file names.
            shutil.copy(
                os.path.splitext(file)[0] + '.jpg',
                os.path.join(saved_coco_path, 'coco', split_name))
# Run the conversion only when the file is executed as a script.
if __name__ == '__main__':
    main()

Also with a document introducing the usage of the script.
labelme2coco.md