
data['category_id'] = self.cat_ids[label] IndexError: list index out of range #4243

Closed
ShihuaiXu opened this issue Dec 7, 2020 · 18 comments

@ShihuaiXu

I am training detectors_htc_r50. I have 186 classes, and I set num_classes to 187 in the bbox and mask heads. Then I started training on my own dataset, evaluating it after every epoch. The evaluation after the first epoch is normal, but the evaluation after the second epoch fails with the error below:
writing results to ./test.pkl
Traceback (most recent call last):
  File "tools/test.py", line 212, in <module>
    main()
  File "tools/test.py", line 208, in main
    print(dataset.evaluate(outputs, **eval_kwargs))
  File "/home/mmdetection/mmdet/datasets/coco.py", line 588, in evaluate
    result_files, tmp_dir = self.format_results(results, jsonfile_prefix)
  File "/home/mmdetection/mmdet/datasets/coco.py", line 533, in format_results
    result_files = self.results2json(results, jsonfile_prefix)
  File "/home/mmdetection/mmdet/datasets/coco.py", line 470, in results2json
    json_results = self._segm2json(results)
  File "/home/mmdetection/mmdet/datasets/coco.py", line 421, in _segm2json
    data['category_id'] = self.cat_ids[label]
IndexError: list index out of range
The config is below:
_base_ = [
    '../_base_/datasets/coco_instance.py',
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]

# model settings

model = dict(
type='HybridTaskCascade',
pretrained='torchvision://resnet50',
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=True,
style='pytorch'),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5),
rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
anchor_generator=dict(
type='AnchorGenerator',
scales=[8],
ratios=[0.5, 1.0, 2.0],
strides=[4, 8, 16, 32, 64]),
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0]),
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
roi_head=dict(
type='HybridTaskCascadeRoIHead',
interleaved=True,
mask_info_flow=True,
num_stages=3,
stage_loss_weights=[1, 0.5, 0.25],
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=[
dict(
type='Shared2FCBBoxHead',
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=187,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2]),
reg_class_agnostic=True,
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=False,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
loss_weight=1.0)),
dict(
type='Shared2FCBBoxHead',
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=187,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.05, 0.05, 0.1, 0.1]),
reg_class_agnostic=True,
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=False,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
loss_weight=1.0)),
dict(
type='Shared2FCBBoxHead',
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=187,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.033, 0.033, 0.067, 0.067]),
reg_class_agnostic=True,
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=False,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
],
mask_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
mask_head=[
dict(
type='HTCMaskHead',
with_conv_res=False,
num_convs=4,
in_channels=256,
conv_out_channels=256,
num_classes=187,
loss_mask=dict(
type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)),
dict(
type='HTCMaskHead',
num_convs=4,
in_channels=256,
conv_out_channels=256,
num_classes=187,
loss_mask=dict(
type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)),
dict(
type='HTCMaskHead',
num_convs=4,
in_channels=256,
conv_out_channels=256,
num_classes=187,
loss_mask=dict(
type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))
]))

# model training and testing settings

train_cfg = dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=0,
pos_weight=-1,
debug=False),
rpn_proposal=dict(
nms_across_levels=False,
nms_pre=2000,
nms_post=2000,
max_num=2000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=[
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
mask_size=28,
pos_weight=-1,
debug=False),
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.6,
neg_iou_thr=0.6,
min_pos_iou=0.6,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
mask_size=28,
pos_weight=-1,
debug=False),
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.7,
min_pos_iou=0.7,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
mask_size=28,
pos_weight=-1,
debug=False)
])
test_cfg = dict(
rpn=dict(
nms_across_levels=False,
nms_pre=1000,
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
score_thr=0.001,
nms=dict(type='nms', iou_threshold=0.5),
max_per_img=100,
mask_thr_binary=0.5))
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 800),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
val=dict(pipeline=test_pipeline), test=dict(pipeline=test_pipeline))

@GitVae

GitVae commented Dec 8, 2020

I ran into this problem before as well. After checking the source code I found the cause; see whether it applies to you:

  1. Modify mmdetection/mmdet/datasets/coco.py so the classes match your dataset:

class CocoDataset(CustomDataset):
"""
CLASSES = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush')
"""
CLASSES = ('shop_sign',)  # replace with your own 186 classes; note the trailing comma for a one-element tuple, and keep the order aligned with step 2

  2. Check your own dataset annotations (train, val, and test all need updating), e.g. mmdetection/data/coco/annotations/instances_train2017.json:
    the "id" and "name" entries under its "categories" key
    must align with what you wrote in coco.py in step 1.

The COCO annotation format:
{
"images": [],
"annotations": [],
"categories": []  # this is the part to fix: all 186 classes, with each entry's "id" and "name" in the same order as step 1
}

I hope this solves your problem.
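That alignment can be checked mechanically. Below is a minimal sketch with hypothetical inline data standing in for `json.load` on your annotation file; the `CLASSES` tuple and the two-class names are illustrative, not from the original issue:

```python
# Check that a COCO-style annotation's "categories" line up, in id order,
# with the classes tuple used in coco.py (or the config's `classes` field).
CLASSES = ('car', 'truck')  # hypothetical two-class example

ann = {  # stand-in for json.load(open('instances_train2017.json'))
    "images": [],
    "annotations": [],
    "categories": [
        {"id": 1, "name": "car"},
        {"id": 2, "name": "truck"},
    ],
}

names = tuple(c["name"] for c in sorted(ann["categories"], key=lambda c: c["id"]))
assert names == CLASSES, f"mismatch: annotations={names} vs CLASSES={CLASSES}"
print("categories aligned:", names)
```

Running the same comparison against each of train/val/test catches ordering and spelling mismatches before training starts.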

@ShihuaiXu
Author

ShihuaiXu commented Dec 8, 2020 via email

@Hexuanfang

Hi, my problem is exactly the same as yours. How did you solve it? I have only two classes. Is the alignment the previous answer described like this:
"categories": [
  {"id": 1, "name": "car"},
  {"id": 2, "name": "truck"}
]
aligned with CLASSES = ["car", "truck"] in coco.py?
Mine is aligned like that, but the problem is still not solved!
Please let me know, thanks.

@v-qjqs
Contributor

v-qjqs commented Jan 11, 2021

Hi @Hexuanfang, I guess you also need to explicitly add your own class names to the fields data.train (and data.val, and data.test) in the config. See Reorganize new data formats to existing format for guidelines on training your own dataset in COCO format.

...
# dataset settings
dataset_type = 'CocoDataset'
classes = ('car', 'truck')
...
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        classes=classes,
        ann_file='path/to/your/train/data',
        ...),
    val=dict(
        type=dataset_type,
        classes=classes,
        ann_file='path/to/your/val/data',
        ...),
    test=dict(
        type=dataset_type,
        classes=classes,
        ann_file='path/to/your/test/data',
        ...))
...

@secortot

Hi @Hexuanfang, I guess you also need to explicitly add your own class names to the fields data.train (and data.val, and data.test) in the config, like this.

...
# dataset settings
dataset_type = 'CocoDataset'
classes = ('car', 'truck')
...
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        classes=classes,
        ann_file='path/to/your/train/data',
        ...),
    val=dict(
        type=dataset_type,
        classes=classes,
        ann_file='path/to/your/val/data',
        ...),
    test=dict(
        type=dataset_type,
        classes=classes,
        ann_file='path/to/your/test/data',
        ...))
...

Same here: I have only one class, but it still errors.
Maybe add a try/except around that line.

@v-qjqs
Contributor

v-qjqs commented Jan 18, 2021

Hi @secortot, there are two steps to train your own customized dataset with COCO format:

  1. Modify the config file for using the customized dataset.
  2. Check the annotations of the customized dataset.

See Reorganize new data formats to existing format for more detailed suggestions.

@secortot

Hi @secortot, there are two steps to train your own customized dataset with COCO format:

  1. Modify the config file for using the customized dataset.
  2. Check the annotations of the customized dataset.

See Reorganize new data formats to existing format for more detailed suggestions.

I converted my dataset to COCO JSON format (single-class detection). This error appears after an indeterminate number of epochs; from the code it happens during evaluation, not training. I carefully checked the ground truth several times and it really is fine; adding exception handling at that line solved it for me.
If it were a data-format problem it would not take 30 epochs to surface, so it feels like a bug in the code.
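The try/except workaround suggested here could look something like the sketch below. It is only an illustration of the idea (not mmdet's actual `_segm2json` code), and it hides the symptom rather than fixing the class mismatch:

```python
# Skip predictions whose label has no matching category id instead of crashing.
cat_ids = [1]         # only one registered category (hypothetical)
labels = [0, 0, 5]    # a stray out-of-range label predicted by the model
results = []
for label in labels:
    try:
        results.append({"category_id": cat_ids[label]})
    except IndexError:
        continue  # drop predictions for labels without a category
print(len(results))  # -> 2
```

Dropped predictions are silently lost from the evaluation, so the underlying label/category mismatch is still worth tracking down.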

@v-qjqs
Contributor

v-qjqs commented Jan 20, 2021

Hi @secortot, could you provide detailed error log during the inference mode?

@milleniums

When you modify the categories, please also update the num_classes parameter in the model; otherwise this problem will occur.
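As a concrete sketch of that advice (my understanding of the mmdetection 2.x convention, so treat it as an assumption to verify against your version): the bbox/mask heads take num_classes equal to the number of foreground classes, with no +1 for background. The `classes` tuple here is the hypothetical two-class example used elsewhere in this thread:

```python
classes = ('car', 'truck')   # hypothetical dataset classes
num_classes = len(classes)   # 2 in mmdet 2.x heads, not 3

# Fragment showing where the value goes; in HTC-style configs bbox_head and
# mask_head are lists of per-stage dicts, so every stage needs updating.
model_update = dict(
    roi_head=dict(
        bbox_head=dict(num_classes=num_classes),
        mask_head=dict(num_classes=num_classes)))
print(num_classes)
```

The original poster set num_classes=187 for 186 classes, which matches the older 1.x "classes + background" convention.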


@Hshuqin

Hshuqin commented May 22, 2021

Adding exception handling at that line solved it.

How did you solve it exactly? I also ran for an indeterminate number of epochs and then got this error.


@Litou-lyh

Litou-lyh commented Aug 14, 2021

Hi @secortot, there are two steps to train your own customized dataset with COCO format:

  1. Modify the config file for using the customized dataset.
  2. Check the annotations of the customized dataset.

See Reorganize new data formats to existing format for more detailed suggestions.

I converted my dataset to COCO JSON format (single-class detection). This error appears after an indeterminate number of epochs; from the code it happens during evaluation, not training. I carefully checked the ground truth several times and it really is fine; adding exception handling at that line solved it for me.
If it were a data-format problem it would not take 30 epochs to surface, so it feels like a bug in the code.

I encountered a similar problem and got the exception in the middle of training. I fixed it by adding a comma at the end of the class names in the config file, e.g. changing "classes = ('person', 'car')" to "classes = ('person', 'car',)", but I am still confused about why that works... some detail of Python tuple grammar might be the underlying reason.
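The trailing comma only changes the semantics in the one-element case, which may be the real gotcha behind the comma fix: in Python, parentheses alone do not make a tuple.

```python
classes_wrong = ('person')    # just the string 'person', not a tuple
classes_right = ('person',)   # a 1-tuple

assert isinstance(classes_wrong, str)
assert list(classes_wrong) == ['p', 'e', 'r', 's', 'o', 'n']  # iterates characters!
assert isinstance(classes_right, tuple) and len(classes_right) == 1
print(type(classes_wrong).__name__, type(classes_right).__name__)  # str tuple
```

Code that iterates over a `classes` "tuple" written without the comma silently sees one single-letter class per character, which plausibly produces exactly this kind of out-of-range category lookup.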

@wmrenr

wmrenr commented Jul 13, 2022

I encountered this problem during testing. Trying the methods above did not help. What should I do?

@Kittywyk

Kittywyk commented Sep 28, 2022

In my situation, I added a classes = ('a', 'b', 'c', 'd', 'e') field and
data = dict(train=dict(classes=classes), val=dict(classes=classes), test=dict(classes=classes))
to the config file, and the problem was solved.

@Mahmood-Hussain

Check:

  1. Classname Spellings
  2. Classname indexes in training and testing set
  3. Customize your dataset settings inside the dataset config file like this

# dataset settings
dataset_type = 'CocoDataset'
classes = ('car', 'truck')

...
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        classes=classes,
        ann_file='path/to/your/train/data',
        ...),
    val=dict(
        type=dataset_type,
        classes=classes,
        ann_file='path/to/your/val/data',
        ...),
    test=dict(
        type=dataset_type,
        classes=classes,
        ann_file='path/to/your/test/data',
        ...))
...

I know very few people will encounter the issue I had, and this is not the solution to the original question, but it could still help someone, so I am writing it down.

I was training on one dataset and testing on a different one. The training dataset's class names all began with a capital letter, but the test dataset's class names were lowercase, which caused this issue. Making the test class names match the training spelling (capitalized first letter) fixed it.

Printing the categories and their ids inside mmdetection/mmdet/datasets/coco.py helped me debug this.
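A hypothetical version of that debugging print, with inline stand-in data instead of a real annotation file: list each category's id and name next to the 0-based label index mmdet uses, so ordering, casing, or count mismatches are visible at a glance.

```python
ann = {"categories": [{"id": 1, "name": "Car"},
                      {"id": 2, "name": "Truck"}]}
cat_ids = [c["id"] for c in ann["categories"]]
for label, c in enumerate(ann["categories"]):
    print(f"label={label} -> category_id={c['id']} name={c['name']}")
# An IndexError at self.cat_ids[label] means the model produced a label
# >= len(cat_ids), i.e. more classes than the annotation file registers.
```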

@cqtanzj

cqtanzj commented Jun 7, 2023

The categories in the train.json, val.json, and test.json files must be aligned.
The class_name in the config file is merged from the categories in the three JSON files mentioned above.

For example:
In the train.json file, categories are:
"categories": [
{
"id": 1,
"name": "GarlicPorkRibs"
},
{
"id": 2,
"name": "Rice"
},
{
"id": 3,
"name": "Soup"
}
]
In the val.json file, categories are:
"categories": [
{
"id": 1,
"name": "GarlicPorkRibs"
},
{
"id": 2,
"name": "Rice"
},
{
"id": 3,
"name": "Soup"
},
{
"id": 4,
"name": "ShaGeSteamedMeatCake"
}
]

In the test.json file, categories are:
"categories": [
{
"id": 1,
"name": "GarlicPorkRibs"
},
{
"id": 2,
"name": "Rice"
},
{
"id": 3,
"name": "Soup"
},
{
"id": 4,
"name": "ShaGeSteamedMeatCake"
},
{
"id": 5,
"name": "BraisedTofu"
}
]
So the class_name in the mmyolo config file should be:
class_name = ("GarlicPorkRibs", "Rice", "Soup", "ShaGeSteamedMeatCake", "BraisedTofu")

@BlackWhitebzl


I hit the same error reported above when training a custom dataset with the HTC model (htc_r50_fpn_1x_coco.py), after modifying num_classes and the dataset information per the user guide.

My final solution is as follows:

  1. dataset settings: refer to the user guide, I did not modify the original coco.py file.
  2. num_classes setting:
    (1) The num_classes in htc_r50_fpn_1x_coco.py applies only to semantic_head (here you also need to add the background class; the original COCO setting is 183).
    (2) htc_r50_fpn_1x_coco.py inherits from htc-without-semantic_r50_fpn_1x_coco.py, and the num_classes in bbox_head and mask_head in that script need to be modified as well.
    (3) I created two new scripts defining num_classes for my dataset, and training ran fine.

Not sure if this fits your situation, hope this helps.

