Hello, I am using the Object Detection API from TensorFlow 1.12.
I am trying to use the Mask R-CNN approach from FAIR. The pretrained model is TensorFlow's ResNet-101 (v1):
Here is my configuration file:
# Mask R-CNN with Resnet-101 (v1) configured for the Oxford-IIIT Pet Dataset.
# Users should configure the fine_tune_checkpoint field in the train config as
# well as the label_map_path and input_path fields in the train_input_reader and
# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
# should be configured.
model {
  faster_rcnn {
    num_classes: 2
    image_resizer {
      keep_aspect_ratio_resizer {
        min_dimension: 600
        max_dimension: 1024
      }
    }
    number_of_stages: 3
    feature_extractor {
      type: 'faster_rcnn_resnet101'
      first_stage_features_stride: 16
    }
    first_stage_anchor_generator {
      grid_anchor_generator {
        scales: [0.25, 0.5, 1.0, 2.0]
        aspect_ratios: [0.5, 1.0, 2.0]
        height_stride: 16
        width_stride: 16
      }
    }
    first_stage_box_predictor_conv_hyperparams {
      op: CONV
      regularizer {
        l2_regularizer {
          weight: 0.0
        }
      }
      initializer {
        truncated_normal_initializer {
          stddev: 0.01
        }
      }
    }
    first_stage_nms_score_threshold: 0.0
    first_stage_nms_iou_threshold: 0.7
    first_stage_max_proposals: 300
    first_stage_localization_loss_weight: 2.0
    first_stage_objectness_loss_weight: 1.0
    initial_crop_size: 14
    maxpool_kernel_size: 2
    maxpool_stride: 2
    second_stage_box_predictor {
      mask_rcnn_box_predictor {
        use_dropout: false
        dropout_keep_probability: 1.0
        predict_instance_masks: true
        conv_hyperparams {
          op: CONV
          regularizer {
            l2_regularizer {
              weight: 0.0
            }
          }
          initializer {
            truncated_normal_initializer {
              stddev: 0.01
            }
          }
        }
        fc_hyperparams {
          op: FC
          regularizer {
            l2_regularizer {
              weight: 0.0
            }
          }
          initializer {
            variance_scaling_initializer {
              factor: 1.0
              uniform: true
              mode: FAN_AVG
            }
          }
        }
      }
    }
    second_stage_post_processing {
      batch_non_max_suppression {
        score_threshold: 0.0
        iou_threshold: 0.6
        max_detections_per_class: 100
        max_total_detections: 300
      }
      score_converter: SOFTMAX
    }
    second_stage_localization_loss_weight: 2.0
    second_stage_classification_loss_weight: 1.0
  }
}
train_config: {
  batch_size: 1
  optimizer {
    momentum_optimizer: {
      learning_rate: {
        manual_step_learning_rate {
          initial_learning_rate: 0.0007
          schedule {
            step: 15000
            learning_rate: 0.00007
          }
          schedule {
            step: 30000
            learning_rate: 0.000007
          }
        }
      }
      momentum_optimizer_value: 0.9
    }
    use_moving_average: false
  }
  gradient_clipping_by_norm: 10.0
  fine_tune_checkpoint: "/fast/junyan/HandDetection/hands-detection-gcloud/models/research/resnet_v1_101/resnet_v1_101.ckpt"
  from_detection_checkpoint: true
  load_all_detection_checkpoint_vars: true
  # Note: The below line limits the training process to 200K steps, which we
  # empirically found to be sufficient enough to train the pets dataset. This
  # effectively bypasses the learning rate schedule (the learning rate will
  # never decay). Remove the below line to train indefinitely.
  num_steps: 200000
  data_augmentation_options {
    random_horizontal_flip {
    }
  }
}
train_input_reader: {
  tf_record_input_reader {
    input_path: "/fast/junyan/HandDetection/hands-detection-gcloud/hands_train.record"
  }
  label_map_path: "/fast/junyan/HandDetection/hands-detection-gcloud/hands_label_map.pbtxt"
  load_instance_masks: true
}
eval_config: {
  metrics_set: "coco_mask_metrics"
  num_examples: 738
}
eval_input_reader: {
  tf_record_input_reader {
    input_path: "/fast/junyan/HandDetection/hands-detection-gcloud/hands_val.record"
  }
  label_map_path: "/fast/junyan/HandDetection/hands-detection-gcloud/hands_label_map.pbtxt"
  load_instance_masks: true
  shuffle: false
  num_readers: 1
}
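For reference, in case the data format is part of the problem: my understanding is that with load_instance_masks: true (and the default mask_type) every example in the record must carry a flattened instance mask alongside each box. Below is a rough sketch of what I believe one serialized example should contain; the image, box coordinates, and class values are illustrative placeholders, not my actual conversion script.

import numpy as np
import tensorflow as tf

# Sketch of a single training example with instance masks (placeholder
# values; keys follow the object_detection TF-record conventions).
height, width = 480, 640
encoded_jpeg = open('example.jpg', 'rb').read()          # placeholder image
masks = np.zeros((1, height, width), dtype=np.float32)   # one binary mask per box

def _bytes(values):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=values))

def _floats(values):
    return tf.train.Feature(float_list=tf.train.FloatList(value=values))

def _ints(values):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=values))

example = tf.train.Example(features=tf.train.Features(feature={
    'image/encoded': _bytes([encoded_jpeg]),
    'image/format': _bytes([b'jpeg']),
    'image/height': _ints([height]),
    'image/width': _ints([width]),
    # Normalized box coordinates, one entry per instance.
    'image/object/bbox/xmin': _floats([0.1]),
    'image/object/bbox/xmax': _floats([0.5]),
    'image/object/bbox/ymin': _floats([0.2]),
    'image/object/bbox/ymax': _floats([0.6]),
    'image/object/class/text': _bytes([b'hand']),
    'image/object/class/label': _ints([1]),
    # Flattened binary masks for the default NUMERICAL_MASKS; PNG-encoded
    # masks would instead require mask_type: PNG_MASKS in the input reader.
    'image/object/mask': _floats(masks.ravel().tolist()),
}))

with tf.python_io.TFRecordWriter('example.record') as writer:
    writer.write(example.SerializeToString())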
When I run training, it fails with the following error:
Caused by op 'GatherV2_4', defined at:
File "object_detection/model_main.py", line 117, in
tf.app.run()
File "/root/anaconda3/lib/python3.6/site-packages/tensorflow/python/platform/app.py", line 125, in run
_sys.exit(main(argv))
File "object_detection/model_main.py", line 113, in main
tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])
File "/root/anaconda3/lib/python3.6/site-packages/tensorflow/python/estimator/training.py", line 471, in train_and_evaluate
return executor.run()
File "/root/anaconda3/lib/python3.6/site-packages/tensorflow/python/estimator/training.py", line 610, in run
return self.run_local()
File "/root/anaconda3/lib/python3.6/site-packages/tensorflow/python/estimator/training.py", line 711, in run_local
saving_listeners=saving_listeners)
File "/root/anaconda3/lib/python3.6/site-packages/tensorflow/python/estimator/estimator.py", line 354, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "/root/anaconda3/lib/python3.6/site-packages/tensorflow/python/estimator/estimator.py", line 1207, in _train_model
return self._train_model_default(input_fn, hooks, saving_listeners)
File "/root/anaconda3/lib/python3.6/site-packages/tensorflow/python/estimator/estimator.py", line 1237, in _train_model_default
features, labels, model_fn_lib.ModeKeys.TRAIN, self.config)
File "/root/anaconda3/lib/python3.6/site-packages/tensorflow/python/estimator/estimator.py", line 1195, in _call_model_fn
model_fn_results = self._model_fn(features=features, **kwargs)
File "/fast/junyan/HandDetection/hands-detection-gcloud/models/research/object_detection/model_lib.py", line 288, in model_fn
features[fields.InputDataFields.true_image_shape])
File "/fast/junyan/HandDetection/hands-detection-gcloud/models/research/object_detection/meta_architectures/faster_rcnn_meta_arch.py", line 688, in predict
self._anchors.get(), image_shape, true_image_shapes))
File "/fast/junyan/HandDetection/hands-detection-gcloud/models/research/object_detection/meta_architectures/faster_rcnn_meta_arch.py", line 775, in _predict_second_stage
anchors, image_shape_2d, true_image_shapes)
File "/fast/junyan/HandDetection/hands-detection-gcloud/models/research/object_detection/meta_architectures/faster_rcnn_meta_arch.py", line 1306, in _postprocess_rpn
groundtruth_weights_list)
File "/fast/junyan/HandDetection/hands-detection-gcloud/models/research/object_detection/meta_architectures/faster_rcnn_meta_arch.py", line 1389, in _sample_box_classifier_batch
single_image_groundtruth_weights)
File "/fast/junyan/HandDetection/hands-detection-gcloud/models/research/object_detection/meta_architectures/faster_rcnn_meta_arch.py", line 1506, in _sample_box_classifier_minibatch_single_image
groundtruth_weights=groundtruth_weights)
File "/fast/junyan/HandDetection/hands-detection-gcloud/models/research/object_detection/core/target_assigner.py", line 185, in assign
reg_weights = self._create_regression_weights(match, groundtruth_weights)
File "/fast/junyan/HandDetection/hands-detection-gcloud/models/research/object_detection/core/target_assigner.py", line 322, in _create_regression_weights
groundtruth_weights, ignored_value=0., unmatched_value=0.)
File "/fast/junyan/HandDetection/hands-detection-gcloud/models/research/object_detection/core/matcher.py", line 205, in gather_based_on_match
gathered_tensor = self._gather_op(input_tensor, gather_indices)
File "/root/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/array_ops.py", line 2675, in gather
return gen_array_ops.gather_v2(params, indices, axis, name=name)
File "/root/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py", line 3332, in gather_v2
"GatherV2", params=params, indices=indices, axis=axis, name=name)
File "/root/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/root/anaconda3/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 488, in new_func
return func(*args, **kwargs)
File "/root/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3274, in create_op
op_def=op_def)
File "/root/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1770, in init
self._traceback = tf_stack.extract_stack()InvalidArgumentError (see above for traceback): indices[0] = 0 is not in [0, 0)
[[node GatherV2_4 (defined at /fast/junyan/HandDetection/hands-detection-gcloud/models/research/object_detection/core/matcher.py:205) = GatherV2[Taxis=DT_INT32, Tindices=DT_INT64, Tparams=DT_FLOAT, _device="/device:CPU:0"](Reshape_8, Reshape_9, GatherV2_3/axis)]]
[[node IteratorGetNext (defined at object_detection/model_main.py:113) = IteratorGetNextoutput_shapes=[[1], [1,?,?,3], [1,2], [1,3], [1,100], [1,100,4], [1,100,2], [1,100,2], [1,100], [1,100,?,?], [1,100], [1,100], [1]], output_types=[DT_INT32, DT_FLOAT, DT_INT32, DT_INT32, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_INT32, DT_FLOAT, DT_BOOL, DT_FLOAT, DT_INT32], _device="/job:localhost/replica:0/task:0/device:CPU:0"]]
[[{{node GroupCrossDeviceControlEdges_0/Loss/BoxClassifierLoss/concat_6/axis/_7512}} = _Recvclient_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device_incarnation=1, tensor_name="edge_1708_GroupCrossDeviceControlEdges_0/Loss/BoxClassifierLoss/concat_6/axis", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"]]
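From the trace, the failing GatherV2 sits in the box-classifier target assigner (gather_based_on_match in matcher.py), and "is not in [0, 0)" looks like it is gathering from an empty ground-truth tensor, so I wonder whether some images end up with zero boxes or a missing mask feature. I plan to scan the record files with something like the sketch below (the feature keys are the standard object_detection ones; the path is my training record):

import tensorflow as tf

# Count the boxes and check for the mask feature in every serialized example
# (sketch only, TF 1.12 API).
record_path = '/fast/junyan/HandDetection/hands-detection-gcloud/hands_train.record'
for i, record in enumerate(tf.python_io.tf_record_iterator(record_path)):
    example = tf.train.Example()
    example.ParseFromString(record)
    feature = example.features.feature
    num_boxes = len(feature['image/object/bbox/xmin'].float_list.value)
    mask = feature['image/object/mask']
    has_mask = len(mask.float_list.value) > 0 or len(mask.bytes_list.value) > 0
    if num_boxes == 0 or not has_mask:
        print('example %d: %d boxes, mask present: %s' % (i, num_boxes, has_mask))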
Still, I am not sure what the root cause is.
How can I resolve it?
Thanks and regards!