Description
Prerequisites
Please answer the following question for yourself before submitting an issue.
- [x] I checked to make sure that this feature has not been requested already.
1. The entire URL of the file you are using
Custom-made CenterNet model for human keypoint estimation using MobileNetV3 Small with an FPN as the feature extractor
2. Describe the feature you request
I have made a custom model for human pose estimation using MobileNetV3 with an attached FPN as the feature extractor. For training I follow the example for CenterNet MobileNetV2 FPN 512x512 from the model zoo. After training with the mentioned pipeline file locally, it turns out that the accuracy of the result isn't great, which leads me to believe that the configuration might be better suited to cloud training with multiple GPUs. I should also mention that I trained with a batch size of only one, because that is all that worked on my machine at the moment (see the rough learning-rate scaling sketch at the end of this section).
For reference, my laptop specs are:
- Intel(R) Core(TM) i7-10750H CPU @ 2.60GHz 2.59 GHz
- 16 GB RAM
- Nvidia RTX 2070 Super 8GB (the laptop version)
Could anyone please help me create a pipeline that fits this setup? I am doing this for my final-year university project, so training locally would be the most favorable option. Alternatively, a pipeline for training on Google Cloud (with some guidance on which specs to choose for the virtual machine) would be helpful, but again, training locally would be best.
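To make the batch-size concern concrete, this is the rough scaling I have in mind. The reference batch size of 512 is only my assumption about what the cloud-tuned model zoo config was written for, not something I have verified:

# Rough linear learning-rate scaling. The 512 reference batch size is my guess at
# what the multi-GPU/cloud config was tuned for; it is an assumption, not a verified value.
reference_batch_size = 512   # assumed batch size behind the model-zoo config
reference_lr = 5e-3          # learning_rate_base in the pipeline below
local_batch_size = 1         # all that currently fits on my 8 GB GPU
scaled_lr = reference_lr * local_batch_size / reference_batch_size
print(scaled_lr)             # roughly 1e-05

If that reasoning is right, keeping the original learning rate while dropping the batch size to one could by itself explain some of the accuracy loss, but I would appreciate confirmation.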
3. Additional context
The model follows this, with changes mostly to the layers used for the FPN and the filters. I have followed the TensorFlow instructions for training locally with a GPU, so I'm using cuDNN on my Windows machine.
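For reference, the feature extractor is built roughly along these lines. This is only a minimal Keras sketch of the idea, not the actual code: the stride-based layer selection, the 128 FPN filters, and the single smoothing conv are illustrative assumptions, and the real extractor is registered with the Object Detection API rather than used as a standalone model.

import tensorflow as tf

def build_mobilenet_v3_small_fpn(input_shape=(512, 512, 3), fpn_filters=128):
    # MobileNetV3-Small backbone from Keras applications (ImageNet weights).
    backbone = tf.keras.applications.MobileNetV3Small(
        input_shape=input_shape, include_top=False, weights="imagenet")
    # Pick the deepest feature map at output strides 8, 16 and 32 by scanning
    # layer output shapes; exact layer names differ between Keras versions.
    taps = {}
    for layer in backbone.layers:
        shape = layer.output.shape
        if len(shape) == 4 and shape[1] is not None and shape[1] > 1:
            stride = input_shape[0] // int(shape[1])
            if stride in (8, 16, 32):
                taps[stride] = layer.output
    c3, c4, c5 = taps[8], taps[16], taps[32]
    # Top-down FPN: 1x1 lateral convs plus nearest-neighbour upsampling.
    p5 = tf.keras.layers.Conv2D(fpn_filters, 1)(c5)
    p4 = tf.keras.layers.Add()(
        [tf.keras.layers.UpSampling2D()(p5), tf.keras.layers.Conv2D(fpn_filters, 1)(c4)])
    p3 = tf.keras.layers.Add()(
        [tf.keras.layers.UpSampling2D()(p4), tf.keras.layers.Conv2D(fpn_filters, 1)(c3)])
    # 3x3 smoothing conv on the finest level, which feeds the CenterNet heads.
    out = tf.keras.layers.Conv2D(fpn_filters, 3, padding="same", activation="relu")(p3)
    return tf.keras.Model(inputs=backbone.input, outputs=out)

feature_extractor = build_mobilenet_v3_small_fpn()
print(feature_extractor.output_shape)  # expected (None, 64, 64, 128), i.e. output stride 8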
Current pipeline:
model {
  center_net {
    num_classes: 1
    feature_extractor {
      type: "mobilenet_v3_small_fpn"
    }
    image_resizer {
      keep_aspect_ratio_resizer {
        min_dimension: 512
        max_dimension: 512
        pad_to_max_dimension: true
      }
    }
    use_depthwise: true
    object_detection_task {
      task_loss_weight: 1.0
      offset_loss_weight: 1.0
      scale_loss_weight: 0.1
      localization_loss {
        l1_localization_loss {
        }
      }
    }
    object_center_params {
      object_center_loss_weight: 1.0
      classification_loss {
        penalty_reduced_logistic_focal_loss {
          alpha: 2.0
          beta: 4.0
        }
      }
      min_box_overlap_iou: 0.7
      max_box_predictions: 5
    }
    keypoint_label_map_path: "F:/python_repos/first_with_mobilenetv3/label_map.txt"
    keypoint_estimation_task {
      task_name: "human_pose"
      task_loss_weight: 1.0
      loss {
        localization_loss {
          l1_localization_loss {
          }
        }
        classification_loss {
          penalty_reduced_logistic_focal_loss {
            alpha: 2.0
            beta: 4.0
          }
        }
      }
      keypoint_class_name: "/m/01g317"
      keypoint_label_to_std {
        key: "left_ankle"
        value: 0.89
      }
      keypoint_label_to_std {
        key: "left_ear"
        value: 0.35
      }
      keypoint_label_to_std {
        key: "left_elbow"
        value: 0.72
      }
      keypoint_label_to_std {
        key: "left_eye"
        value: 0.25
      }
      keypoint_label_to_std {
        key: "left_hip"
        value: 1.07
      }
      keypoint_label_to_std {
        key: "left_knee"
        value: 0.89
      }
      keypoint_label_to_std {
        key: "left_shoulder"
        value: 0.79
      }
      keypoint_label_to_std {
        key: "left_wrist"
        value: 0.62
      }
      keypoint_label_to_std {
        key: "nose"
        value: 0.23
      }
      keypoint_label_to_std {
        key: "right_ankle"
        value: 0.89
      }
      keypoint_label_to_std {
        key: "right_ear"
        value: 0.35
      }
      keypoint_label_to_std {
        key: "right_elbow"
        value: 0.72
      }
      keypoint_label_to_std {
        key: "right_eye"
        value: 0.25
      }
      keypoint_label_to_std {
        key: "right_hip"
        value: 1.07
      }
      keypoint_label_to_std {
        key: "right_knee"
        value: 0.89
      }
      keypoint_label_to_std {
        key: "right_shoulder"
        value: 0.79
      }
      keypoint_label_to_std {
        key: "right_wrist"
        value: 0.62
      }
      keypoint_regression_loss_weight: 0.1
      keypoint_heatmap_loss_weight: 1.0
      keypoint_offset_loss_weight: 1.0
      offset_peak_radius: 3
      per_keypoint_offset: true
    }
  }
}
train_config {
  batch_size: 1
  data_augmentation_options {
    random_horizontal_flip {
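      # Left/right swap map, assuming the standard COCO keypoint order in the label map:
      # nose stays at index 0, left_eye<->right_eye, left_ear<->right_ear, and so on.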
      keypoint_flip_permutation: 0
      keypoint_flip_permutation: 2
      keypoint_flip_permutation: 1
      keypoint_flip_permutation: 4
      keypoint_flip_permutation: 3
      keypoint_flip_permutation: 6
      keypoint_flip_permutation: 5
      keypoint_flip_permutation: 8
      keypoint_flip_permutation: 7
      keypoint_flip_permutation: 10
      keypoint_flip_permutation: 9
      keypoint_flip_permutation: 12
      keypoint_flip_permutation: 11
      keypoint_flip_permutation: 14
      keypoint_flip_permutation: 13
      keypoint_flip_permutation: 16
      keypoint_flip_permutation: 15
    }
  }
  data_augmentation_options {
    random_patch_gaussian {
    }
  }
  data_augmentation_options {
    random_crop_image {
      min_aspect_ratio: 0.5
      max_aspect_ratio: 1.7
      random_coef: 0.25
    }
  }
  data_augmentation_options {
    random_adjust_hue {
    }
  }
  data_augmentation_options {
    random_adjust_contrast {
    }
  }
  data_augmentation_options {
    random_adjust_saturation {
    }
  }
  data_augmentation_options {
    random_adjust_brightness {
    }
  }
  data_augmentation_options {
    random_absolute_pad_image {
      max_height_padding: 200
      max_width_padding: 200
      pad_color: 0.0
      pad_color: 0.0
      pad_color: 0.0
    }
  }
  optimizer {
    adam_optimizer {
      learning_rate {
        cosine_decay_learning_rate {
          learning_rate_base: 5e-3
          total_steps: 300000
          warmup_learning_rate: 1e-4
          warmup_steps: 5000
        }
      }
    }
    use_moving_average: false
  }
  num_steps: 300000
  max_number_of_boxes: 100
  unpad_groundtruth_tensors: false
  fine_tune_checkpoint_type: ""
}
train_input_reader {
  label_map_path: "F:/python_repos/first_with_mobilenetv3/label_map.txt"
  tf_record_input_reader {
    input_path: "F:/coco/tfrecord2017/coco_train.record-00000-of-00100"
  }
  filenames_shuffle_buffer_size: 256
  num_keypoints: 17
}
eval_config {
  num_visualizations: 10
  metrics_set: "coco_detection_metrics"
  use_moving_averages: false
  min_score_threshold: 0.2
  max_num_boxes_to_visualize: 20
  batch_size: 1
  parameterized_metric {
    coco_keypoint_metrics {
      class_label: "person"
      keypoint_label_to_sigmas {
        key: "left_ankle"
        value: 0.089
      }
      keypoint_label_to_sigmas {
        key: "left_ear"
        value: 0.035
      }
      keypoint_label_to_sigmas {
        key: "left_elbow"
        value: 0.072
      }
      keypoint_label_to_sigmas {
        key: "left_eye"
        value: 0.025
      }
      keypoint_label_to_sigmas {
        key: "left_hip"
        value: 0.107
      }
      keypoint_label_to_sigmas {
        key: "left_knee"
        value: 0.087
      }
      keypoint_label_to_sigmas {
        key: "left_shoulder"
        value: 0.079
      }
      keypoint_label_to_sigmas {
        key: "left_wrist"
        value: 0.062
      }
      keypoint_label_to_sigmas {
        key: "nose"
        value: 0.026
      }
      keypoint_label_to_sigmas {
        key: "right_ankle"
        value: 0.089
      }
      keypoint_label_to_sigmas {
        key: "right_ear"
        value: 0.035
      }
      keypoint_label_to_sigmas {
        key: "right_elbow"
        value: 0.072
      }
      keypoint_label_to_sigmas {
        key: "right_eye"
        value: 0.025
      }
      keypoint_label_to_sigmas {
        key: "right_hip"
        value: 0.107
      }
      keypoint_label_to_sigmas {
        key: "right_knee"
        value: 0.087
      }
      keypoint_label_to_sigmas {
        key: "right_shoulder"
        value: 0.079
      }
      keypoint_label_to_sigmas {
        key: "right_wrist"
        value: 0.062
      }
    }
  }
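  # Skeleton edges (pairs of keypoint indices) used when drawing eval visualizations.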
  keypoint_edge {
    start: 0
    end: 1
  }
  keypoint_edge {
    start: 0
    end: 2
  }
  keypoint_edge {
    start: 1
    end: 3
  }
  keypoint_edge {
    start: 2
    end: 4
  }
  keypoint_edge {
    start: 0
    end: 5
  }
  keypoint_edge {
    start: 0
    end: 6
  }
  keypoint_edge {
    start: 5
    end: 7
  }
  keypoint_edge {
    start: 7
    end: 9
  }
  keypoint_edge {
    start: 6
    end: 8
  }
  keypoint_edge {
    start: 8
    end: 10
  }
  keypoint_edge {
    start: 5
    end: 6
  }
  keypoint_edge {
    start: 5
    end: 11
  }
  keypoint_edge {
    start: 6
    end: 12
  }
  keypoint_edge {
    start: 11
    end: 12
  }
  keypoint_edge {
    start: 11
    end: 13
  }
  keypoint_edge {
    start: 13
    end: 15
  }
  keypoint_edge {
    start: 12
    end: 14
  }
  keypoint_edge {
    start: 14
    end: 16
  }
}
eval_input_reader {
  label_map_path: "F:/python_repos/first_with_mobilenetv3/label_map.txt"
  shuffle: false
  num_epochs: 1
  tf_record_input_reader {
    input_path: "F:/coco/tfrecord2017/coco_val.record-00000-of-00050"
  }
  num_keypoints: 17
}
4. Are you willing to contribute it? (Yes or No)
Yes