Training cannot converge when adding LSTM layer #62
Comments
Never mind. I fixed it.
Can you share which dataset you used for training?
@dajiangxiaoyan How did you fix it? I have the same problem. Thank you very much.
@dajiangxiaoyan I have the same problem. Can you share the method? Thank you.
It's my fault: I used a wrong VGG16 model.
@dajiangxiaoyan Hi, I am trying to train CTPN myself too, but I can't reproduce the train.prototxt. I have tried many times and am now desperate. Could you kindly share it with me for training? Many thanks.
@catmonkeylee Hello, I want to fine-tune the model on my own dataset, but I don't know what the data format is. Could you please show me more detail about how you made the data input and what your training parameters are? Thanks a lot!
@dajiangxiaoyan Hi, I'm trying to train CTPN and have got exactly the same problem. Could you please give me some suggestions? Any reply is deeply appreciated.
@TaoDream @catmonkeylee Is there any solution to this problem? I've checked the model, yet training never seems to converge when the LSTM is added. Could you please help me out here? Thanks a lot!
@cristina510 Maybe you need to clip the gradients for LSTM layers. |
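For reference, the Lstm layer in the prototxt below takes a clipping_threshold inside lstm_param; in the posted model the forward "lstm" layer sets clipping_threshold: 1, but "rlstm" leaves it unset. A minimal sketch of what rlstm's lstm_param could look like with clipping enabled (the value is illustrative):

```
lstm_param {
  num_output: 128
  clipping_threshold: 1  # gradient clipping threshold, as on the forward "lstm" layer
}
```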
Thanks a lot for your reply. |
@cristina510 This should not be a problem with gradient clipping. Are the LSTM layers initialized properly?
Thanks for your reply, I really appreciate it. |
@cristina510 You are welcome. What I meant was whether the parameters of the LSTM layers are initialized properly. Since you said the output is unrelated to the input image, I guess all parameters of the LSTM layers are probably initialized to zeros.
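That guess matches the posted prototxt: the forward "lstm" layer specifies a gaussian weight_filler, but "rlstm" specifies none, so its weights fall back to Caffe's default constant (zero) filler. A minimal sketch of the fix, giving "rlstm" the same explicit fillers as "lstm":

```
layer {
  name: "rlstm"
  type: "Lstm"
  bottom: "rlstm_input"
  top: "rlstm-output"
  lstm_param {
    num_output: 128
    weight_filler {
      type: "gaussian"  # without this, Caffe defaults to constant zeros
      std: 0.01
    }
    bias_filler {
      type: "constant"
    }
  }
}
```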
I implemented the RPN without LSTM from the paper, based on the Faster R-CNN code. The results are very good for horizontal text. But when I add a bi-directional LSTM layer after the last conv layer, the model does not converge, and the scores are the same for every image. Has anyone met this problem?
```
name: "VGG_ILSVRC_16_layers"
layer {
name: 'input-data'
type: 'Python'
top: 'data'
top: 'im_info'
top: 'gt_boxes'
python_param {
module: 'roi_data_layer.layer'
layer: 'RoIDataLayer'
param_str: "'num_classes': 2"
}
}
layer {
name: "conv1_1"
type: "Convolution"
bottom: "data"
top: "conv1_1"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
}
}
layer {
name: "relu1_1"
type: "ReLU"
bottom: "conv1_1"
top: "conv1_1"
}
layer {
name: "conv1_2"
type: "Convolution"
bottom: "conv1_1"
top: "conv1_2"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
}
}
layer {
name: "relu1_2"
type: "ReLU"
bottom: "conv1_2"
top: "conv1_2"
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1_2"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv2_1"
type: "Convolution"
bottom: "pool1"
top: "conv2_1"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 1
kernel_size: 3
}
}
layer {
name: "relu2_1"
type: "ReLU"
bottom: "conv2_1"
top: "conv2_1"
}
layer {
name: "conv2_2"
type: "Convolution"
bottom: "conv2_1"
top: "conv2_2"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 1
kernel_size: 3
}
}
layer {
name: "relu2_2"
type: "ReLU"
bottom: "conv2_2"
top: "conv2_2"
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2_2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv3_1"
type: "Convolution"
bottom: "pool2"
top: "conv3_1"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
}
}
layer {
name: "relu3_1"
type: "ReLU"
bottom: "conv3_1"
top: "conv3_1"
}
layer {
name: "conv3_2"
type: "Convolution"
bottom: "conv3_1"
top: "conv3_2"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
}
}
layer {
name: "relu3_2"
type: "ReLU"
bottom: "conv3_2"
top: "conv3_2"
}
layer {
name: "conv3_3"
type: "Convolution"
bottom: "conv3_2"
top: "conv3_3"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
}
}
layer {
name: "relu3_3"
type: "ReLU"
bottom: "conv3_3"
top: "conv3_3"
}
layer {
name: "pool3"
type: "Pooling"
bottom: "conv3_3"
top: "pool3"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv4_1"
type: "Convolution"
bottom: "pool3"
top: "conv4_1"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
}
}
layer {
name: "relu4_1"
type: "ReLU"
bottom: "conv4_1"
top: "conv4_1"
}
layer {
name: "conv4_2"
type: "Convolution"
bottom: "conv4_1"
top: "conv4_2"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
}
}
layer {
name: "relu4_2"
type: "ReLU"
bottom: "conv4_2"
top: "conv4_2"
}
layer {
name: "conv4_3"
type: "Convolution"
bottom: "conv4_2"
top: "conv4_3"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
}
}
layer {
name: "relu4_3"
type: "ReLU"
bottom: "conv4_3"
top: "conv4_3"
}
layer {
name: "pool4"
type: "Pooling"
bottom: "conv4_3"
top: "pool4"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv5_1"
type: "Convolution"
bottom: "pool4"
top: "conv5_1"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
}
}
layer {
name: "relu5_1"
type: "ReLU"
bottom: "conv5_1"
top: "conv5_1"
}
layer {
name: "conv5_2"
type: "Convolution"
bottom: "conv5_1"
top: "conv5_2"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
}
}
layer {
name: "relu5_2"
type: "ReLU"
bottom: "conv5_2"
top: "conv5_2"
}
layer {
name: "conv5_3"
type: "Convolution"
bottom: "conv5_2"
top: "conv5_3"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
}
}
layer {
name: "relu5_3"
type: "ReLU"
bottom: "conv5_3"
top: "conv5_3"
}
#========= RPN ============
# prepare lstm inputs
layer {
name: "im2col"
bottom: "conv5_3"
top: "im2col"
type: "Im2col"
convolution_param {
pad: 1
kernel_size: 3
}
}
layer {
name: "im2col_transpose"
top: "im2col_transpose"
bottom: "im2col"
type: "Transpose"
transpose_param {
dim: 3
dim: 2
dim: 0
dim: 1
}
}
layer {
name: "lstm_input"
type: "Reshape"
bottom: "im2col_transpose"
top: "lstm_input"
reshape_param {
shape { dim: -1 }
axis: 1
num_axes: 2
}
}
layer {
name: "lstm"
type: "Lstm"
bottom: "lstm_input"
top: "lstm"
lstm_param {
num_output: 128
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
}
clipping_threshold: 1
}
}
#===================== rlstm ===================
layer {
name: "lstm-reverse1"
type: "Reverse"
bottom: "lstm_input"
top: "rlstm_input"
reverse_param {
axis: 0
}
}
layer {
name: "rlstm"
type: "Lstm"
bottom: "rlstm_input"
top: "rlstm-output"
lstm_param {
num_output: 128
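# NB: no weight_filler specified here; Caffe's default constant filler
# initializes these weights to zero (cf. the forward "lstm" layer above)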
}
}
layer {
name: "lstm-reverse2"
type: "Reverse"
bottom: "rlstm-output"
top: "rlstm"
reverse_param {
axis: 0
}
}
# merge lstm and rlstm
layer {
name: "merge_lstm_rlstm"
type: "Concat"
bottom: "lstm"
bottom: "rlstm"
top: "merge_lstm_rlstm"
concat_param {
axis: 2
}
}
layer {
name: "lstm_output_reshape"
type: "Reshape"
bottom: "merge_lstm_rlstm"
top: "lstm_output_reshape"
reshape_param {
shape { dim: -1 dim: 1 }
axis: 1
num_axes: 1
}
}
# transpose the output to (N, C, H, W)
layer {
name: "lstm_output"
type: "Transpose"
bottom: "lstm_output_reshape"
top: "lstm_output"
transpose_param {
dim: 2
dim: 3
dim: 1
dim: 0
}
}
layer {
name: "fc"
bottom: "lstm_output"
top: "fc"
type: "Convolution"
convolution_param {
num_output: 512
kernel_size: 1
}
}
layer {
name: "relu_fc"
type: "ReLU"
bottom: "fc"
top: "fc"
}
layer {
name: "rpn_cls_score"
type: "Convolution"
bottom: "fc"
top: "rpn_cls_score"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
convolution_param {
num_output: 20 # 2(bg/fg) * 10(anchors)
kernel_size: 1 pad: 0 stride: 1
}
}
layer {
name: "rpn_bbox_pred"
type: "Convolution"
bottom: "fc"
top: "rpn_bbox_pred"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
convolution_param {
num_output: 20 # 2(coords) * 10(anchors)
kernel_size: 1 pad: 0 stride: 1
}
}
layer {
bottom: "rpn_cls_score"
top: "rpn_cls_score_reshape"
name: "rpn_cls_score_reshape"
type: "Reshape"
reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } }
}
layer {
name: 'rpn-data'
type: 'Python'
bottom: 'rpn_cls_score'
bottom: 'gt_boxes'
bottom: 'im_info'
bottom: 'data'
top: 'rpn_labels'
top: 'rpn_bbox_targets'
top: 'rpn_bbox_inside_weights'
top: 'rpn_bbox_outside_weights'
python_param {
module: 'rpn.anchor_target_layer'
layer: 'AnchorTargetLayer'
param_str: "'feat_stride': 16"
}
}
layer {
name: "rpn_loss_cls"
type: "SoftmaxWithLoss"
bottom: "rpn_cls_score_reshape"
bottom: "rpn_labels"
propagate_down: 1
propagate_down: 0
top: "rpn_cls_loss"
loss_weight: 1
loss_param {
ignore_label: -1
normalize: true
}
}
layer {
name: "rpn_loss_bbox"
type: "SmoothL1Loss"
bottom: "rpn_bbox_pred"
bottom: "rpn_bbox_targets"
bottom: 'rpn_bbox_inside_weights'
bottom: 'rpn_bbox_outside_weights'
top: "rpn_loss_bbox"
loss_weight: 1
smooth_l1_loss_param { sigma: 3.0 }
}
```