diff --git a/unittest/lstm_recode_test.cc b/unittest/lstm_recode_test.cc index 3104cf98c8..7de694b6a2 100644 --- a/unittest/lstm_recode_test.cc +++ b/unittest/lstm_recode_test.cc @@ -23,6 +23,7 @@ TEST_F(LSTMTrainerTest, RecodeTestKorBase) { double kor_full_err = TrainIterations(kTrainerIterations * 2); EXPECT_LT(kor_full_err, 88); // EXPECT_GT(kor_full_err, 85); + LOG(INFO) << "********** Expected < 88 ************\n" ; } TEST_F(LSTMTrainerTest, RecodeTestKor) { @@ -31,6 +32,7 @@ TEST_F(LSTMTrainerTest, RecodeTestKor) { "kor.Arial_Unicode_MS.exp0.lstmf", true, true, 5e-4, false); double kor_recode_err = TrainIterations(kTrainerIterations); EXPECT_LT(kor_recode_err, 60); + LOG(INFO) << "********** Expected < 60 ************\n" ; } // Tests that the given string encodes and decodes back to the same diff --git a/unittest/lstm_squashed_test.cc b/unittest/lstm_squashed_test.cc index 2103a0591c..966deefdef 100644 --- a/unittest/lstm_squashed_test.cc +++ b/unittest/lstm_squashed_test.cc @@ -24,7 +24,7 @@ TEST_F(LSTMTrainerTest, TestSquashed) { "SQU-2-layer-lstm", /*recode*/ true, /*adam*/ true); double lstm_2d_err = TrainIterations(kTrainerIterations * 2); EXPECT_LT(lstm_2d_err, 80); - LOG(INFO) << "********** < 80 ************" ; + LOG(INFO) << "********** < 80 ************\n" ; TestIntMode(kTrainerIterations); } diff --git a/unittest/lstm_test.cc b/unittest/lstm_test.cc index cf846de414..0dc1d319ab 100644 --- a/unittest/lstm_test.cc +++ b/unittest/lstm_test.cc @@ -15,7 +15,10 @@ // // Use --xsize 800 for text2image to be similar to original training data. // -// src/training/tesstrain.sh --fonts_dir /usr/share/fonts --lang eng --linedata_only --noextract_font_properties --langdata_dir ../langdata_lstm --tessdata_dir ../tessdata --output_dir ~/tesseract/test/testdata --fontlist "Arial" --maxpages 10 +// src/training/tesstrain.sh --fonts_dir /usr/share/fonts --lang eng \ +// --linedata_only --noextract_font_properties --langdata_dir ../langdata_lstm \ +// --tessdata_dir ../tessdata --output_dir ~/tesseract/test/testdata \ +// --fontlist "Arial" --maxpages 10 // #include "lstm_test.h" @@ -30,15 +33,15 @@ TEST_F(LSTMTrainerTest, BasicTest) { "Ct1,1,64O1c1]", "no-lstm", "eng/eng.unicharset", "eng.Arial.exp0.lstmf", false, false, 2e-4, false); - double non_lstm_err = TrainIterations(kTrainerIterations * 3); + double non_lstm_err = TrainIterations(kTrainerIterations * 4); EXPECT_LT(non_lstm_err, 98); - LOG(INFO) << "********** Expected < 98 ************" ; + LOG(INFO) << "********** Expected < 98 ************\n" ; // A basic single-layer, single direction LSTM. SetupTrainerEng("[1,1,0,32 Lfx100 O1c1]", "1D-lstm", false, false); double lstm_uni_err = TrainIterations(kTrainerIterations * 2); EXPECT_LT(lstm_uni_err, 86); - LOG(INFO) << "********** Expected < 86 ************" ; + LOG(INFO) << "********** Expected < 86 ************\n" ; // Beats the convolver. (Although it does have a lot more weights, it still // iterates faster.) EXPECT_LT(lstm_uni_err, non_lstm_err); @@ -51,8 +54,8 @@ TEST_F(LSTMTrainerTest, ColorTest) { "2D-color-lstm", true, true); double lstm_uni_err = TrainIterations(kTrainerIterations); EXPECT_LT(lstm_uni_err, 85); - EXPECT_GT(lstm_uni_err, 66); - LOG(INFO) << "********** Expected > 66 ** < 85 ************" ; +// EXPECT_GT(lstm_uni_err, 66); + LOG(INFO) << "********** Expected < 85 ************\n" ; } TEST_F(LSTMTrainerTest, BidiTest) { @@ -60,7 +63,7 @@ TEST_F(LSTMTrainerTest, BidiTest) { SetupTrainerEng("[1,1,0,32 Lbx100 O1c1]", "bidi-lstm", false, false); double lstm_bi_err = TrainIterations(kTrainerIterations); EXPECT_LT(lstm_bi_err, 75); - LOG(INFO) << "********** Expected < 75 ************" ; + LOG(INFO) << "********** Expected < 75 ************\n" ; // Int mode training is dead, so convert the trained network to int and check // that its error rate is close to the float version. TestIntMode(kTrainerIterations); @@ -72,10 +75,10 @@ TEST_F(LSTMTrainerTest, Test2D) { // A 2-layer LSTM with a 2-D feature-extracting LSTM on the bottom. SetupTrainerEng("[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]", "2-D-2-layer-lstm", false, false); - double lstm_2d_err = TrainIterations(kTrainerIterations); + double lstm_2d_err = TrainIterations(kTrainerIterations * 3 / 2 ); EXPECT_LT(lstm_2d_err, 98); - EXPECT_GT(lstm_2d_err, 90); - LOG(INFO) << "********** Expected > 90 ** < 98 ************" ; +// EXPECT_GT(lstm_2d_err, 90); + LOG(INFO) << "********** Expected < 98 ************\n" ; // Int mode training is dead, so convert the trained network to int and check // that its error rate is close to the float version. TestIntMode(kTrainerIterations); @@ -89,7 +92,7 @@ TEST_F(LSTMTrainerTest, TestAdam) { "2-D-2-layer-lstm", false, true); double lstm_2d_err = TrainIterations(kTrainerIterations); EXPECT_LT(lstm_2d_err, 70); - LOG(INFO) << "********** Expected < 70 ************" ; + LOG(INFO) << "********** Expected < 70 ************\n" ; TestIntMode(kTrainerIterations); } @@ -100,7 +103,7 @@ TEST_F(LSTMTrainerTest, SpeedTest) { "O1c1]", "2-D-2-layer-lstm", false, true); TrainIterations(kTrainerIterations); - LOG(INFO) << "********** *** ************" ; + LOG(INFO) << "********** *** ************\n" ; } // Tests that two identical networks trained the same get the same results. @@ -108,7 +111,7 @@ TEST_F(LSTMTrainerTest, SpeedTest) { TEST_F(LSTMTrainerTest, DeterminismTest) { SetupTrainerEng("[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]", "2-D-2-layer-lstm", false, false); - double lstm_2d_err_a = TrainIterations(kTrainerIterations / 3); + double lstm_2d_err_a = TrainIterations(kTrainerIterations); double act_error_a = trainer_->ActivationError(); double char_error_a = trainer_->CharError(); GenericVector trainer_a_data; @@ -116,7 +119,7 @@ TEST_F(LSTMTrainerTest, DeterminismTest) { &trainer_a_data)); SetupTrainerEng("[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]", "2-D-2-layer-lstm", false, false); - double lstm_2d_err_b = TrainIterations(kTrainerIterations / 3); + double lstm_2d_err_b = TrainIterations(kTrainerIterations); double act_error_b = trainer_->ActivationError(); double char_error_b = trainer_->CharError(); EXPECT_FLOAT_EQ(lstm_2d_err_a, lstm_2d_err_b); @@ -136,7 +139,7 @@ TEST_F(LSTMTrainerTest, DeterminismTest) { EXPECT_FLOAT_EQ(lstm_2d_err_a, lstm_2d_err_b); EXPECT_FLOAT_EQ(act_error_a, act_error_b); EXPECT_FLOAT_EQ(char_error_a, char_error_b); - LOG(INFO) << "********** *** ************" ; + LOG(INFO) << "********** *** ************\n" ; } // The baseline network against which to test the built-in softmax. @@ -145,15 +148,15 @@ TEST_F(LSTMTrainerTest, SoftmaxBaselineTest) { SetupTrainerEng("[1,1,0,32 Lfx96 O1c1]", "1D-lstm", false, true); double lstm_uni_err = TrainIterations(kTrainerIterations * 2); EXPECT_LT(lstm_uni_err, 60); - EXPECT_GT(lstm_uni_err, 48); - LOG(INFO) << "********** Expected > 48 ** < 60 ************" ; +// EXPECT_GT(lstm_uni_err, 48); + LOG(INFO) << "********** Expected < 60 ************\n" ; // Check that it works in int mode too. TestIntMode(kTrainerIterations); // If we run TestIntMode again, it tests that int_mode networks can // serialize and deserialize correctly. double delta = TestIntMode(kTrainerIterations); // The two tests (both of int mode this time) should be almost identical. - LOG(INFO) << "Delta in Int mode error rates = " << delta; + LOG(INFO) << "Delta in Int mode error rates = " << delta << "\n"; EXPECT_LT(delta, 0.01); } @@ -165,7 +168,7 @@ TEST_F(LSTMTrainerTest, SoftmaxTest) { SetupTrainerEng("[1,1,0,32 LS96]", "Lstm-+-softmax", false, true); double lstm_sm_err = TrainIterations(kTrainerIterations * 2); EXPECT_LT(lstm_sm_err, 49.0); - LOG(INFO) << "********** Expected < 49 ************" ; + LOG(INFO) << "********** Expected < 49 ************\n" ; // Check that it works in int mode too. TestIntMode(kTrainerIterations); } @@ -177,7 +180,7 @@ TEST_F(LSTMTrainerTest, EncodedSoftmaxTest) { SetupTrainerEng("[1,1,0,32 LE96]", "Lstm-+-softmax", false, true); double lstm_sm_err = TrainIterations(kTrainerIterations * 2); EXPECT_LT(lstm_sm_err, 62.0); - LOG(INFO) << "********** Expected < 62 ************" ; + LOG(INFO) << "********** Expected < 62 ************\n" ; // Check that it works in int mode too. TestIntMode(kTrainerIterations); } diff --git a/unittest/lstm_test.h b/unittest/lstm_test.h index fc6f0482a4..9931ca3fc6 100644 --- a/unittest/lstm_test.h +++ b/unittest/lstm_test.h @@ -87,7 +87,7 @@ class LSTMTrainerTest : public testing::Test { GenericVector filenames; filenames.push_back(STRING(TestDataNameToPath(lstmf_file).c_str())); EXPECT_TRUE(trainer_->LoadAllTrainingData(filenames, CS_SEQUENTIAL, false)); - LOG(INFO) << "Setup network:" << model_name; + LOG(INFO) << "Setup network:" << model_name << "\n" ; } // Trains for a given number of iterations and returns the char error rate. double TrainIterations(int max_iterations) { @@ -108,8 +108,8 @@ class LSTMTrainerTest : public testing::Test { iteration = trainer_->training_iteration(); mean_error *= 100.0 / kBatchIterations; LOG(INFO) << log_str.string(); - LOG(INFO) << "Best error = " << best_error; - LOG(INFO) << "Mean error = " << mean_error; + LOG(INFO) << "Best error = " << best_error << "\n" ; + LOG(INFO) << "Mean error = " << mean_error << "\n" ; if (mean_error < best_error) best_error = mean_error; } while (iteration < iteration_limit); LOG(INFO) << "Trainer error rate = " << best_error << "\n";