Skip to content

Commit

Permalink
Merge pull request #1850 from Shreeshrii/new-branch-name
Browse files: browse the repository at this point in the history
add option --save_box_tiff to save box/tiff pairs with lstmf files
  • Loading branch information
egorpugin committed Aug 20, 2018
2 parents 115fe76 + 43e3f24 commit 1f3acca
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 1 deletion.
1 change: 1 addition & 0 deletions src/training/tesstrain.sh
Expand Up @@ -22,6 +22,7 @@
# --lang LANG_CODE # ISO 639 code.
# --langdata_dir DATADIR # Path to tesseract/training/langdata directory.
# --output_dir OUTPUTDIR # Location of output traineddata file.
# --save_box_tiff # Save box/tiff pairs along with lstmf files.
# --overwrite # Safe to overwrite files in output_dir.
# --linedata_only # Only generate training data for lstmtraining.
# --run_shape_clustering # Run shape clustering (use for Indic langs).
Expand Down
12 changes: 11 additions & 1 deletion src/training/tesstrain_utils.sh
Expand Up @@ -24,6 +24,7 @@ else
FONT_CONFIG_CACHE=$(mktemp -d --tmpdir font_tmp.XXXXXXXXXX)
fi
MAX_PAGES=0
SAVE_BOX_TIFF=0
OUTPUT_DIR="/tmp/tesstrain/tessdata"
OVERWRITE=0
LINEDATA=0
Expand Down Expand Up @@ -139,6 +140,8 @@ parse_flags() {
i=$j ;;
--overwrite)
OVERWRITE=1 ;;
--save_box_tiff)
SAVE_BOX_TIFF=1 ;;
--linedata_only)
LINEDATA=1 ;;
--extract_font_properties)
Expand Down Expand Up @@ -182,7 +185,9 @@ parse_flags() {
fi

# Location where intermediate files will be created.
-TRAINING_DIR=${WORKSPACE_DIR}/${LANG_CODE}
+TIMESTAMP=`date +%Y-%m-%d`
+TMP_DIR=$(mktemp -d --tmpdir ${LANG_CODE}-${TIMESTAMP}.XXX )
+TRAINING_DIR=${TMP_DIR}
# Location of log file for the whole run.
LOG_FILE=${TRAINING_DIR}/tesstrain.log

Expand Down Expand Up @@ -530,6 +535,9 @@ make__lstmdata() {
--puncs "${lang_prefix}.punc" \
--output_dir "${OUTPUT_DIR}" --lang "${LANG_CODE}" \
"${pass_through}" "${lang_is_rtl}"

if ((SAVE_BOX_TIFF)); then
tlog "\n=== Saving box/tiff pairs for training data ==="
for f in "${TRAINING_DIR}/${LANG_CODE}".*.box; do
tlog "Moving ${f} to ${OUTPUT_DIR}"
mv "${f}" "${OUTPUT_DIR}"
Expand All @@ -538,6 +546,8 @@ make__lstmdata() {
tlog "Moving ${f} to ${OUTPUT_DIR}"
mv "${f}" "${OUTPUT_DIR}"
done
fi
tlog "\n=== Moving lstmf files for training data ==="
for f in "${TRAINING_DIR}/${LANG_CODE}".*.lstmf; do
tlog "Moving ${f} to ${OUTPUT_DIR}"
mv "${f}" "${OUTPUT_DIR}"
Expand Down

0 comments on commit 1f3acca

Please sign in to comment.