Unify run_mrt.sh scripts

marian-nmt · Feb 25, 2022 · c0497dd · c0497dd
1 parent d1b224e
commit c0497dd
Show file tree

Hide file tree

Showing 2 changed files with 118 additions and 51 deletions.
diff --git a/README.md b/README.md
@@ -7,7 +7,8 @@ pure C++ with minimal dependencies.
 This repository contains the regression test framework for the main development
 repository: https://github.com/marian-nmt/marian-dev.
 
-Tests have been developed for Linux for Marian compiled using GCC 7+.
+Tests have been developed for Linux, for Marian compiled using GCC 8+, and for
+Nvidia Maxwell/Pascal GPUs.
 
 
 ## Structure

diff --git a/run_mrt.sh b/run_mrt.sh
@@ -20,20 +20,47 @@
 SHELL=/bin/bash
 
 export LC_ALL=C.UTF-8
+export MRT_ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+export MRT_TOOLS="$MRT_ROOT/tools"
+export MRT_MARIAN="$( realpath "${MARIAN:-$MRT_ROOT/../build}" )"
+
+RUN_LOGS="$MRT_ROOT/previous.log.tmp"    # Logging file for log, logn and loge commands
+rm -f -- "$RUN_LOGS"                     # Quoted so that an MRT_ROOT containing spaces is handled
+
+# Needed so that previous.log is not overwritten when it is provided as an argument
+function cleanup {
+    test -s "$RUN_LOGS" && mv "$RUN_LOGS" "$MRT_ROOT/previous.log"    # only a non-empty run log replaces previous.log
+}
+trap cleanup EXIT    # run cleanup whenever the script exits
 
 function log {
-    echo [$(date "+%m/%d/%Y %T")] $@
+    echo "[$(date '+%m/%d/%Y %T')] $*" | tee -a "$RUN_LOGS"    # timestamped line to stdout and to the run log
 }
 
 function logn {
-    echo -n [$(date "+%m/%d/%Y %T")] $@
+    echo -n "[$(date '+%m/%d/%Y %T')] $*" | tee -a "$RUN_LOGS"    # same as log, without the trailing newline
+}
+
+function loge {
+    echo $@ | tee -a $RUN_LOGS
 }
 
 log "Running on $(hostname) as process $$"
 
-export MRT_ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-export MRT_TOOLS=$MRT_ROOT/tools
-export MRT_MARIAN="$( realpath ${MARIAN:-$MRT_ROOT/../build} )"
+# On Windows, the .exe suffix should be added to executables
+UNAME=$(uname)
+if [ "$UNAME" == "Linux" ]; then
+    log "Running on Linux machine"
+    export MRT_BIN=    # empty suffix: executables are used by their plain name
+    export MRT_OS=linux
+elif [[ "$UNAME" == CYGWIN* || "$UNAME" == MINGW* ]]; then    # unquoted right-hand sides are glob patterns (prefix match)
+    log "Running on Windows machine"
+    export MRT_BIN=.exe    # appended to the Marian executable names below
+    export MRT_OS=windows
+else
+    log "Unsupported or unrecognized machine with uname= $UNAME"
+    exit 1    # any other uname value aborts the run
+fi
 
 # Print folders which contain models and data for regression tests
 export MRT_MODELS="$( realpath ${MODELS:-$MRT_ROOT/models} )"
@@ -43,41 +70,51 @@ log "Using models from: $MRT_MODELS"
 log "Using data from: $MRT_DATA"
 
 # Try adding build/ to MARIAN for backward compatibility
-if [[ ! -e $MRT_MARIAN/marian-decoder ]]; then
+if [[ ! -e "$MRT_MARIAN/marian-decoder$MRT_BIN" ]]; then
     MRT_MARIAN="$MRT_MARIAN/build"
 fi
 
 # Check if required tools are present in marian directory
 for cmd in marian marian-decoder marian-scorer marian-vocab; do
-    if [ ! -e $MRT_MARIAN/$cmd ]; then
-        echo "Error: '$MRT_MARIAN/$cmd' not found. Do you need to compile the toolkit first?"
+    if [ ! -e "$MRT_MARIAN/$cmd$MRT_BIN" ]; then
+        loge "Error: '$MRT_MARIAN/$cmd$MRT_BIN' not found. Do you need to compile the toolkit first?"
         exit 1
     fi
 done
 
-log "Using Marian binary: $MRT_MARIAN/marian"
+# Common Marian executables
+export MRT_MARIAN_TRAINER="$MRT_MARIAN/marian$MRT_BIN"
+export MRT_MARIAN_DECODER="$MRT_MARIAN/marian-decoder$MRT_BIN"
+export MRT_MARIAN_SCORER="$MRT_MARIAN/marian-scorer$MRT_BIN"
+export MRT_MARIAN_VOCAB="$MRT_MARIAN/marian-vocab$MRT_BIN"
+
+log "Using Marian binary: $MRT_MARIAN_DECODER"
 
 # Log Marian version
-export MRT_MARIAN_VERSION=$($MRT_MARIAN/marian --version 2>&1)
+export MRT_MARIAN_VERSION=$("$MRT_MARIAN_TRAINER" --version 2>&1)    # quoted: the path may contain spaces
 log "Version: $MRT_MARIAN_VERSION"
 
 # Get CMake settings from the --build-info option
-if ! grep -q "build-info" < <( $MRT_MARIAN/marian --help ); then
-    echo "Error: Marian is too old as it does not have the required --build-info option"
+if ! grep -q "build-info" < <( "$MRT_MARIAN_TRAINER" --help ); then
+    loge "Error: Marian does not have the required --build-info option. Use newer version of Marian"
     exit 1
 fi
 
-$MRT_MARIAN/marian --build-info all 2> $MRT_ROOT/cmake.log
+"$MRT_MARIAN_TRAINER" --build-info all 2> "$MRT_ROOT/cmake.log"    # the build info is captured from stderr
+
+if test ! -s "$MRT_ROOT/cmake.log" || grep -q "Error: build-info is not available" "$MRT_ROOT/cmake.log"; then
+    loge "Warning: Marian does not set the required --build-info option. Tests may not work properly"
+fi
 
 # Check Marian compilation settings
-export MRT_MARIAN_BUILD_TYPE=$(cat $MRT_ROOT/cmake.log        | grep "CMAKE_BUILD_TYPE=" | cut -f2 -d=)
-export MRT_MARIAN_COMPILER=$(cat $MRT_ROOT/cmake.log          | grep "CMAKE_CXX_COMPILER=" | cut -f2 -d=)
-export MRT_MARIAN_USE_MKL=$(cat $MRT_ROOT/cmake.log           | egrep "COMPILE_CPU=(ON|on|1)")
-export MRT_MARIAN_USE_CUDA=$(cat $MRT_ROOT/cmake.log          | egrep "COMPILE_CUDA=(ON|on|1)")
-export MRT_MARIAN_USE_CUDNN=$(cat $MRT_ROOT/cmake.log         | egrep "USE_CUDNN=(ON|on|1)")
-export MRT_MARIAN_USE_SENTENCEPIECE=$(cat $MRT_ROOT/cmake.log | egrep "USE_SENTENCEPIECE=(ON|on|1)")
-export MRT_MARIAN_USE_FBGEMM=$(cat $MRT_ROOT/cmake.log        | egrep "USE_FBGEMM=(ON|on|1)")
-export MRT_MARIAN_USE_UNITTESTS=$(cat $MRT_ROOT/cmake.log     | egrep "COMPILE_TESTS=(ON|on|1)")
+export MRT_MARIAN_BUILD_TYPE=$(cat "$MRT_ROOT/cmake.log"        | grep  -i "CMAKE_BUILD_TYPE=" | cut -f2 -d=)    # path quoted as on the lines that write cmake.log
+export MRT_MARIAN_COMPILER=$(cat "$MRT_ROOT/cmake.log"          | grep  -i "CMAKE_CXX_COMPILER=" | cut -f2 -d=)
+export MRT_MARIAN_USE_MKL=$(cat "$MRT_ROOT/cmake.log"           | egrep -i "COMPILE_CPU=(true|on|1)" | cat)    # holds the matching line, or is empty when the option is off
+export MRT_MARIAN_USE_CUDA=$(cat "$MRT_ROOT/cmake.log"          | egrep -i "COMPILE_CUDA=(true|on|1)" | cat)
+export MRT_MARIAN_USE_CUDNN=$(cat "$MRT_ROOT/cmake.log"         | egrep -i "USE_CUDNN=(true|on|1)" | cat)
+export MRT_MARIAN_USE_SENTENCEPIECE=$(cat "$MRT_ROOT/cmake.log" | egrep -i "USE_SENTENCEPIECE=(true|on|1)" | cat)
+export MRT_MARIAN_USE_FBGEMM=$(cat "$MRT_ROOT/cmake.log"        | egrep -i "USE_FBGEMM=(true|on|1)" | cat)
+export MRT_MARIAN_USE_UNITTESTS=$(cat "$MRT_ROOT/cmake.log"     | egrep -i "COMPILE_TESTS=(true|on|1)" | cat)
 
 log "Build type: $MRT_MARIAN_BUILD_TYPE"
 log "Using compiler: $MRT_MARIAN_COMPILER"
@@ -87,13 +124,28 @@ log "Using SentencePiece: $MRT_MARIAN_USE_SENTENCEPIECE"
 log "Using FBGEMM: $MRT_MARIAN_USE_FBGEMM"
 log "Unit tests: $MRT_MARIAN_USE_UNITTESTS"
 
+
 # Number of available devices
-cuda_num_devices=$(($(echo $CUDA_VISIBLE_DEVICES | grep -c ',')+1))
+cuda_num_devices=$(( $(printf '%s' "$CUDA_VISIBLE_DEVICES" | tr -cd ',' | wc -c) + 1 ))    # commas + 1; grep -c counted matching lines, so the result never exceeded 2
 export MRT_NUM_DEVICES=${NUM_DEVICES:-$cuda_num_devices}
 
 log "Using CUDA visible devices: $CUDA_VISIBLE_DEVICES"
 log "Using number of GPU devices: $MRT_NUM_DEVICES"
 
+
+# CPU architecture details
+test -e "$MRT_ROOT/cpuinfo.log" || cat /proc/cpuinfo > "$MRT_ROOT/cpuinfo.log"    # an existing cpuinfo.log is reused as is
+grep -qi "avx2"        "$MRT_ROOT/cpuinfo.log" && MRT_CPU_AVX2=true
+grep -qi "avx512"      "$MRT_ROOT/cpuinfo.log" && MRT_CPU_AVX512=true    # substring match: any text containing "avx512" sets this flag
+grep -qi "avx512_vnni" "$MRT_ROOT/cpuinfo.log" && MRT_CPU_AVX512VNNI=true
+export MRT_CPU_AVX2          # each flag is assigned here only when its pattern was found
+export MRT_CPU_AVX512
+export MRT_CPU_AVX512VNNI
+
+log "CPU intrinsics: avx2=$MRT_CPU_AVX2 avx512=$MRT_CPU_AVX512 avx512vnni=$MRT_CPU_AVX512VNNI"
+
+
+# Time out
 export MRT_TIMEOUT=${TIMEOUT:-5m}   # the default time out is 5 minutes, see `man timeout`
 cmd_timeout=""
 if [ $MRT_TIMEOUT != "0" ]; then
@@ -105,14 +157,19 @@ log "Using time out: $MRT_TIMEOUT"
 # Exit codes
 export EXIT_CODE_SUCCESS=0
 export EXIT_CODE_SKIP=100
+export EXIT_CODE_SKIP_MISSING_FILE=101
+export EXIT_CODE_SKIP_NO_FBGEMM=105
+export EXIT_CODE_SKIP_NO_SENTENCEPIECE=106
+export EXIT_CODE_SKIP_NO_AVX2=110
+export EXIT_CODE_SKIP_NO_AVX512=111
 export EXIT_CODE_TIMEOUT=124    # Exit code returned by the timeout command if timed out
 
 function format_time {
-    dt=$(echo "$2 - $1" | bc 2>/dev/null)
-    dh=$(echo "$dt/3600" | bc 2>/dev/null)
-    dt2=$(echo "$dt-3600*$dh" | bc 2>/dev/null)
-    dm=$(echo "$dt2/60" | bc 2>/dev/null)
-    ds=$(echo "$dt2-60*$dm" | bc 2>/dev/null)
+    dt=$(awk -v a="$1" -v b="$2" 'BEGIN { printf "%.9f", b - a }')    # elapsed seconds ($2 - $1); awk avoids needing a `python` executable
+    dh=$(awk -v t="$dt" 'BEGIN { printf "%d", t / 3600 }')    # whole hours (%d truncates)
+    dt2=$(awk -v t="$dt" -v h="$dh" 'BEGIN { printf "%.9f", t - 3600 * h }')    # seconds left after the hours
+    dm=$(awk -v t="$dt2" 'BEGIN { printf "%d", t / 60 }')    # whole minutes
+    ds=$(awk -v t="$dt2" -v m="$dm" 'BEGIN { printf "%.9f", t - 60 * m }')    # remaining seconds, with fraction
     LANG=C printf "%02d:%02d:%02.3fs" $dh $dm $ds
 }
 
@@ -126,7 +183,7 @@ if [ $# -ge 1 ]; then
         # A log file with paths to test files
         if [[ "$arg" = *.log ]]; then
             # Extract tests from .log file
-            args=$(cat $arg | grep '/test_.*\.sh' | grep -v '/_' | sed 's/^ *- *//' | tr '\n' ' ' | sed 's/ *$//')
+            args=$(cat $arg | grep -vP '^\[' | grep '/test_.*\.sh' | grep -v '/_' | sed 's/^ *- *//' | tr '\n' ' ' | sed 's/ *$//')
             test_prefixes="$test_prefixes $args"
         # A hash tag
         elif [[ "$arg" = '#'* ]]; then
@@ -141,8 +198,14 @@ if [ $# -ge 1 ]; then
     done
 fi
 
+# Check if the variable is empty or contains only spaces
+if [[ -z "${test_prefixes// }" ]]; then
+    log "Error: no tests found in the specified input(s): $*"    # $* joins all arguments into the single message string
+    exit 1
+fi
+
 # Extract all subdirectories, which will be traversed to look for regression tests
-test_dirs=$(find $test_prefixes -type d | grep -v "/_")
+test_dirs=$(find $test_prefixes -type d | grep -v "/_" | cat)
 
 if grep -q "/test_.*\.sh" <<< "$test_prefixes"; then
     test_files=$(printf '%s\n' $test_prefixes | sed 's!*/!!')
@@ -208,7 +271,7 @@ do
         if [ "$nosetup" = true ]; then
             ((++count_skipped))
             tests_skipped+=($test_path)
-            echo " skipped"
+            loge " skipped"
             cd $MRT_ROOT
             continue;
         fi
@@ -221,24 +284,24 @@ do
         # Check exit code
         if [ $exit_code -eq $EXIT_CODE_SUCCESS ]; then
             ((++count_passed))
-            echo " OK"
+            loge " OK"
         elif [ $exit_code -eq $EXIT_CODE_SKIP ]; then
             ((++count_skipped))
             tests_skipped+=($test_path)
-            echo " skipped"
+            loge " skipped"
         elif [ $exit_code -eq $EXIT_CODE_TIMEOUT ]; then
             ((++count_timedout))
             tests_timedout+=($test_path)
             # Add a comment to the test log file that it timed out
             echo "The test timed out after $TIMEOUT" >> $test_file.log
             # A timed out test is a failed test
             ((++count_failed))
-            echo " timed out"
+            loge " timed out"
             success=false
         else
             ((++count_failed))
             tests_failed+=($test_path)
-            echo " failed"
+            loge " failed"
             success=false
         fi
 
@@ -270,39 +333,42 @@ done
 time_end=$(date +%s.%N)
 time_total=$(format_time $time_start $time_end)
 
-prev_log=previous.log
-rm -f $prev_log
-
 
 ###############################################################################
 # Print skipped and failed tests
 if [ -n "$tests_skipped" ] || [ -n "$tests_failed" ] || [ -n "$tests_timedout" ]; then
-    echo "---------------------"
+    loge "---------------------"
 fi
-[[ -z "$tests_skipped" ]] || echo "Skipped:" | tee -a $prev_log
+[[ -z "$tests_skipped" ]] || loge "Skipped:"
 for test_name in "${tests_skipped[@]}"; do
-    echo "  - $test_name" | tee -a $prev_log
+    loge "- $test_name"
 done
-[[ -z "$tests_failed" ]] || echo "Failed:" | tee -a $prev_log
+[[ -z "$tests_failed" ]] || loge "Failed:"
 for test_name in "${tests_failed[@]}"; do
-    echo "  - $test_name" | tee -a $prev_log
+    loge "- $test_name"
 done
-[[ -z "$tests_timedout" ]] || echo "Timed out:" | tee -a $prev_log
+[[ -z "$tests_timedout" ]] || loge "Timed out:"
 for test_name in "${tests_timedout[@]}"; do
-    echo "  - $test_name" | tee -a $prev_log
+    loge "- $test_name"
 done
 [[ -z "$tests_failed" ]] || echo "Logs:"
 for test_name in "${tests_failed[@]}"; do
-    echo "  - $(realpath $test_name | sed 's/\.sh/.sh.log/')"
+    echo "- $(realpath $test_name | sed 's/\.sh/.sh.log/')"    # plain echo: stdout only, not written to $RUN_LOGS. NOTE(review): presumably so these .sh.log paths are not picked up as tests when previous.log is passed as an argument; confirm
 done
 
 
 ###############################################################################
 # Print summary
-echo "---------------------" | tee -a $prev_log
-echo -n "Ran $count_all tests in $time_total, $count_passed passed, $count_skipped skipped, $count_failed failed" | tee -a $prev_log
-[ -n "$tests_timedout" ] && (echo -n " (incl. $count_timedout timed out)" | tee -a $prev_log)
-echo "" | tee -a $prev_log
+loge "---------------------"
+loge -n "Ran $count_all tests in $time_total, $count_passed passed, $count_skipped skipped, $count_failed failed"
+[ -n "$tests_timedout" ] && loge -n " (incl. $count_timedout timed out)"
+loge ""
 
 # Return exit code
-$success && [ $count_all -gt 0 ]
+if $success && [ $count_all -gt 0 ]; then    # $success is executed as a command; count_all must also be greater than zero
+    loge "OK"
+    exit 0
+else
+    loge "FAILED"    # reached when $success is false or count_all is 0
+    exit 1
+fi