In [0]:
%sh

# BERT-Large Inference
#
# Prepares a clean /TF workspace, fetches the IntelAI models v1.8.1 sources
# and exports the paths used by the launch functions later in this cell.

# Install necessary packages
sudo apt-get update
sudo apt-get install zip -y
sudo apt-get -y install git
sudo apt-get install -y numactl

# Remove materials left by a previous run, then recreate the workspace
# together with the benchmark output (checkpoint) directory.
rm -rf -- /TF/
mkdir -p /TF/BERT-Large-output/ \
  || { echo "cannot create /TF/BERT-Large-output/" >&2; exit 1; }
export BERT_LARGE_OUTPUT=/TF/BERT-Large-output/

# Download the IntelAI benchmark sources. Stop on the first failure: every
# later step assumes /TF/models-1.8.1/ exists, and carrying on after a failed
# cd would run the remaining commands in the wrong directory.
cd /TF/ || { echo "cannot cd to /TF/" >&2; exit 1; }
wget https://github.com/IntelAI/models/archive/refs/tags/v1.8.1.zip \
  || { echo "download of IntelAI models v1.8.1 failed" >&2; exit 1; }
unzip v1.8.1.zip || { echo "cannot unpack v1.8.1.zip" >&2; exit 1; }
cd /TF/models-1.8.1/ || { echo "cannot cd to /TF/models-1.8.1/" >&2; exit 1; }

# Input locations (SQuAD data and BERT-Large model files).
# NOTE(review): presumably these were uploaded to DBFS beforehand - nothing in
# this cell creates them.
export SQUAD_DIR=/dbfs/home/TF/bert-large/SQuAD-1.1/
export BERT_LARGE_DIR=/dbfs/home/TF/bert-large/

# Append the current directory to PYTHONPATH without producing a leading
# empty entry when PYTHONPATH was not set before.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}."

# Launch Benchmark for inference

# Value of the "NUMA node(s)" line printed by lscpu; it selects which launch
# function is called at the end of this cell.
numa_nodes=$(lscpu | awk '/^NUMA node\(s\)/{ print $3 }')


       
#######################################
# Runs BERT-Large FP32 SQuAD inference as a single launch_benchmark.py
# process, without passing a socket id.
# Globals:   BERT_LARGE_DIR, BERT_LARGE_OUTPUT, SQUAD_DIR (read)
# Arguments: none
# Outputs:   whatever launch_benchmark.py prints
# Returns:   1 if the benchmarks directory cannot be entered, otherwise the
#            exit status of launch_benchmark.py
#######################################
function run_inference_without_numabind() {
  # launch_benchmark.py is referenced by relative path, so a failed cd must
  # not fall through to the launch.
  cd /TF/models-1.8.1/benchmarks/ || return 1
  # Expansions are quoted so a path containing whitespace stays one argument.
  python3 launch_benchmark.py \
    --model-name=bert_large \
    --precision=fp32 \
    --mode=inference \
    --framework=tensorflow \
    --batch-size=32 \
    --data-location "$BERT_LARGE_DIR/wwm_uncased_L-24_H-1024_A-16" \
    --checkpoint "$BERT_LARGE_DIR/bert_large_checkpoints" \
    --output-dir "$BERT_LARGE_OUTPUT/bert-squad-output" \
    --verbose \
    -- infer_option=SQuAD \
       DEBIAN_FRONTEND=noninteractive \
       predict_file="$SQUAD_DIR/dev-v1.1.json" \
       experimental-gelu=False \
       init_checkpoint=model.ckpt-3649
}

#######################################
# Starts two BERT-Large FP32 SQuAD inference instances in the background,
# one with --socket-id 0 and one with --socket-id 1.
# Each instance runs under nohup and appends its stdout to
# socket<N>-inference-log in the benchmarks directory. Both instances are
# given the same --output-dir. The function does not wait for them.
# Globals:   BERT_LARGE_DIR, BERT_LARGE_OUTPUT, SQUAD_DIR (read)
# Arguments: none
# Outputs:   none directly; see the per-socket log files
# Returns:   1 if the benchmarks directory cannot be entered, 0 once both
#            background jobs have been started
#######################################
function run_inference_with_numabind() {
  # The launcher and the log files are addressed by relative path, so a
  # failed cd must not fall through to the launches.
  cd /TF/models-1.8.1/benchmarks/ || return 1

  local socket_id
  for socket_id in 0 1; do
    # Expansions are quoted so a path containing whitespace stays one argument.
    nohup python3 launch_benchmark.py \
      --model-name=bert_large \
      --precision=fp32 \
      --mode=inference \
      --framework=tensorflow \
      --batch-size=32 \
      --socket-id "$socket_id" \
      --data-location "$BERT_LARGE_DIR/wwm_uncased_L-24_H-1024_A-16" \
      --checkpoint "$BERT_LARGE_DIR/bert_large_checkpoints" \
      --output-dir "$BERT_LARGE_OUTPUT/bert-squad-output" \
      --verbose \
      -- infer_option=SQuAD \
         DEBIAN_FRONTEND=noninteractive \
         predict_file="$SQUAD_DIR/dev-v1.1.json" \
         experimental-gelu=False \
         init_checkpoint=model.ckpt-3649 >> "socket${socket_id}-inference-log" &
  done
}

# Choose the launch mode from the detected NUMA node count: exactly "1" runs
# the single unbound instance, any other value runs the per-socket launches.
case "$numa_nodes" in
  1)
    run_inference_without_numabind
    ;;
  *)
    run_inference_with_numabind
    ;;
esac
  

In [0]:
%sh
# Get the inference result
#
# Prints the throughput lines from the logs written by the inference cell.
# The NUMA node count decides which set of logs is read: the benchmark output
# directory for a single node, the per-socket logs otherwise.
numa_nodes=$(lscpu | awk '/^NUMA node\(s\)/{ print $3 }')

# Matched as a fixed string (grep -F), not as a regular expression.
# NOTE(review): the spelling "threshod" is presumably what the benchmark
# prints - do not correct it here without checking the log output.
throughput_marker='throughput((num_processed_examples-threshod_examples)/Elapsedtime)'

if [ "$numa_nodes" = "1" ]; then
  # Abort instead of grepping in the wrong directory when the run left no
  # output directory behind.
  cd /TF/BERT-Large-output/bert-squad-output/ \
    || { echo "cannot cd to /TF/BERT-Large-output/bert-squad-output/" >&2; exit 1; }
  # -h keeps the output free of file-name prefixes when several logs match.
  grep -h -F -- "$throughput_marker" benchmark*.log
else
  cd /TF/models-1.8.1/benchmarks/ \
    || { echo "cannot cd to /TF/models-1.8.1/benchmarks/" >&2; exit 1; }
  grep -h -F -- "$throughput_marker" socket*-log
fi



In [0]:
%sh

# BERT-Large Training
#
# Prepares a clean /TF workspace, fetches the IntelAI models v1.8.1 sources,
# exports the paths used by the launch functions later in this cell and
# changes into the benchmarks directory they are launched from.

# Install necessary packages
sudo apt-get update
sudo apt-get install zip -y
sudo apt-get -y install git
sudo apt-get install -y libblacs-mpi-dev
sudo apt-get install -y numactl

# Remove materials left by a previous run, then recreate the workspace
# together with the benchmark output (checkpoint) directory.
rm -rf -- /TF/
mkdir -p /TF/BERT-Large-output/ \
  || { echo "cannot create /TF/BERT-Large-output/" >&2; exit 1; }
export BERT_LARGE_OUTPUT=/TF/BERT-Large-output/

# Download the IntelAI benchmark sources. Stop on the first failure: every
# later step assumes /TF/models-1.8.1/ exists, and carrying on after a failed
# cd would run the remaining commands in the wrong directory.
cd /TF/ || { echo "cannot cd to /TF/" >&2; exit 1; }
wget https://github.com/IntelAI/models/archive/refs/tags/v1.8.1.zip \
  || { echo "download of IntelAI models v1.8.1 failed" >&2; exit 1; }
unzip v1.8.1.zip || { echo "cannot unpack v1.8.1.zip" >&2; exit 1; }
cd models-1.8.1/ || { echo "cannot cd to /TF/models-1.8.1/" >&2; exit 1; }

# Input locations (SQuAD data and BERT-Large model files).
# NOTE(review): presumably these were uploaded to DBFS beforehand - nothing in
# this cell creates them.
export SQUAD_DIR=/dbfs/home/TF/bert-large/SQuAD-1.1
export BERT_LARGE_MODEL=/dbfs/home/TF/bert-large/wwm_uncased_L-24_H-1024_A-16

# Append the current directory to PYTHONPATH without producing a leading
# empty entry when PYTHONPATH was not set before.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}."

# Values of the "Core(s) per socket" and "NUMA node(s)" lines printed by
# lscpu; they size the NUMA-aware training launch and select the launch
# function at the end of this cell.
cores_per_socket=$(lscpu | awk '/^Core\(s\) per socket/{ print $4 }')
numa_nodes=$(lscpu | awk '/^NUMA node\(s\)/{ print $3 }')

# Launch Benchmark for training
# The launch functions call launch_benchmark.py by relative path.
cd /TF/models-1.8.1/benchmarks/ \
  || { echo "cannot cd to /TF/models-1.8.1/benchmarks/" >&2; exit 1; }

#######################################
# Runs the BERT-Large FP32 SQuAD training benchmark as a single
# launch_benchmark.py process with no MPI or thread-count options.
# launch_benchmark.py is referenced by relative path, so the caller must
# already be in the directory that contains it.
# Globals:   BERT_LARGE_MODEL, SQUAD_DIR (read)
# Arguments: none
# Outputs:   whatever launch_benchmark.py prints
# Returns:   exit status of launch_benchmark.py
#######################################
function run_training_without_numabind() {
  # Expansions are quoted so a path containing whitespace stays one argument.
  python launch_benchmark.py \
    --model-name=bert_large \
    --precision=fp32 \
    --mode=training \
    --framework=tensorflow \
    --batch-size=4 \
    --benchmark-only \
    --data-location="$BERT_LARGE_MODEL" \
    -- train-option=SQuAD \
       DEBIAN_FRONTEND=noninteractive \
       config_file="$BERT_LARGE_MODEL/bert_config.json" \
       init_checkpoint="$BERT_LARGE_MODEL/bert_model.ckpt" \
       vocab_file="$BERT_LARGE_MODEL/vocab.txt" \
       train_file="$SQUAD_DIR/train-v1.1.json" \
       predict_file="$SQUAD_DIR/dev-v1.1.json" \
       do-train=True \
       learning-rate=1.5e-5 \
       max-seq-length=384 \
       do_predict=True \
       warmup-steps=0 \
       num_train_epochs=0.1 \
       doc_stride=128 \
       do_lower_case=False \
       experimental-gelu=False \
       mpi_workers_sync_gradients=True
}

#######################################
# Runs the BERT-Large FP32 SQuAD training benchmark with one MPI process per
# NUMA node and (cores_per_socket - 2) intra-op threads per process.
# launch_benchmark.py is referenced by relative path, so the caller must
# already be in the directory that contains it.
# Globals:   BERT_LARGE_MODEL, SQUAD_DIR, cores_per_socket, numa_nodes (read)
# Arguments: none
# Outputs:   whatever launch_benchmark.py prints; a diagnostic on stderr when
#            cores_per_socket is not a number
# Returns:   1 if cores_per_socket is not a non-negative integer, otherwise
#            the exit status of launch_benchmark.py
#######################################
function run_training_with_numabind() {
  # Without this check an empty or malformed core count would be passed on as
  # a broken --num-intra-threads value.
  if ! [[ "$cores_per_socket" =~ ^[0-9]+$ ]]; then
    echo "run_training_with_numabind: cores_per_socket must be a non-negative integer, got '${cores_per_socket}'" >&2
    return 1
  fi

  # Two cores per socket are left out of the intra-op thread pool.
  local intra_thread
  intra_thread=$(( cores_per_socket - 2 ))

  # Expansions are quoted so a path containing whitespace stays one argument.
  python launch_benchmark.py \
    --model-name=bert_large \
    --precision=fp32 \
    --mode=training \
    --framework=tensorflow \
    --batch-size=4 \
    --mpi_num_processes="$numa_nodes" \
    --num-intra-threads="$intra_thread" \
    --num-inter-threads=1 \
    --benchmark-only \
    --data-location="$BERT_LARGE_MODEL" \
    -- train-option=SQuAD \
       DEBIAN_FRONTEND=noninteractive \
       config_file="$BERT_LARGE_MODEL/bert_config.json" \
       init_checkpoint="$BERT_LARGE_MODEL/bert_model.ckpt" \
       vocab_file="$BERT_LARGE_MODEL/vocab.txt" \
       train_file="$SQUAD_DIR/train-v1.1.json" \
       predict_file="$SQUAD_DIR/dev-v1.1.json" \
       do-train=True \
       learning-rate=1.5e-5 \
       max-seq-length=384 \
       do_predict=True \
       warmup-steps=0 \
       num_train_epochs=0.1 \
       doc_stride=128 \
       do_lower_case=False \
       experimental-gelu=False \
       mpi_workers_sync_gradients=True
}

# Choose the launch mode from the detected NUMA node count: exactly "1" runs
# the plain single-process training, any other value runs the MPI launch.
case "$numa_nodes" in
  1)
    run_training_without_numabind
    ;;
  *)
    run_training_with_numabind
    ;;
esac


In [0]:
# Report the installed TensorFlow version, then print the result of
# IsMklEnabled() to show whether this build is Intel-optimized.
import tensorflow
print("tensorflow version: " + tensorflow.__version__)

from packaging import version

# The private _pywrap_util_port module is imported from tensorflow.python.util
# for TensorFlow 2.5.0 and newer, and from tensorflow.python for older releases.
installed_tf = version.parse(tensorflow.__version__)
if installed_tf >= version.parse("2.5.0"):
  from tensorflow.python.util import _pywrap_util_port
else:
  from tensorflow.python import _pywrap_util_port

print(_pywrap_util_port.IsMklEnabled())