# Run BERT-Large inference workload

In [None]:
%%bash

# Download the SQuAD 1.1 files, the BERT-Large checkpoints and the pre-trained
# whole-word-masking BERT-Large model into ~/TF/bert-large.
#
# Stop at the first failing command: without this, a failed cd or wget would
# let the remaining downloads/extractions run in the wrong directory or on
# missing archives.
set -euo pipefail

bert_root="${HOME}/TF/bert-large"
squad_dir="${bert_root}/SQuAD-1.1"
squad_base_url="https://github.com/oap-project/oap-project.github.io/raw/master/resources/ai/bert"

# Always start from a clean tree; ':?' refuses to expand an empty path.
rm -rf -- "${bert_root:?}"
mkdir -p "${squad_dir}"

# SQuAD 1.1 dev/train sets and the evaluation script.
cd "${squad_dir}"
for squad_file in dev-v1.1.json evaluate-v1.1.py train-v1.1.json; do
  wget "${squad_base_url}/${squad_file}"
done

# Both archives are fetched and unpacked directly under the BERT-Large root.
cd "${bert_root}"
wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_8/bert_large_checkpoints.zip
unzip bert_large_checkpoints.zip

wget https://storage.googleapis.com/bert_models/2019_05_30/wwm_uncased_L-24_H-1024_A-16.zip
unzip wwm_uncased_L-24_H-1024_A-16.zip

In [None]:
%%bash

# BERT-Large inference: prepare the benchmark working directory.
#
# Stop at the first failing command so that unzip/git apply never run against
# a missing archive or from the wrong directory.
set -euo pipefail

inference_dir="${HOME}/TF/bert-large/inference"

# Install necessary packages
# NOTE(review): numactl is presumably what the launcher relies on for the
# --socket-id runs -- confirm against the IntelAI launcher.
sudo apt-get install -y numactl

# Empty the inference directory and create the benchmark output directory.
# ':?' refuses to expand an empty path in front of the glob.
rm -rf -- "${inference_dir:?}"/*
mkdir -p "${inference_dir}/BERT-Large-output"

# Download the IntelAI benchmark sources (v1.8.1) and the patch applied on top.
cd "${inference_dir}"
wget https://github.com/IntelAI/models/archive/refs/tags/v1.8.1.zip
unzip v1.8.1.zip
wget https://github.com/oap-project/oap-tools/raw/master/integrations/ml/databricks/benchmark/IntelAI_models_bertlarge_inference_realtime_throughput.patch

# The patch is applied from inside the extracted source tree.
cd ./models-1.8.1/
git apply ../IntelAI_models_bertlarge_inference_realtime_throughput.patch

In [None]:
%%bash


# BERT-Large inference: publish the locations read by the run_inference_*
# functions below, then move to the directory holding launch_benchmark.py.
BERT_LARGE_DIR=~/TF/bert-large/
SQUAD_DIR=~/TF/bert-large/SQuAD-1.1/
BERT_LARGE_OUTPUT=~/TF/bert-large/inference/BERT-Large-output
# The benchmarks directory doubles as the Python module search path.
PYTHONPATH=~/TF/bert-large/inference/models-1.8.1/benchmarks/
export BERT_LARGE_OUTPUT SQUAD_DIR BERT_LARGE_DIR PYTHONPATH

# launch_benchmark.py is invoked by relative name, so run from its directory.
cd "$PYTHONPATH"

#######################################
# Run the BERT-Large FP32 SQuAD inference benchmark once, in the foreground,
# without a --socket-id binding.
# Globals (read):
#   BERT_LARGE_DIR     directory holding the model and checkpoint folders
#   SQUAD_DIR          directory holding dev-v1.1.json
#   BERT_LARGE_OUTPUT  parent directory for bert-squad-output
#   BERT_PYTHON_BIN    optional interpreter override; defaults to the
#                      azureml_py38_tensorflow conda environment's python
# Notes:
#   launch_benchmark.py is referenced by relative name, so the caller must
#   already be in the benchmarks directory.
# Returns:
#   exit status of the launcher.
#######################################
run_inference_without_numabind() {
  local python_bin="${BERT_PYTHON_BIN:-/anaconda/envs/azureml_py38_tensorflow/bin/python}"

  # Everything after the bare "--" is a name=value model argument; this is the
  # same form the socket-1 launch in this file uses.
  "${python_bin}" launch_benchmark.py \
    --model-name=bert_large \
    --precision=fp32 \
    --mode=inference \
    --framework=tensorflow \
    --batch-size=32 \
    --data-location "${BERT_LARGE_DIR}/wwm_uncased_L-24_H-1024_A-16" \
    --checkpoint "${BERT_LARGE_DIR}/bert_large_checkpoints" \
    --output-dir "${BERT_LARGE_OUTPUT}/bert-squad-output" \
    --verbose \
    -- infer_option=SQuAD \
       DEBIAN_FRONTEND=noninteractive \
       "predict_file=${SQUAD_DIR}/dev-v1.1.json" \
       experimental-gelu=False \
       init_checkpoint=model.ckpt-3649
}
    

#######################################
# Launch two BERT-Large FP32 SQuAD inference benchmarks in the background,
# one with --socket-id 0 and one with --socket-id 1.
# Each run is started under nohup and its stdout is appended to
# socket<N>-inference-log in the current directory. The function returns as
# soon as both jobs are started; it does not wait for them.
# Globals (read):
#   BERT_LARGE_DIR     directory holding the model and checkpoint folders
#   SQUAD_DIR          directory holding dev-v1.1.json
#   BERT_LARGE_OUTPUT  parent directory for bert-squad-output
#   BERT_PYTHON_BIN    optional interpreter override; defaults to the
#                      azureml_py38_tensorflow conda environment's python
# Notes:
#   launch_benchmark.py is referenced by relative name, so the caller must
#   already be in the benchmarks directory.
#   NOTE(review): only sockets 0 and 1 are used, whatever the NUMA node
#   count is -- confirm this is intended for hosts with more nodes.
#######################################
run_inference_with_numabind() {
  local python_bin="${BERT_PYTHON_BIN:-/anaconda/envs/azureml_py38_tensorflow/bin/python}"
  local socket_id

  for socket_id in 0 1; do
    # Both launches pass model arguments after a bare "--" so the two runs
    # are configured identically apart from the socket id.
    nohup "${python_bin}" launch_benchmark.py \
      --model-name=bert_large \
      --precision=fp32 \
      --mode=inference \
      --framework=tensorflow \
      --batch-size=32 \
      --socket-id "${socket_id}" \
      --data-location "${BERT_LARGE_DIR}/wwm_uncased_L-24_H-1024_A-16" \
      --checkpoint "${BERT_LARGE_DIR}/bert_large_checkpoints" \
      --output-dir "${BERT_LARGE_OUTPUT}/bert-squad-output" \
      --verbose \
      -- infer_option=SQuAD \
         DEBIAN_FRONTEND=noninteractive \
         "predict_file=${SQUAD_DIR}/dev-v1.1.json" \
         experimental-gelu=False \
         init_checkpoint=model.ckpt-3649 >> "socket${socket_id}-inference-log" &
  done
}
    
# Choose the launch mode from the "NUMA node(s)" count reported by lscpu:
# exactly "1" runs a single unbound benchmark, any other value (including an
# empty result) starts the per-socket runs.
numa_nodes=$(lscpu | awk '/^NUMA node\(s\)/{ print $3 }')

case "$numa_nodes" in
  1) run_inference_without_numabind ;;
  *) run_inference_with_numabind ;;
esac

In [None]:
%%bash

# Get the inference result: print the throughput lines from the benchmark logs.
# Single-NUMA-node runs are read from benchmark*.log in the benchmark output
# directory; otherwise the socket*-log files in the benchmarks directory are
# read.
numa_nodes=$(lscpu | awk '/^NUMA node\(s\)/{ print $3 }')

# Searched for verbatim, including the "threshod" spelling.
# NOTE(review): presumably this mirrors the text the patched benchmark prints
# -- confirm before "correcting" it.
throughput_marker="throughput((num_processed_examples-threshod_examples)/Elapsedtime)"

case "$numa_nodes" in
  1)
    cd ~/TF/bert-large/inference/BERT-Large-output/bert-squad-output/
    cat benchmark*.log | grep "$throughput_marker"
    ;;
  *)
    cd  ~/TF/bert-large/inference/models-1.8.1/benchmarks/
    cat socket*-log | grep "$throughput_marker"
    ;;
esac

In [None]:
# Print the TensorFlow version and whether it is Intel-optimized, i.e. the
# value returned by _pywrap_util_port.IsMklEnabled().

import tensorflow
print("tensorflow version: " + tensorflow.__version__)

from packaging import version

# The private _pywrap_util_port module is imported from tensorflow.python.util
# for TensorFlow 2.5.0 and later, and from tensorflow.python for older versions.
installed_version = version.parse(tensorflow.__version__)
if installed_version >= version.parse("2.5.0"):
  from tensorflow.python.util import _pywrap_util_port
else:
  from tensorflow.python import _pywrap_util_port

print(_pywrap_util_port.IsMklEnabled())