## Setting Up All Artifact Details

In [None]:
import os
# Host-side and device-side settings for the whole notebook. They are placed
# in os.environ so the %%bash cells further down can read them as shell
# variables.
os.environ.update({
    # Root of the SNPE SDK on the host -- set up your SNPE path here.
    'SNPE_ROOT': "/local/mnt/workspace/aditya/qaisw-v2.15.1.230926150623_62883",
    'RAW_FILE_FOLDER': "raw",
    'FOLDER_WITH_ARTIFACTS': "alberta",
    # Host paths of the three DLC variants that get pushed to the device.
    'DLCFP16': "models/alberta_fp16.dlc",
    'DLCW16A16': "models/albertaw16a16_offline.dlc",
    'DLCFP32': "models/alberta_fp32.dlc",
    'TARGET_INPUT_LIST': "tf_raw_list.txt",
    'ONDEVICE_FOLDER': "alberta_device",
    'DEVICE_HOST': "localhost",
    # Fill in your device id. Use the command "adb devices" to list device
    # names, for example "e18d5d0".
    'DEVICE_ID': "2dce6316",
    'SNPE_TARGET_ARCH': "aarch64-android",
    'SNPE_TARGET_STL': "libc++_shared.so",
    'SNPE_TARGET_DSPARCH': "hexagon-v73",
})

In [None]:
import json
import pandas as pd
# Load the SQuAD-style JSON file and flatten it into one record per question:
# {'context', 'question', 'answers'}.
data_path="dev-v2.0.json"
# Decode explicitly as UTF-8 instead of relying on the platform's default
# text encoding.
with open(data_path,"r",encoding="utf-8") as f:
    squad_data=json.load(f)

context_qa_triples=[]
for article in squad_data['data']:
    for paragraph in article['paragraphs']:
        context=paragraph['context']
        for qa in paragraph['qas']:
            # Reference text, in order of preference: first entry of
            # 'answers', else first entry of 'plausible_answers', else ''.
            # .get() keeps a record without one of those keys from raising
            # KeyError.
            candidates=qa.get('answers') or qa.get('plausible_answers') or []
            answer=candidates[0]['text'] if candidates else ''
            context_qa_triples.append({'context':context,'question':qa['question'],'answers':answer})

# Only the first 30 records are kept for the rest of the notebook.
df=pd.DataFrame(context_qa_triples[:30])
df.head(3)

In [None]:
import numpy as np
from transformers import AutoTokenizer, AlbertForQuestionAnswering
import torch

import os

tokenizer = AutoTokenizer.from_pretrained("twmkn9/albert-base-v2-squad2")

# (tokenizer output field, path prefix of the .raw file written for it).
raw_outputs = (
    ("input_ids", "input_ids/inp_ids_"),
    ("attention_mask", "attention_mask/attn_mask_"),
    ("token_type_ids", "token_type_ids/token_type_id_"),
)
# Create the output directories up front; without this the open() calls
# below raise FileNotFoundError when the folders do not exist yet.
for _, prefix in raw_outputs:
    os.makedirs(os.path.dirname(prefix), exist_ok=True)

# sample index -> [question, tokenized inputs, reference answer, context];
# read back later when the on-device outputs are decoded.
question_token={}

for i in range(df.shape[0]):
    row=df.iloc[i]
    question,text,answer=row.question,row.context,row.answers
    # Every sample is padded/truncated to a fixed length of 384 tokens.
    inputs = tokenizer(question, text, return_tensors="np",
            padding='max_length',
            truncation="longest_first",
            max_length=384)
    question_token[i]=[question,inputs,answer,text]

    # Each tokenizer output is cast to float32 and dumped as a headerless
    # binary file, one file per sample and per field.
    for field, prefix in raw_outputs:
        with open(prefix+str(i)+".raw", 'wb') as f:
            inputs[field].astype(np.float32).tofile(f)

#### Custom F1 Score Calculation

In [None]:
from sklearn.metrics import f1_score
def f1_scores_custom(prediction,ground_truth):
    """Return token-level ``[f1, precision, recall]`` for a predicted answer.

    Both strings are lower-cased and split on whitespace. The overlap is a
    multiset intersection: a token counts as matched at most as many times
    as it occurs in *both* strings, so repeated tokens in the prediction can
    no longer push recall above 1.0.

    Args:
        prediction: Predicted answer text.
        ground_truth: Reference answer text.

    Returns:
        A list ``[f1, precision, recall]`` of floats. Two empty strings score
        ``[1.0, 1.0, 1.0]``; exactly one empty string, or no shared token,
        scores ``[0.0, 0.0, 0.0]``.
    """
    from collections import Counter

    prediction_tokens=prediction.lower().split()
    ground_truth_tokens=ground_truth.lower().split()
    if not prediction_tokens and not ground_truth_tokens:
        return [1.0,1.0,1.0]
    if not prediction_tokens or not ground_truth_tokens:
        return [0.0,0.0,0.0]

    # Counter & Counter keeps the minimum count per token (multiset overlap).
    overlap=sum((Counter(prediction_tokens)&Counter(ground_truth_tokens)).values())
    if overlap==0:
        return [0.0,0.0,0.0]

    precision=overlap/len(prediction_tokens)
    recall=overlap/len(ground_truth_tokens)
    f1=2*(precision*recall)/(precision+recall)
    return [f1,precision,recall]

### Normal Model Inference

In [None]:
import tensorflow as tf
import numpy as np
from transformers import AutoTokenizer, AlbertForQuestionAnswering
import torch
# Reference run: score the original PyTorch checkpoint on the host so the
# on-device results can later be compared against it.
tokenizer = AutoTokenizer.from_pretrained("twmkn9/albert-base-v2-squad2")
model = AlbertForQuestionAnswering.from_pretrained("twmkn9/albert-base-v2-squad2")
f1_scores,precision_scores,recall_scores=[],[],[]
# question text -> answer predicted by the reference model.
question_answer={}
for i in range(df.shape[0]):
    row=df.iloc[i]
    question,text,answer=row.question,row.context,row.answers
    inputs = tokenizer(question, text, return_tensors="pt",
            padding='max_length',
            truncation="longest_first",
            max_length=384)
    # Pure inference: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        outputs = model(**inputs)
    # Argmax directly on the torch tensors; no detour through numpy/TensorFlow.
    answer_start_index = int(torch.argmax(outputs.start_logits, dim=-1)[0])
    answer_end_index = int(torch.argmax(outputs.end_logits, dim=-1)[0])

    # The predicted span is the inclusive token range [start, end].
    predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1]
    predicted_answer=tokenizer.decode(predict_answer_tokens)
    question_answer[question]=predicted_answer
    f1,precision,recall=f1_scores_custom(predicted_answer,answer)
    f1_scores.append(f1)
    precision_scores.append(precision)
    recall_scores.append(recall)

mean_f1_score=np.mean(f1_scores)
mean_precision_score=np.mean(precision_scores)
mean_recall_score=np.mean(recall_scores)

# Displayed in the order (F1, recall, precision).
mean_f1_score,mean_recall_score,mean_precision_score

## Creating Directory on Device

In [None]:
%%bash
# adb invocation addressing the configured host and device.
export DEVICE_SHELL="adb -H $DEVICE_HOST -s $DEVICE_ID"
# Create the bin/ and lib/ folders for the target architecture plus the DSP
# library folder; each step only runs if the previous one succeeded.
SNPE_DEVICE_ROOT=/data/local/tmp/snpeexample
$DEVICE_SHELL shell "mkdir -p $SNPE_DEVICE_ROOT/$SNPE_TARGET_ARCH/bin" &&
$DEVICE_SHELL shell "mkdir -p $SNPE_DEVICE_ROOT/$SNPE_TARGET_ARCH/lib" &&
$DEVICE_SHELL shell "mkdir -p $SNPE_DEVICE_ROOT/dsp/lib"

## Pushing All SNPE Lib and Bin folders onto Device

In [None]:
%%bash
# adb invocation addressing the configured host and device.
export DEVICE_SHELL="adb -H $DEVICE_HOST -s $DEVICE_ID"
# C++ runtime library and the snpe-net-run executable for the target arch.
$DEVICE_SHELL push $SNPE_ROOT/lib/$SNPE_TARGET_ARCH/$SNPE_TARGET_STL /data/local/tmp/snpeexample/$SNPE_TARGET_ARCH/lib
$DEVICE_SHELL push $SNPE_ROOT/bin/$SNPE_TARGET_ARCH/snpe-net-run /data/local/tmp/snpeexample/$SNPE_TARGET_ARCH/bin
# DSP libraries: take the Hexagon version from SNPE_TARGET_DSPARCH (set in the
# setup cell) instead of a hard-coded folder, so the configured value is the
# single source of truth. Falls back to hexagon-v75 when the variable is unset.
# NOTE(review): SNPE_TARGET_DSPARCH must match the DSP of the connected device -- confirm.
$DEVICE_SHELL push $SNPE_ROOT/lib/${SNPE_TARGET_DSPARCH:-hexagon-v75}/unsigned/*.so /data/local/tmp/snpeexample/dsp/lib
# Remaining SNPE shared libraries for the target arch.
$DEVICE_SHELL push $SNPE_ROOT/lib/$SNPE_TARGET_ARCH/*.so /data/local/tmp/snpeexample/$SNPE_TARGET_ARCH/lib

In [None]:
%%bash
# adb invocation addressing the configured host and device.
export DEVICE_SHELL="adb -H $DEVICE_HOST -s $DEVICE_ID"
# Working folder on the device that will hold the models and input files.
MODEL_DIR_ON_DEVICE=/data/local/tmp/$ONDEVICE_FOLDER
$DEVICE_SHELL shell "mkdir -p $MODEL_DIR_ON_DEVICE"

## Pushing all Model Artifacts onto Device

In [None]:
%%bash
# adb invocation addressing the configured host and device.
export DEVICE_SHELL="adb -H $DEVICE_HOST -s $DEVICE_ID"
# Push, in order: the three DLC files, the three raw-input folders and the
# input list into the device working folder.
for artifact in "$DLCFP16" "$DLCW16A16" "$DLCFP32" attention_mask input_ids token_type_ids "$TARGET_INPUT_LIST"; do
    $DEVICE_SHELL push "$artifact" "/data/local/tmp/$ONDEVICE_FOLDER"
done

## Inferencing FP32 Model on CPU Runtime

In [None]:
%%bash
# adb invocation addressing the configured host and device.
export DEVICE_SHELL="adb -H $DEVICE_HOST -s $DEVICE_ID"
# Everything between the EOF markers runs on the device, not on the host.
# It is fed to `adb shell` through an explicit here-document instead of
# relying on adb consuming the rest of this script from bash's stdin. The
# quoted delimiter stops the host shell from expanding $PATH,
# $LD_LIBRARY_PATH and the other variables before they reach the device.
$DEVICE_SHELL shell <<'EOF'
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/snpeexample/aarch64-android/lib
export PATH=$PATH:/data/local/tmp/snpeexample/aarch64-android/bin
export OUTPUT_FOLDER=OUTPUT_32b_CPU
export OUTPUT_DLC_32=alberta_fp32.dlc
export ONDEVICE_FOLDER="alberta_device"
cd /data/local/tmp/$ONDEVICE_FOLDER &&
snpe-net-run --container $OUTPUT_DLC_32 --input_list tf_raw_list.txt --set_unconsumed_as_output  --output_dir $OUTPUT_FOLDER
EOF

## Inferencing FP16 on DSP Runtime

In [None]:
%%bash
# adb invocation addressing the configured host and device.
export DEVICE_SHELL="adb -H $DEVICE_HOST -s $DEVICE_ID"
# Everything between the EOF markers runs on the device, not on the host.
# It is fed to `adb shell` through an explicit here-document instead of
# relying on adb consuming the rest of this script from bash's stdin. The
# quoted delimiter stops the host shell from expanding $PATH,
# $LD_LIBRARY_PATH and the other variables before they reach the device.
$DEVICE_SHELL shell <<'EOF'
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/snpeexample/aarch64-android/lib
export PATH=$PATH:/data/local/tmp/snpeexample/aarch64-android/bin
export ADSP_LIBRARY_PATH="/data/local/tmp/snpeexample/dsp/lib;/system/lib/rfsa/adsp;/system/vendor/lib/rfsa/adsp;/dsp"
export OUTPUT_FOLDER=OUTPUT_DSP_FP16
export OUTPUT_FP_16=alberta_fp16.dlc
export ONDEVICE_FOLDER="alberta_device"
cd /data/local/tmp/$ONDEVICE_FOLDER &&
snpe-net-run --container $OUTPUT_FP_16 --input_list tf_raw_list.txt --set_output_tensors start_logits,end_logits   --output_dir $OUTPUT_FOLDER --use_dsp
EOF

## Inferencing W16A16 on DSP Runtime

In [None]:
%%bash
# adb invocation addressing the configured host and device.
export DEVICE_SHELL="adb -H $DEVICE_HOST -s $DEVICE_ID"
# Everything between the EOF markers runs on the device, not on the host.
# It is fed to `adb shell` through an explicit here-document instead of
# relying on adb consuming the rest of this script from bash's stdin. The
# quoted delimiter stops the host shell from expanding $PATH,
# $LD_LIBRARY_PATH and the other variables before they reach the device.
$DEVICE_SHELL shell <<'EOF'
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/snpeexample/aarch64-android/lib
export PATH=$PATH:/data/local/tmp/snpeexample/aarch64-android/bin
export ADSP_LIBRARY_PATH="/data/local/tmp/snpeexample/dsp/lib;/system/lib/rfsa/adsp;/system/vendor/lib/rfsa/adsp;/dsp"
export OUTPUT_FOLDER=OUTPUT_DSP_W16A16
export DLC_W16A16=albertaw16a16_offline.dlc
export ONDEVICE_FOLDER="alberta_device"
cd /data/local/tmp/$ONDEVICE_FOLDER &&
snpe-net-run --container $DLC_W16A16 --input_list tf_raw_list.txt --set_output_tensors start_logits,end_logits --output_dir $OUTPUT_FOLDER --use_dsp --enable_cpu_fallback
EOF

## Pulling the Output from Device

In [None]:
%%bash
# adb invocation addressing the configured host and device.
export DEVICE_SHELL="adb -H $DEVICE_HOST -s $DEVICE_ID"
# Copy each inference output folder from the device working folder into a
# same-named folder in the current host directory.
for out_dir in OUTPUT_DSP_W16A16 OUTPUT_DSP_FP16 OUTPUT_32b_CPU; do
    $DEVICE_SHELL pull /data/local/tmp/$ONDEVICE_FOLDER/$out_dir $out_dir
done

In [None]:
def func(start_logits,end_logits,inputs):
    """Decode the answer span selected by the start/end logits.

    Args:
        start_logits: Array of start scores; the argmax is taken over the
            last axis and the first row is used.
        end_logits: Array of end scores, handled the same way.
        inputs: Tokenizer output whose ``input_ids[0]`` holds the token ids
            the logits refer to.

    Returns:
        The text decoded by the notebook-level ``tokenizer`` from the
        inclusive token range ``[start, end]``.
    """
    # Plain NumPy argmax: the logits are NumPy arrays, so no deep-learning
    # framework is needed just to pick the best index.
    answer_start_index = int(np.argmax(start_logits, axis=-1)[0])
    answer_end_index = int(np.argmax(end_logits, axis=-1)[0])
    predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1]
    return tokenizer.decode(predict_answer_tokens)

## Comparing Accuracy of FP32 Vs FP16

In [None]:
import glob
import tensorflow as tf
import os
# Decode the on-device outputs of the FP32 (CPU) and FP16 (DSP) runs, collect
# them in one table and report mean F1 / recall / precision per model.
folder = ["OUTPUT_32b_CPU","OUTPUT_DSP_FP16"]
dlc_type = ["fp32","fp16"]
data=[]
for j in range(0,2):
    print("-----------------------",folder[j],"-----------------------------")
    # Reset per output folder so each model gets its own means instead of
    # accumulating across both runs.
    f1_scores,precision_scores,recall_scores=[],[],[]

    # Keep only sub-directories (skips log files) and recover the sample
    # index from the directory name, expected to look like <prefix>_<index>.
    # basename() is used instead of splitting on "/" so the parsing does not
    # depend on the path separator; sorting makes the row order deterministic.
    indexed_results = sorted(
        (int(os.path.basename(os.path.normpath(p)).split("_")[1]), p)
        for p in glob.glob(os.path.join(folder[j], '*'))
        if os.path.isdir(p)
    )
    for sample_idx, result_path in indexed_results:
        # Raw float32 logits, one value per token position (384 per sample).
        start_logits = np.fromfile(os.path.join(result_path,'start_logits.raw'), dtype="float32").reshape((1,384))
        end_logits = np.fromfile(os.path.join(result_path,'end_logits.raw'), dtype="float32").reshape((1,384))
        question,inputs,answer,text=question_token[sample_idx]
        predicted_answer=func(start_logits,end_logits,inputs)
        data.append({"Model_Type":dlc_type[j],"question":question,"predicted_answer":predicted_answer,"Actual Model Answer":question_answer[question],"answer":answer,"context":text})
        # Score against the reference answer; previously this was commented
        # out, which left the lists empty and made every mean print as nan.
        f1,precision,recall=f1_scores_custom(predicted_answer,answer)
        f1_scores.append(f1)
        precision_scores.append(precision)
        recall_scores.append(recall)

    if not f1_scores:
        print("No results found in",folder[j])
        continue

    mean_f1_score=np.mean(f1_scores)
    mean_precision_score=np.mean(precision_scores)
    mean_recall_score=np.mean(recall_scores)

    print("F1_Score:",mean_f1_score,"Recall:",mean_recall_score,"Precision:",mean_precision_score)
data=pd.DataFrame(data)
data.head(40)

## Comparing Accuracy of FP32 Vs W16A16

In [None]:
import glob
import tensorflow as tf
import os
# Decode the on-device outputs of the FP32 (CPU) and W16A16 (DSP) runs,
# collect them in one table and report mean F1 / recall / precision per model.
folder = ["OUTPUT_32b_CPU","OUTPUT_DSP_W16A16"]
dlc_type = ["fp32","W16A16"]
data=[]
for j in range(0,2):
    print("-----------------------",folder[j],"-----------------------------")
    # Reset per output folder so each model gets its own means instead of
    # accumulating across both runs.
    f1_scores,precision_scores,recall_scores=[],[],[]

    # Keep only sub-directories (skips log files) and recover the sample
    # index from the directory name, expected to look like <prefix>_<index>.
    # basename() is used instead of splitting on "/" so the parsing does not
    # depend on the path separator; sorting makes the row order deterministic.
    indexed_results = sorted(
        (int(os.path.basename(os.path.normpath(p)).split("_")[1]), p)
        for p in glob.glob(os.path.join(folder[j], '*'))
        if os.path.isdir(p)
    )
    for sample_idx, result_path in indexed_results:
        # Raw float32 logits, one value per token position (384 per sample).
        start_logits = np.fromfile(os.path.join(result_path,'start_logits.raw'), dtype="float32").reshape((1,384))
        end_logits = np.fromfile(os.path.join(result_path,'end_logits.raw'), dtype="float32").reshape((1,384))
        question,inputs,answer,text=question_token[sample_idx]
        predicted_answer=func(start_logits,end_logits,inputs)
        data.append({"Model_Type":dlc_type[j],"question":question,"predicted_answer":predicted_answer,"Actual Model Answer":question_answer[question],"answer":answer,"context":text})
        # Score against the reference answer; previously this was commented
        # out, which left the lists empty and made every mean print as nan.
        f1,precision,recall=f1_scores_custom(predicted_answer,answer)
        f1_scores.append(f1)
        precision_scores.append(precision)
        recall_scores.append(recall)

    if not f1_scores:
        print("No results found in",folder[j])
        continue

    mean_f1_score=np.mean(f1_scores)
    mean_precision_score=np.mean(precision_scores)
    mean_recall_score=np.mean(recall_scores)

    print("F1_Score:",mean_f1_score,"Recall:",mean_recall_score,"Precision:",mean_precision_score)
data=pd.DataFrame(data)
data.head(40)