## How to use

### Variables to specify
- `NUM_TRIALS`  : number of trials used to measure latency
- `LATENCY_DIR` : the directory for the latency measurement ex) latency_script.get_outputs_dir(id="main")
- `PERCENTILE` : the percentile for the tail latency ex) 0.95
- `input_list` : the list of model names (should be the same for both latency measurement and accuracy measurement. does not include the suffix such as `_test_acc.pt` or `_times_tamp.log`
- `acc_dir` : directory where the accuracy measurements are saved. use `get_{run_type}_output_dir` method

In [3]:
from typing import List
import pandas as pd
import numpy as np
import matplotlib as mpl
import seaborn as sns
import matplotlib.pyplot as plt
import statistics 
import torch
from pathlib import Path
import os
import sys; sys.path.append('../runscripts')
import constants
import run_individual as acc_script
import run_perf as latency_script

In [5]:
# Set num trials based on number of trials used in perf_eval

NUM_TRIALS = 3
# LATENCY_DIR = os.path.abspath(os.path.join(os.path.abspath(os.getcwd()),"..","runscripts","outputs_perf_eval","main"))
LATENCY_DIR = latency_script.get_outputs_dir(id="main")
# Percentile to use for latency points
PERCENTILE = 0.95

In [8]:
def get_latency(model_name: str) -> List[float]:
    '''
    Return percentile latency for that model
    '''

    latencies = []
    for j in range(1, NUM_TRIALS+1):
        log_dir = os.path.join(LATENCY_DIR, "Trial"+str(j), model_name)
#         log_dir = os.path.join(LATENCY_DIR, "Trial"+str(j), model_name+"_time_stamp.log")

        if not os.path.isfile(log_dir):
            print(log_dir, "not found")
            return 'N/A'
        data_file = pd.read_csv(log_dir,header=None)[0].tolist()
        data = [x for x in data_file]
        latencies.append(np.percentile(data, PERCENTILE))
    return np.mean(latencies)
    
def get_accuracy(model_name: str, output_dir: Path) -> List[float]:
    pt_file = model_name + '_test_acc.pt'
    pt_file = output_dir / pt_file
    if not os.path.isfile(pt_file):
        return 'N/A'
    acc = np.max(torch.load(pt_file))
    return acc

In [15]:
def print_list(input_list,acc_output_dir):
    for model_name in input_list:
        acc, lat = get_accuracy(model_name, acc_output_dir), get_latency(model_name)
        s='{model: <30} {acc: <20}  {lat: <20}'.format(model=model_name,acc=acc,lat=lat)
        print(s)

## Print latencies 

### vary encoder layer

In [11]:
input_list = [x.split("/")[-1] for x in constants.VARY_ENCODER_LAYER]
acc_dir = acc_script.get_layer_output_dir(id="11_12")
title = "Accuracy - Latency for varying encoder layer"

In [23]:
print_list(input_list,acc_dir)

bert_uncased_L-2_H-768_A-12    0.8508928571428571  54.609611271999995  
bert_uncased_L-4_H-768_A-12    0.8897321428571429  108.55607914133333  
bert_uncased_L-6_H-768_A-12    0.8957589285714286  162.56444011733333  
bert_uncased_L-8_H-768_A-12    0.9053571428571429  216.2380845253333   
bert_uncased_L-10_H-768_A-12   0.9154017857142858  270.153784376       
bert_uncased_L-12_H-768_A-12   0.9180803571428572  323.90019500266663  


### vary hidden dimension

In [25]:
input_list = [x.split("/")[-1] for x in constants.VARY_HIDDEN_DIM]
acc_dir = acc_script.get_hidden_dim_output_dir(id="11_12")
print_list(input_list,acc_dir)

bert_uncased_L-12_H-128_A-2    0.8049107142857144    29.20371252         
bert_uncased_L-12_H-256_A-4    0.8642857142857142    62.55714296266666   
bert_uncased_L-12_H-512_A-8    N/A                   168.81853832533332  
bert_uncased_L-12_H-768_A-12   N/A                   323.90019500266663  


### miniatures

In [26]:
input_list = [x.split("/")[-1] for x in constants.VARY_MINIATURES.keys()]
acc_dir = acc_script.get_miniature_output_dir(id="11_12")
print_list(input_list,acc_dir)

tiny                           0.7823660714285714    5.170050047999999   
mini                           0.8125                21.013924453333335  
small                          0.8553571428571428    56.221618408000005  
medium                         0.8883928571428571    112.52036641333332  
base                           0.9180803571428572    324.0740922106666   
large                          N/A                   1060.6418483626667  


### Elastic Attention Head -- this section is potentially outdated

#### Head number

In [39]:
NUM_TRIALS = 1
LATENCY_DIR = latency_script.get_outputs_dir(id="main")
attention_ratio_list=[i/constants.ATT_K for i in range(1,constants.ATT_K+1)]
approach="head_num"
input_list=["attention_" + str(approach) + "_" + str(attention_ratio) for attention_ratio in attention_ratio_list]
acc_dir=Path("")
print_list(input_list,acc_dir)

attention_head_num_0.2         N/A                   269.459537856       
attention_head_num_0.4         N/A                   275.395959976       
attention_head_num_0.6         N/A                   273.905371152       
attention_head_num_0.8         N/A                   274.76665103199997  
attention_head_num_1.0         N/A                   273.58369588        


#### Head size 1

In [41]:
NUM_TRIALS = 1
LATENCY_DIR = latency_script.get_outputs_dir(id="main")
attention_ratio_list=[i/constants.ATT_K for i in range(1,constants.ATT_K+1)]
approach="head_size_1"
input_list=["attention_" + str(approach) + "_" + str(attention_ratio) for attention_ratio in attention_ratio_list]
acc_dir=Path("")
print_list(input_list,acc_dir)

attention_head_size_1_0.2      N/A                   272.12044126399996  
attention_head_size_1_0.4      N/A                   283.77910764        
attention_head_size_1_0.6      N/A                   291.67275775999997  
attention_head_size_1_0.8      N/A                   294.28063163999997  
attention_head_size_1_1.0      N/A                   300.534277096       


#### Head size 2

In [11]:
NUM_TRIALS = 1
LATENCY_DIR = latency_script.get_outputs_dir(id="main")
TAG="main"
attention_ratio_list=[i/constants.ATT_K for i in range(1,constants.ATT_K+1)]
approach="head_size_2"
input_list=["attention_" + str(approach) + "_" + str(attention_ratio) for attention_ratio in attention_ratio_list]
acc_dir=acc_script.get_attention_output_dir(attention_approach=approach)
# acc_dir=Path("")
print_list(input_list,acc_dir)

attention_head_size_2_0.2      0.8375                728.003598624       
attention_head_size_2_0.4      0.8340909090909091    789.0469323120001   
attention_head_size_2_0.6      0.8318181818181818    849.7803931440001   
attention_head_size_2_0.8      0.8386363636363636    938.621509016       
attention_head_size_2_1.0      0.8375                1019.4757405839999  


### Joint

In [16]:
NUM_TRIALS = 3
attention_config=[0.2, 0.4, 0.6 ,0.8, 1.0]
layer_config =[2 ,4, 8, 16, 24]
LATENCY_DIR = latency_script.get_outputs_dir(id="main")

input_list= ["bert-large-uncased"+"_layer_" + str(layer_config[i])+"_attention_"+str(attention_config[i])for i in range(len(layer_config))]
acc_dir=acc_script.get_joint_output_dir()
print_list(input_list,acc_dir)

bert-large-uncased_layer_2_attention_0.2 0.8245535714285713    22.445771520000005  
bert-large-uncased_layer_4_attention_0.4 0.8267857142857142    47.40298606933334   
bert-large-uncased_layer_8_attention_0.6 0.8745535714285715    107.238931744       
bert-large-uncased_layer_16_attention_0.8 0.9120535714285715    229.97596330666667  
bert-large-uncased_layer_24_attention_1.0 0.9354910714285715    371.9971071946666   


## Scratch Work
ignore below code

In [8]:
## trained on the elastic attention heads branch.
LATENCY_DIR = os.path.abspath(os.path.join(os.path.abspath(os.getcwd()),"..","runscripts","outputs_perf_eval","test"))
layer_list=[i for i in range(2,13,2)]
input_list = ["layer" + "_" + str(i) for i in layer_list]
acc_dir=acc_script.get_elastic_layer_output_dir(id="disparity_check_2")
print_list(input_list,acc_dir)

/home/ilee300/workspace/ofa_transformers_runfiles/runscripts/outputs_perf_eval/test/Trial1/layer_2_time_stamp.log not found
layer_2                        0.8388392857142858    N/A                 
/home/ilee300/workspace/ofa_transformers_runfiles/runscripts/outputs_perf_eval/test/Trial1/layer_4_time_stamp.log not found
layer_4                        0.8310267857142858    N/A                 
/home/ilee300/workspace/ofa_transformers_runfiles/runscripts/outputs_perf_eval/test/Trial1/layer_6_time_stamp.log not found
layer_6                        0.8357142857142856    N/A                 
/home/ilee300/workspace/ofa_transformers_runfiles/runscripts/outputs_perf_eval/test/Trial1/layer_8_time_stamp.log not found
layer_8                        0.8354910714285715    N/A                 
/home/ilee300/workspace/ofa_transformers_runfiles/runscripts/outputs_perf_eval/test/Trial1/layer_10_time_stamp.log not found
layer_10                       0.8377232142857143    N/A                 
/home/ile

In [9]:
## trained on the master branch -> has elastic encoder layer
input_list = [x.split("/")[-1] for x in constants.VARY_ENCODER_LAYER]
acc_dir = acc_script.get_layer_output_dir(id="disparity_check")
title = "Accuracy - Latency for varying encoder layer"
print_list(input_list,acc_dir)

/home/ilee300/workspace/ofa_transformers_runfiles/runscripts/outputs_perf_eval/test/Trial1/bert_uncased_L-2_H-768_A-12_time_stamp.log not found
bert_uncased_L-2_H-768_A-12    0.8622767857142858    N/A                 
/home/ilee300/workspace/ofa_transformers_runfiles/runscripts/outputs_perf_eval/test/Trial1/bert_uncased_L-4_H-768_A-12_time_stamp.log not found
bert_uncased_L-4_H-768_A-12    0.8950892857142857    N/A                 
/home/ilee300/workspace/ofa_transformers_runfiles/runscripts/outputs_perf_eval/test/Trial1/bert_uncased_L-6_H-768_A-12_time_stamp.log not found
bert_uncased_L-6_H-768_A-12    0.9013392857142858    N/A                 
/home/ilee300/workspace/ofa_transformers_runfiles/runscripts/outputs_perf_eval/test/Trial1/bert_uncased_L-8_H-768_A-12_time_stamp.log not found
bert_uncased_L-8_H-768_A-12    0.9118303571428571    N/A                 
/home/ilee300/workspace/ofa_transformers_runfiles/runscripts/outputs_perf_eval/test/Trial1/bert_uncased_L-10_H-768_A-12_time_sta

Observation : when using ofa_transformers, the individual ones don't get trained fully. not sure why. probably cuase of attention layer? the master branch 

Try training on the elastic layer branch in a weight shared fashion.