# Code used to analyze DoLa results

In [3]:
import os
import sys
project_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))
sys.path.append(project_dir)

import glob
import json
import pandas as pd
pd.set_option('display.max_rows', 100)
import numpy as np

from config import IMPLEMENTED_MODELS

# TFQA

### Analyze results2 directory

In [8]:
raw_results = []
dola_results_dir = '../DoLa/results2/tfqa/{model_name}'
for model_name in IMPLEMENTED_MODELS:
    model_results_dir = dola_results_dir.format(model_name=model_name)
    if not os.path.exists(model_results_dir):
        continue
    result_fps = sorted(glob.glob(os.path.join(
        model_results_dir, '*'
    )))
    for result_fp in result_fps:
        if 'layers' not in result_fp:
            result_name = os.path.basename(result_fp)[:-5]
            data = json.load(open(os.path.join(result_fp)))
            raw_results.append([model_name, result_name.split('-')[1], result_name.split('-')[3], result_name.split('-')[4], data['total_mc1'], data['total_mc2'], data['total_mc3']])
df = pd.DataFrame(raw_results, columns=['model_name', 'ln_type', 'alpha', 'buckets', 'mc1', 'mc2', 'mc3'])#.query('buckets == "baseline"')
print(df.model_name.unique())

for col in df.columns:
    if isinstance(df[col].iloc[0], float):
        df[col] = df[col].apply(lambda x: round(x, 4))

df#.query("model_name == 'llama2-70b'")#.query("buckets == '40,42,44,46,48,50,52,54,56,58,60,80'")

['pythia-6.9b' 'llama2-70b' 'llama3_0-70b' 'qwen2-72b']


Unnamed: 0,model_name,ln_type,alpha,buckets,mc1,mc2,mc3
0,pythia-6.9b,none,none,baseline,0.2252,0.3717,0.1787
1,pythia-6.9b,none,0.0,02468101214161820222426283032,0.2607,0.5253,0.2448
2,pythia-6.9b,none,0.1,02468101214161820222426283032,0.2301,0.4417,0.1796
3,pythia-6.9b,none,0.1,024681012141632,0.2277,0.4213,0.1782
4,pythia-6.9b,none,0.1,161820222426283032,0.2093,0.5097,0.1848
5,pythia-6.9b,none,0.1,8101214161820222432,0.2179,0.4213,0.1733
6,pythia-6.9b,none,0.25,02468101214161820222426283032,0.2007,0.4682,0.1671
7,pythia-6.9b,none,0.5,02468101214161820222426283032,0.2166,0.4981,0.1717
8,pythia-6.9b,none,0.75,02468101214161820222426283032,0.2154,0.5043,0.1763
9,pythia-6.9b,none,0.9,02468101214161820222426283032,0.2142,0.5133,0.1745


### Analyze results directory

In [6]:
import glob
raw_results = []
fps = glob.glob('../DoLa/results/tfqa_bucket_search/*/*/*/*/*res*')
for fp in fps:
    model_name = fp.split('/')[-3]
    layers = fp.split('/')[-2]
    data = json.load(open(fp))
    raw_results.append([model_name, layers, data['total_mc1'], data['total_mc2'], data['total_mc3']])
df = pd.DataFrame(raw_results, columns=['model_name', 'buckets', 'mc1', 'mc2', 'mc3'])#.query('buckets == "baseline"')

for col in df.columns:
    if isinstance(df[col].iloc[0], float):
        df[col] = df[col].apply(lambda x: round(x, 4))
print(df.model_name.unique())

df#.query('model_name == "Meta-Llama-3-70B"')

['OLMo-7B-0424-hf' 'Meta-Llama-3-8B' 'Llama-2-7b-hf' 'Mistral-7B-v0.1'
 'llama-7b' 'Qwen2-7B']


Unnamed: 0,model_name,buckets,mc1,mc2,mc3
0,OLMo-7B-0424-hf,02468101214161820222426283032,0.2436,0.4176,0.1779
1,OLMo-7B-0424-hf,32,0.2509,0.3957,0.1878
2,OLMo-7B-0424-hf,024681012141632,0.2436,0.4176,0.1779
3,OLMo-7B-0424-hf,101214161820222432,0.2399,0.4205,0.1781
4,OLMo-7B-0424-hf,161820222426283032,0.2411,0.4193,0.1779
5,Meta-Llama-3-8B,02468101214161820222426283032,0.2913,0.5129,0.2253
6,Meta-Llama-3-8B,32,0.3195,0.4884,0.2392
7,Meta-Llama-3-8B,024681012141632,0.2913,0.5129,0.2253
8,Meta-Llama-3-8B,101214161820222432,0.2876,0.5128,0.2236
9,Meta-Llama-3-8B,161820222426283032,0.2864,0.5113,0.2231


# FACTOR

### Analyze results2 directory

In [19]:
raw_results = []
dola_results_dir = '../DoLa/results2/factor/{model_name}'
for model_name in IMPLEMENTED_MODELS:
    model_results_dir = dola_results_dir.format(model_name=model_name)
    if not os.path.exists(model_results_dir):
        continue
    result_fps = sorted(glob.glob(os.path.join(
        model_results_dir, '*'
    )))
    for result_fp in result_fps:
        result_name = os.path.basename(result_fp)[:-5]
        data = json.load(open(os.path.join(result_fp)))
        score = np.mean(data['is_correct'])
        raw_results.append([model_name, result_name.split('-')[1], result_name.split('-')[3], result_name.split('-')[4], score])
df = pd.DataFrame(raw_results, columns=['model_name', 'ln_type', 'alpha', 'buckets', 'acc'])

for col in df.columns:
    if isinstance(df[col].iloc[0], float):
        df[col] = df[col].apply(lambda x: round(x, 4))
print(df.model_name.unique())
df#.query('model_name == "qwen2-72b"')

['pythia-6.9b' 'llama2-70b' 'llama3_0-70b' 'qwen2-72b']


Unnamed: 0,model_name,ln_type,alpha,buckets,acc
0,pythia-6.9b,none,none,baseline,0.5125
1,pythia-6.9b,none,0.0,024681012141632,0.2828
2,pythia-6.9b,none,0.1,02468101214161820222426283032,0.4054
3,pythia-6.9b,none,0.1,024681012141632,0.4643
4,pythia-6.9b,none,0.1,161820222426283032,0.3697
5,pythia-6.9b,none,0.1,8101214161820222432,0.3996
6,pythia-6.9b,none,0.25,024681012141632,0.4797
7,pythia-6.9b,none,0.5,024681012141632,0.4788
8,pythia-6.9b,none,0.75,024681012141632,0.4537
9,pythia-6.9b,none,0.9,024681012141632,0.4614


### Analyze results directory

In [25]:
import glob
raw_results = []
fps = glob.glob('../DoLa/results/factor_bucket_search/*/*/*/*res*')
for fp in fps:
    model_name = fp.split('/')[-3]
    layers = fp.split('/')[-2]
    data = json.load(open(fp))
    score = np.mean(data['is_correct'])
    raw_results.append([model_name, layers, score])
df = pd.DataFrame(raw_results, columns=['model_name', 'buckets', 'acc'])

for col in df.columns:
    if isinstance(df[col].iloc[0], float):
        df[col] = df[col].apply(lambda x: round(x, 4))
print(df.model_name.unique())
df[df['buckets'].str.len() == 2]#.query('model_name == "Qwen2-7B"')

['OLMo-7B-0424-hf' 'Meta-Llama-3-8B' 'Llama-2-70b-hf' 'Meta-Llama-3-70B'
 'Llama-2-7b-hf' 'Mistral-7B-v0.1' 'llama-7b' 'Qwen2-72B' 'Qwen2-7B'
 'pythia-6.9b']


Unnamed: 0,model_name,buckets,acc
4,OLMo-7B-0424-hf,32,0.666
7,OLMo-7B-0424-hf,28,0.3737
12,Meta-Llama-3-8B,32,0.7546
17,Llama-2-70b-hf,80,0.8106
23,Meta-Llama-3-70B,80,0.8167
30,Llama-2-7b-hf,32,0.722
32,Llama-2-7b-hf,28,0.5692
39,Mistral-7B-v0.1,32,0.7576
42,Mistral-7B-v0.1,28,0.4104
47,llama-7b,32,0.5845


# Analyzing Best Layers

In [48]:
import glob
raw_results = []
fps = glob.glob('../DoLa/results/tfqa_bucket_search/*/*/*/*/*lay*')
for fp in fps:
    model_name = fp.split('/')[-3]
    layers = fp.split('/')[-2]
    data = json.load(open(fp))
    all_layers = list(data.keys())
    all_usage = list(data.values())
    sorted_usage = np.argsort(all_usage)
    top_layer = sorted_usage[-1]
    tl_pct_used = all_usage[top_layer]/sum(data.values())
    # if len(all_layers) > 1:
    sb_layer = sorted_usage[-2]
    sbl_pct_used = all_usage[sb_layer]/sum(data.values())
    # else:
    #     sb_layer = np.nan
    #     sbl_pct_used = np.nan
    
    raw_results.append([model_name, layers, top_layer, tl_pct_used, sb_layer, sbl_pct_used])
df = pd.DataFrame(raw_results, columns=['model_name', 'buckets', 'top_layer', 'tl_pct_used', 'sb_layer', 'sbl_pct_used'])#.query('buckets == "baseline"')

for col in df.columns:
    if isinstance(df[col].iloc[0], float):
        df[col] = df[col].apply(lambda x: round(x, 4))
df.describe()

Unnamed: 0,top_layer,tl_pct_used,sb_layer,sbl_pct_used
count,40.0,40.0,40.0,40.0
mean,1.575,0.725183,3.45,0.115505
std,3.418558,0.204226,3.249852,0.097905
min,0.0,0.328,0.0,0.0036
25%,0.0,0.5725,1.0,0.03855
50%,0.0,0.74745,2.0,0.09465
75%,0.0,0.9018,6.0,0.1538
max,13.0,0.995,12.0,0.4428
