In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import os
# Make sure the directory that the LaTeX tables are written to exists.
os.makedirs('tables/performance', exist_ok=True)

import warnings
# Silence all warnings globally.
# NOTE(review): this also hides pandas deprecation / chained-assignment
# warnings raised by later cells - consider narrowing the filter.
warnings.filterwarnings("ignore")

In [None]:
# Load the raw measurements and order them by output length, then by layer
# strategy (zero < 1/4 < 1/2 < 3/4 < all, via a temporary ordinal column),
# then by bit width. Rows with output_tokens == 20000 are excluded.
df = (
    pd.read_csv("experiment_results.csv")
    .assign(
        layer_strateg_map=lambda frame: frame['layer_strategy'].map(
            {'zero': 0, '1/4': 1, '1/2': 2, '3/4': 3, 'all': 4}
        )
    )
    .sort_values(by=['output_tokens', 'layer_strateg_map', 'q_bits'])
    .query('output_tokens != 20000')
    .drop(columns=['layer_strateg_map'])
    .reset_index(drop=True)
)
df

In [None]:
# Work on a copy so the 'strategy' column added below does not end up in `df`.
pivot_df = df.copy()
def strategy_q_bits(row):
    """Build the column label for one experiment row.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) with a
            ``'layer_strategy'`` entry and, for quantized runs, a
            ``'q_bits'`` entry.

    Returns:
        ``'No Quant'`` when ``layer_strategy`` is ``'zero'``; otherwise
        ``'<layer_strategy> (<q_bits> bits)'`` with ``q_bits`` cast to int.
    """
    strategy = row['layer_strategy']
    if strategy != 'zero':
        bits = int(row["q_bits"])
        return f'{strategy} ({bits} bits)'
    return 'No Quant'
pivot_df['strategy'] = pivot_df.apply(strategy_q_bits, axis=1)

# Reshape to one row per output-token count and one column per strategy
# label, cast the VRAM figures to int, and export the result as a LaTeX table.
pivot_df = (
    pivot_df
    .pivot(index='output_tokens', columns='strategy', values='vram_consumption')
    .astype(int)
    .reset_index()
    .rename(columns={'output_tokens': 'Output Tokens'})
    .rename_axis(None, axis=1)
    .rename_axis(None, axis=0)
)
pivot_df.to_latex('tables/performance/vram_consumption.tex', index=False)


In [None]:
# Print the pivoted table as plain text.
print(pivot_df.to_string())

In [None]:
# Start again from a fresh copy of `df`; the 'strategy' column is added to the copy only.
pivot_df = df.copy()
def strategy_q_bits(row):
    """Return the display label used as a pivot column for ``row``.

    The unquantized baseline (``layer_strategy == 'zero'``) is labelled
    ``'No Quant'``; every other row becomes
    ``'<layer_strategy> (<q_bits> bits)'`` with ``q_bits`` converted to int.

    NOTE(review): this repeats the helper of the same name defined in the
    VRAM table cell - consider defining it once.
    """
    if row['layer_strategy'] == 'zero':
        label = 'No Quant'
    else:
        label = f'{row["layer_strategy"]} ({int(row["q_bits"])} bits)'
    return label
pivot_df['strategy'] = pivot_df.apply(strategy_q_bits, axis=1)

# One row per output-token count, one column per strategy label; inference
# times are rounded to 2 decimals before the LaTeX export.
pivot_df = (
    pivot_df
    .pivot(index='output_tokens', columns='strategy', values='inference_time')
    .round(2)
    .reset_index()
    .rename(columns={'output_tokens': 'Output Tokens'})
    .rename_axis(None, axis=1)
    .rename_axis(None, axis=0)
)
pivot_df.to_latex('tables/performance/inference_consumption.tex', index=False)
pivot_df

In [None]:
# Print the pivoted table as plain text.
print(pivot_df.to_string())

In [None]:
def map_experiment(experiment):
    """Translate an experiment name into its quantization settings.

    The name is split on ``_`` and a trailing token containing ``"bits"`` is
    discarded. The remaining tokens are then read from the right:

    * names containing ``"zero"`` describe the unquantized model;
    * names containing ``"all"`` describe a fully quantized model, whose
      last token is the bit width;
    * any other name is read as ``..._<a>_<b>_<set>_<bits>``, where
      ``<a>_<b>`` is the layer strategy, ``<set>`` is an integer 1-4
      selecting the quarter of layers, and ``<bits>`` is the bit width.

    Args:
        experiment: Experiment identifier string.

    Returns:
        dict with the keys ``layer_strategy``, ``layer_set``, ``num_bits``
        and ``experiment_description``. ``layer_set`` (and, for the
        unquantized model, ``num_bits``) is NaN where it does not apply.
        ``num_bits`` is otherwise returned as the string token from the name.

    Raises:
        IndexError: if the name has too few ``_``-separated tokens.
        ValueError: if the layer-set token is not an integer.
        KeyError: if the layer-set token is an integer outside 1-4.
    """
    # Use a plain float NaN: numpy is not imported in the visible notebook
    # cells, so the previous `np.nan` raised NameError on the 'zero' and 'all'
    # branches. pandas treats float("nan") as a missing value just the same.
    missing = float("nan")

    tokens = experiment.split('_')
    if "bits" in tokens[-1]:
        tokens = tokens[:-1]

    # NOTE(review): these are substring tests on the whole name, so a name
    # that merely contains "zero" or "all" (e.g. inside a model name) is
    # classified here too - confirm against the actual naming scheme.
    if 'zero' in experiment:
        return {
            'layer_strategy': 'zero',
            'layer_set': missing,
            'num_bits': missing,
            'experiment_description': 'Model with no quantization'
        }

    if 'all' in experiment:
        num_bits = tokens[-1]
        return {
            'layer_strategy': 'all',
            'layer_set': missing,
            'num_bits': num_bits,
            'experiment_description': f'Model with all layers quantized to {num_bits} bits'
        }

    layer_strategy = f"{tokens[-4]}_{tokens[-3]}"
    layer_set = int(tokens[-2])
    num_bits = tokens[-1]

    layer_set_description = {
        1: 'first quarter',
        2: 'second quarter',
        3: 'third quarter',
        4: 'fourth quarter'
    }
    layer_set_name = layer_set_description[layer_set]

    return {
        'layer_strategy': layer_strategy,
        'layer_set': layer_set_name,
        'num_bits': num_bits,
        'experiment_description': f'Model with {layer_strategy} of layers ({layer_set_name}) quantized to {num_bits} bits'
    }
    

# LongBench task categories mapped to the dataset columns that belong to them.
benchmark_datasets = {
    "Multi-doc QA": ["hotpotqa", "2wikimqa", "musique", "dureader"],
    "Single-doc QA": ["multifieldqa_en", "multifieldqa_zh", "narrativeqa", "qasper"],
    "Summarization": ["gov_report", "qmsum", "multi_news", "vcsum"],
    "Few shot": ["triviaqa", "samsum", "trec", "lsht"],
    "Synthetic": ["passage_retrieval_en", "passage_count", "passage_retrieval_zh"],
    "Code": ["lcc", "repobench-p"],
}
# Category names, in declaration order.
LONG_BENCH_COLS = list(benchmark_datasets)
data = pd.read_csv("performance_detailed_long_bench.csv")

# Collapse every task category into one column: the row-wise mean of the
# dataset columns that belong to it.
for category, dataset_cols in benchmark_datasets.items():
    data[category] = data[dataset_cols].mean(axis=1)

# Keep the experiment metadata plus the per-category scores, add the mean over
# the categories, and list the best-scoring experiments first.
data = (
    data[['experiment_description', 'layer_strategy', 'layer_set', 'num_bits', *benchmark_datasets]]
    .assign(avg_score=lambda frame: frame[list(benchmark_datasets)].mean(axis=1))
    .sort_values(by='avg_score', ascending=False)
)
print(data.to_string())


In [None]:
# Preview only: the sorted frame is displayed, `data` itself is left unchanged.
data.sort_values(by=['layer_strategy', 'layer_set', 'num_bits'])

In [None]:


# Order the summary by layer strategy, quantized layer set and bit width.
# The two *_map columns are temporary ordinal sort keys (so the strategies
# sort zero < 1_4 < 1_2 < 3_4 < all rather than alphabetically) and are
# dropped again together with the free-text description.
# NOTE(review): this cell rebinds `data` and drops 'experiment_description',
# so running it twice without re-running the cell that builds `data` fails.
data = (
    data
    .assign(
        layer_strateg_map=lambda frame: frame['layer_strategy'].map(
            {'zero': 0, '1_4': 1, '1_2': 2, '3_4': 3, 'all': 4}
        ),
        layer_set_map=lambda frame: frame['layer_set'].map(
            {'first quarter': 1, 'second quarter': 2, 'third quarter': 3, 'fourth quarter': 4}
        ),
    )
    .sort_values(by=['layer_strateg_map', 'layer_set_map', 'num_bits'])
    .drop(columns=['layer_strateg_map', 'layer_set_map', 'experiment_description'])
    .reset_index(drop=True)
)

# Assign the filled columns back instead of calling fillna(inplace=True) on
# `data.layer_set` / `data.num_bits`: that chained in-place form operates on a
# temporary Series and does not update `data` under pandas Copy-on-Write.
data['layer_set'] = data['layer_set'].fillna("all layers")
data['num_bits'] = data['num_bits'].fillna("No Quant")

# Rename the score column for presentation and round to 2 decimal places.
data = data.rename(columns={'avg_score': 'Mean Score'}).round(2)

print(data.to_string())

data.to_latex('tables/performance/long_benchmarks.tex', index=False)


In [None]:
# Build the per-dataset LongBench table: load the detailed scores, order the
# rows by layer strategy / layer set / bit width using temporary ordinal sort
# keys, round to 2 decimals and keep only the columns wanted in the LaTeX
# table.
# 'experiment_description' is dropped right after loading, so no rename of
# that column is applied afterwards (it could never match anything).
detailed = (
    pd.read_csv("performance_detailed_long_bench.csv")
    .drop(columns=['experiment_description'])
    .assign(
        layer_strateg_map=lambda frame: frame['layer_strategy'].map(
            {'zero': 0, '1_4': 1, '1_2': 2, '3_4': 3, 'all': 4}
        ),
        layer_set_map=lambda frame: frame['layer_set'].map(
            {'first quarter': 1, 'second quarter': 2, 'third quarter': 3, 'fourth quarter': 4}
        ),
    )
    .sort_values(by=['layer_strateg_map', 'layer_set_map', 'num_bits'])
    .drop(columns=['layer_strateg_map', 'layer_set_map'])
    .round(2)
    .drop(columns=[
        'avg_score', 'model', 'score_diff',
        'cumulative_score_diff', 'score_percent_diff',
        'cumulative_score_percent_diff',
    ])
)
detailed.to_latex('tables/performance/detailed_long_benchmarks.tex', index=False)

In [None]:
# Print the detailed per-dataset table as plain text.
print(detailed.to_string())