In [30]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import csv
import json
import os


task = 'gsm'

# FastGen: gather the per-fraction metrics for `task` into ICLR/fastgen_<task>.csv.
strategy = 'fastgen'
root = 'ICLR/results_fastgen/results/Meta-Llama-3-8B-Instruct/hybrid/'
subdir_template = 'cache_bits=None__cache_length_pattern=tile__cache_strategy_pattern=tile__global_tokens=4__hybrid_strategies=specialx1,special_puncx1,special_punc_heavy_hitterx1,special_punc_heavy_hitter_windowx1,fullx1__max_cache_length=1__min_recovery_frac={frac}'

# CSV column name -> key in the metrics JSON, listed in output column order.
metric_keys = {
    'bert_precision': 'BertScore_precision',
    'bert_recall': 'BertScore_recall',
    'bert_f1': 'BertScore_f1',
    'rouge_1': 'Rouge_rouge1',
    'rouge_2': 'Rouge_rouge2',
    'rouge_L': 'Rouge_rougeL',
    'rouge_Lsum': 'Rouge_rougeLsum',
    'chatgpt_rouge': 'ChatGPT-Rouge_chatgpt_rouge',
    'chatgpt_coherent': 'ChatGPTJudge_coherent',
    'chatgpt_faithful': 'ChatGPTJudge_faithful',
    'chatgpt_helpful': 'ChatGPTJudge_helpful',
    'compress_ratio_avg': 'compression_ratio_avg',
    'cache_mem_gb': 'cache_memory_gb',
    'decode_toks_per_sec_top_10p': 'decode_toks_per_sec_top_10p',
}

records = []
for frac in (0.1, 0.3, 0.5, 0.7, 0.9):
    # Each fraction has its own run directory; `frac` fills min_recovery_frac.
    metrics_path = os.path.join(root, subdir_template.format(frac=frac), f'{task}_metrics.json')
    with open(metrics_path) as fh:
        data = json.load(fh)
    # The fraction itself is recorded as the row's cache_size.
    records.append({
        'strategy': strategy,
        'cache_size': frac,
        **{column: data[key] for column, key in metric_keys.items()},
    })

fastgen_df = pd.DataFrame(records)
fastgen_df.to_csv(f'ICLR/fastgen_{task}.csv', index=False)

# Scissorhands: heavy_hitter runs with attn_thresholding=True, one per max_cache_length.
strategy = 'scissorhands'
root = 'ICLR/results_nov_15/results/Meta-Llama-3-8B-Instruct/heavy_hitter/'
subdir_template = 'attn_thresholding=True__cache_bits=None__cache_length_pattern=tile__cache_strategy_pattern=tile__global_tokens=4__history_window_size=400__max_cache_length={frac}__recent_window=10'

# CSV column name -> key in the metrics JSON, listed in output column order.
metric_keys = {
    'bert_precision': 'BertScore_precision',
    'bert_recall': 'BertScore_recall',
    'bert_f1': 'BertScore_f1',
    'rouge_1': 'Rouge_rouge1',
    'rouge_2': 'Rouge_rouge2',
    'rouge_L': 'Rouge_rougeL',
    'rouge_Lsum': 'Rouge_rougeLsum',
    'chatgpt_rouge': 'ChatGPT-Rouge_chatgpt_rouge',
    'chatgpt_coherent': 'ChatGPTJudge_coherent',
    'chatgpt_faithful': 'ChatGPTJudge_faithful',
    'chatgpt_helpful': 'ChatGPTJudge_helpful',
    'compress_ratio_avg': 'compression_ratio_avg',
    'cache_mem_gb': 'cache_memory_gb',
    'decode_toks_per_sec_top_10p': 'decode_toks_per_sec_top_10p',
}

records = []
for frac in (0.1, 0.3, 0.5, 0.7, 0.9):
    metrics_path = os.path.join(root, subdir_template.format(frac=frac), f'{task}_metrics.json')
    with open(metrics_path) as fh:
        data = json.load(fh)
    # The fraction itself is recorded as the row's cache_size.
    records.append({
        'strategy': strategy,
        'cache_size': frac,
        **{column: data[key] for column, key in metric_keys.items()},
    })

scissorhands_df = pd.DataFrame(records)
scissorhands_df.to_csv(f'ICLR/scissorhands_{task}.csv', index=False)

# H2O: heavy_hitter runs with attn_thresholding=False, one per max_cache_length.
strategy = 'h2o'
root = 'ICLR/results_nov_15/results/Meta-Llama-3-8B-Instruct/heavy_hitter/'
subdir_template = 'attn_thresholding=False__cache_bits=None__cache_length_pattern=tile__cache_strategy_pattern=tile__global_tokens=4__history_window_size=1__max_cache_length={frac}__recent_window=10'

# CSV column name -> key in the metrics JSON, listed in output column order.
metric_keys = {
    'bert_precision': 'BertScore_precision',
    'bert_recall': 'BertScore_recall',
    'bert_f1': 'BertScore_f1',
    'rouge_1': 'Rouge_rouge1',
    'rouge_2': 'Rouge_rouge2',
    'rouge_L': 'Rouge_rougeL',
    'rouge_Lsum': 'Rouge_rougeLsum',
    'chatgpt_rouge': 'ChatGPT-Rouge_chatgpt_rouge',
    'chatgpt_coherent': 'ChatGPTJudge_coherent',
    'chatgpt_faithful': 'ChatGPTJudge_faithful',
    'chatgpt_helpful': 'ChatGPTJudge_helpful',
    'compress_ratio_avg': 'compression_ratio_avg',
    'cache_mem_gb': 'cache_memory_gb',
    'decode_toks_per_sec_top_10p': 'decode_toks_per_sec_top_10p',
}

records = []
for frac in (0.1, 0.3, 0.5, 0.7, 0.9):
    metrics_path = os.path.join(root, subdir_template.format(frac=frac), f'{task}_metrics.json')
    with open(metrics_path) as fh:
        data = json.load(fh)
    # The fraction itself is recorded as the row's cache_size.
    records.append({
        'strategy': strategy,
        'cache_size': frac,
        **{column: data[key] for column, key in metric_keys.items()},
    })

h2o_df = pd.DataFrame(records)
h2o_df.to_csv(f'ICLR/h2o_{task}.csv', index=False)

In [20]:
task = 'gsm_mc'

# FastGen: gather the per-fraction metrics for `task` into ICLR/fastgen_<task>.csv.
strategy = 'fastgen'
root = 'ICLR/results_fastgen/results/Meta-Llama-3-8B-Instruct/hybrid/'
subdir_template = 'cache_bits=None__cache_length_pattern=tile__cache_strategy_pattern=tile__global_tokens=4__hybrid_strategies=specialx1,special_puncx1,special_punc_heavy_hitterx1,special_punc_heavy_hitter_windowx1,fullx1__max_cache_length=1__min_recovery_frac={frac}'

# CSV column name -> key in the metrics JSON, listed in output column order.
metric_keys = {
    'accuracy': 'Accuracy',
    'exact_match': 'ExactMatch',
    'compress_ratio_avg': 'compression_ratio_avg',
    'cache_mem_gb': 'cache_memory_gb',
    'decode_toks_per_sec_top_10p': 'decode_toks_per_sec_top_10p',
}

records = []
for frac in (0.1, 0.3, 0.5, 0.7, 0.9):
    # Each fraction has its own run directory; `frac` fills min_recovery_frac.
    metrics_path = os.path.join(root, subdir_template.format(frac=frac), f'{task}_metrics.json')
    with open(metrics_path) as fh:
        data = json.load(fh)
    records.append({
        'strategy': strategy,
        'cache_size': frac,
        **{column: data[key] for column, key in metric_keys.items()},
    })

fastgen_df = pd.DataFrame(records)
fastgen_df.to_csv(f'ICLR/fastgen_{task}.csv', index=False)

# Scissorhands: heavy_hitter runs with attn_thresholding=True, one per max_cache_length.
strategy = 'scissorhands'
root = 'ICLR/results_scissorhands/results/Meta-Llama-3-8B-Instruct/heavy_hitter/'
subdir_template = 'attn_thresholding=True__cache_bits=None__cache_length_pattern=tile__cache_strategy_pattern=tile__global_tokens=4__history_window_size=400__max_cache_length={frac}__recent_window=10'

# CSV column name -> key in the metrics JSON, listed in output column order.
metric_keys = {
    'accuracy': 'Accuracy',
    'exact_match': 'ExactMatch',
    'compress_ratio_avg': 'compression_ratio_avg',
    'cache_mem_gb': 'cache_memory_gb',
    'decode_toks_per_sec_top_10p': 'decode_toks_per_sec_top_10p',
}

records = []
for frac in (0.1, 0.3, 0.5, 0.7, 0.9):
    metrics_path = os.path.join(root, subdir_template.format(frac=frac), f'{task}_metrics.json')
    with open(metrics_path) as fh:
        data = json.load(fh)
    records.append({
        'strategy': strategy,
        'cache_size': frac,
        **{column: data[key] for column, key in metric_keys.items()},
    })

scissorhands_df = pd.DataFrame(records)
scissorhands_df.to_csv(f'ICLR/scissorhands_{task}.csv', index=False)


# H2O: heavy_hitter runs with attn_thresholding=False, one per max_cache_length.
strategy = 'h2o'
root = 'ICLR/results_h2o/results/Meta-Llama-3-8B-Instruct/heavy_hitter/'
subdir_template = 'attn_thresholding=False__cache_bits=None__cache_length_pattern=tile__cache_strategy_pattern=tile__global_tokens=4__history_window_size=1__max_cache_length={frac}__recent_window=10'

# CSV column name -> key in the metrics JSON, listed in output column order.
metric_keys = {
    'accuracy': 'Accuracy',
    'exact_match': 'ExactMatch',
    'compress_ratio_avg': 'compression_ratio_avg',
    'cache_mem_gb': 'cache_memory_gb',
    'decode_toks_per_sec_top_10p': 'decode_toks_per_sec_top_10p',
}

records = []
for frac in (0.1, 0.3, 0.5, 0.7, 0.9):
    metrics_path = os.path.join(root, subdir_template.format(frac=frac), f'{task}_metrics.json')
    with open(metrics_path) as fh:
        data = json.load(fh)
    records.append({
        'strategy': strategy,
        'cache_size': frac,
        **{column: data[key] for column, key in metric_keys.items()},
    })

h2o_df = pd.DataFrame(records)
h2o_df.to_csv(f'ICLR/h2o_{task}.csv', index=False)

In [41]:
task = 'medqa'

# FastGen: gather the per-fraction metrics for `task` into ICLR/fastgen_<task>.csv.
strategy = 'fastgen'
root = 'ICLR/results_nov16_2/results/Meta-Llama-3-8B-Instruct/hybrid/'
subdir_template = 'cache_bits=None__cache_length_pattern=tile__cache_strategy_pattern=tile__global_tokens=4__hybrid_strategies=specialx1,special_puncx1,special_punc_heavy_hitterx1,special_punc_heavy_hitter_windowx1,fullx1__max_cache_length=1__min_recovery_frac={frac}'

# CSV column name -> key in the metrics JSON, listed in output column order.
metric_keys = {
    'bert_precision': 'BertScore_precision',
    'bert_recall': 'BertScore_recall',
    'bert_f1': 'BertScore_f1',
    'rouge_1': 'Rouge_rouge1',
    'rouge_2': 'Rouge_rouge2',
    'rouge_L': 'Rouge_rougeL',
    'rouge_Lsum': 'Rouge_rougeLsum',
    'chatgpt_rouge': 'ChatGPT-Rouge_chatgpt_rouge',
    'chatgpt_coherent': 'ChatGPTJudge_coherent',
    'chatgpt_faithful': 'ChatGPTJudge_faithful',
    'chatgpt_helpful': 'ChatGPTJudge_helpful',
    'compress_ratio_avg': 'compression_ratio_avg',
    'cache_mem_gb': 'cache_memory_gb',
    'decode_toks_per_sec_top_10p': 'decode_toks_per_sec_top_10p',
}

records = []
for frac in (0.1, 0.3, 0.5, 0.7, 0.9):
    # Each fraction has its own run directory; `frac` fills min_recovery_frac.
    metrics_path = os.path.join(root, subdir_template.format(frac=frac), f'{task}_metrics.json')
    with open(metrics_path) as fh:
        data = json.load(fh)
    # The fraction itself is recorded as the row's cache_size.
    records.append({
        'strategy': strategy,
        'cache_size': frac,
        **{column: data[key] for column, key in metric_keys.items()},
    })
fastgen_df = pd.DataFrame(records)
fastgen_df.to_csv(f'ICLR/fastgen_{task}.csv', index=False)

# H2O: heavy_hitter runs with attn_thresholding=False, one per max_cache_length.
strategy = 'h2o'
root = 'ICLR/results_nov16_2/results/Meta-Llama-3-8B-Instruct/heavy_hitter/'
subdir_template = 'attn_thresholding=False__cache_bits=None__cache_length_pattern=tile__cache_strategy_pattern=tile__global_tokens=4__history_window_size=1__max_cache_length={frac}__recent_window=10'

# CSV column name -> key in the metrics JSON, listed in output column order.
metric_keys = {
    'bert_precision': 'BertScore_precision',
    'bert_recall': 'BertScore_recall',
    'bert_f1': 'BertScore_f1',
    'rouge_1': 'Rouge_rouge1',
    'rouge_2': 'Rouge_rouge2',
    'rouge_L': 'Rouge_rougeL',
    'rouge_Lsum': 'Rouge_rougeLsum',
    'chatgpt_rouge': 'ChatGPT-Rouge_chatgpt_rouge',
    'chatgpt_coherent': 'ChatGPTJudge_coherent',
    'chatgpt_faithful': 'ChatGPTJudge_faithful',
    'chatgpt_helpful': 'ChatGPTJudge_helpful',
    'compress_ratio_avg': 'compression_ratio_avg',
    'cache_mem_gb': 'cache_memory_gb',
    'decode_toks_per_sec_top_10p': 'decode_toks_per_sec_top_10p',
}

records = []
for frac in (0.1, 0.3, 0.5, 0.7, 0.9):
    metrics_path = os.path.join(root, subdir_template.format(frac=frac), f'{task}_metrics.json')
    with open(metrics_path) as fh:
        data = json.load(fh)
    # The fraction itself is recorded as the row's cache_size.
    records.append({
        'strategy': strategy,
        'cache_size': frac,
        **{column: data[key] for column, key in metric_keys.items()},
    })
h2o_df = pd.DataFrame(records)
h2o_df.to_csv(f'ICLR/h2o_{task}.csv', index=False)

# Scissorhands: heavy_hitter runs with attn_thresholding=True, one per max_cache_length.
strategy = 'scissorhands'
root = 'ICLR/results_nov16_2/results/Meta-Llama-3-8B-Instruct/heavy_hitter/'
subdir_template = 'attn_thresholding=True__cache_bits=None__cache_length_pattern=tile__cache_strategy_pattern=tile__global_tokens=4__history_window_size=400__max_cache_length={frac}__recent_window=10'

# CSV column name -> key in the metrics JSON, listed in output column order.
metric_keys = {
    'bert_precision': 'BertScore_precision',
    'bert_recall': 'BertScore_recall',
    'bert_f1': 'BertScore_f1',
    'rouge_1': 'Rouge_rouge1',
    'rouge_2': 'Rouge_rouge2',
    'rouge_L': 'Rouge_rougeL',
    'rouge_Lsum': 'Rouge_rougeLsum',
    'chatgpt_rouge': 'ChatGPT-Rouge_chatgpt_rouge',
    'chatgpt_coherent': 'ChatGPTJudge_coherent',
    'chatgpt_faithful': 'ChatGPTJudge_faithful',
    'chatgpt_helpful': 'ChatGPTJudge_helpful',
    'compress_ratio_avg': 'compression_ratio_avg',
    'cache_mem_gb': 'cache_memory_gb',
    'decode_toks_per_sec_top_10p': 'decode_toks_per_sec_top_10p',
}

records = []
for frac in (0.1, 0.3, 0.5, 0.7, 0.9):
    metrics_path = os.path.join(root, subdir_template.format(frac=frac), f'{task}_metrics.json')
    with open(metrics_path) as fh:
        data = json.load(fh)
    # The fraction itself is recorded as the row's cache_size.
    records.append({
        'strategy': strategy,
        'cache_size': frac,
        **{column: data[key] for column, key in metric_keys.items()},
    })
scissorhands_df = pd.DataFrame(records)
scissorhands_df.to_csv(f'ICLR/scissorhands_{task}.csv', index=False)

In [22]:
task = 'medqa_mc'

# FastGen: gather the per-fraction metrics for `task` into ICLR/fastgen_<task>.csv.
strategy = 'fastgen'
root = 'ICLR/results_fastgen/results/Meta-Llama-3-8B-Instruct/hybrid/'
subdir_template = 'cache_bits=None__cache_length_pattern=tile__cache_strategy_pattern=tile__global_tokens=4__hybrid_strategies=specialx1,special_puncx1,special_punc_heavy_hitterx1,special_punc_heavy_hitter_windowx1,fullx1__max_cache_length=1__min_recovery_frac={frac}'

# CSV column name -> key in the metrics JSON, listed in output column order.
metric_keys = {
    'accuracy': 'Accuracy',
    'compress_ratio_avg': 'compression_ratio_avg',
    'cache_mem_gb': 'cache_memory_gb',
    'decode_toks_per_sec_top_10p': 'decode_toks_per_sec_top_10p',
}

records = []
for frac in (0.1, 0.3, 0.5, 0.7, 0.9):
    # Each fraction has its own run directory; `frac` fills min_recovery_frac.
    metrics_path = os.path.join(root, subdir_template.format(frac=frac), f'{task}_metrics.json')
    with open(metrics_path) as fh:
        data = json.load(fh)
    records.append({
        'strategy': strategy,
        'cache_size': frac,
        **{column: data[key] for column, key in metric_keys.items()},
    })

fastgen_df = pd.DataFrame(records)
fastgen_df.to_csv(f'ICLR/fastgen_{task}.csv', index=False)

# Scissorhands: heavy_hitter runs with attn_thresholding=True, one per max_cache_length.
strategy = 'scissorhands'
root = 'ICLR/results_scissorhands/results/Meta-Llama-3-8B-Instruct/heavy_hitter/'
subdir_template = 'attn_thresholding=True__cache_bits=None__cache_length_pattern=tile__cache_strategy_pattern=tile__global_tokens=4__history_window_size=400__max_cache_length={frac}__recent_window=10'

# CSV column name -> key in the metrics JSON, listed in output column order.
metric_keys = {
    'accuracy': 'Accuracy',
    'compress_ratio_avg': 'compression_ratio_avg',
    'cache_mem_gb': 'cache_memory_gb',
    'decode_toks_per_sec_top_10p': 'decode_toks_per_sec_top_10p',
}

records = []
for frac in (0.1, 0.3, 0.5, 0.7, 0.9):
    metrics_path = os.path.join(root, subdir_template.format(frac=frac), f'{task}_metrics.json')
    with open(metrics_path) as fh:
        data = json.load(fh)
    records.append({
        'strategy': strategy,
        'cache_size': frac,
        **{column: data[key] for column, key in metric_keys.items()},
    })

scissorhands_df = pd.DataFrame(records)
scissorhands_df.to_csv(f'ICLR/scissorhands_{task}.csv', index=False)

# H2O: heavy_hitter runs with attn_thresholding=False, one per max_cache_length.
strategy = 'h2o'
root = 'ICLR/results_h2o/results/Meta-Llama-3-8B-Instruct/heavy_hitter/'
subdir_template = 'attn_thresholding=False__cache_bits=None__cache_length_pattern=tile__cache_strategy_pattern=tile__global_tokens=4__history_window_size=1__max_cache_length={frac}__recent_window=10'

# CSV column name -> key in the metrics JSON, listed in output column order.
metric_keys = {
    'accuracy': 'Accuracy',
    'compress_ratio_avg': 'compression_ratio_avg',
    'cache_mem_gb': 'cache_memory_gb',
    'decode_toks_per_sec_top_10p': 'decode_toks_per_sec_top_10p',
}

records = []
for frac in (0.1, 0.3, 0.5, 0.7, 0.9):
    metrics_path = os.path.join(root, subdir_template.format(frac=frac), f'{task}_metrics.json')
    with open(metrics_path) as fh:
        data = json.load(fh)
    records.append({
        'strategy': strategy,
        'cache_size': frac,
        **{column: data[key] for column, key in metric_keys.items()},
    })

h2o_df = pd.DataFrame(records)
h2o_df.to_csv(f'ICLR/h2o_{task}.csv', index=False)

    

In [44]:
task = 'rulercwe'

# FastGen: gather the per-fraction metrics for `task` into ICLR/fastgen_<task>.csv.
# The 0.1 and 0.9 runs are read from a different results directory (root2)
# than the 0.3 / 0.5 / 0.7 runs (root).
strategy = 'fastgen'
root = 'ICLR/results_fastgen/results/Meta-Llama-3-8B-Instruct/hybrid/'
# TODO(review): 'results_????' looks like an unfilled placeholder; the 0.1 and
# 0.9 metrics files cannot be found until the real directory name is set here.
root2 = 'ICLR/results_????/results/Meta-Llama-3-8B-Instruct/hybrid/'
subdir_template = 'cache_bits=None__cache_length_pattern=tile__cache_strategy_pattern=tile__global_tokens=4__hybrid_strategies=specialx1,special_puncx1,special_punc_heavy_hitterx1,special_punc_heavy_hitter_windowx1,fullx1__max_cache_length=1__min_recovery_frac={frac}'

# Results directory that holds each fraction's run.
root_for_frac = {0.1: root2, 0.3: root, 0.5: root, 0.7: root, 0.9: root2}

# CSV column name -> key in the metrics JSON, listed in output column order.
metric_keys = {
    'string_match': 'StringMatch_score',
    'compress_ratio_avg': 'compression_ratio_avg',
    'cache_mem_gb': 'cache_memory_gb',
    'decode_toks_per_sec_top_10p': 'decode_toks_per_sec_top_10p',
}

records = []
# Walk the fractions in ascending order so the CSV rows are sorted by
# cache_size (previously they were written as 0.1, 0.9, 0.3, 0.5, 0.7).
for frac in sorted(root_for_frac):
    metrics_path = os.path.join(
        root_for_frac[frac], subdir_template.format(frac=frac), f'{task}_metrics.json'
    )
    with open(metrics_path) as fh:
        data = json.load(fh)
    records.append({
        'strategy': strategy,
        'cache_size': frac,
        **{column: data[key] for column, key in metric_keys.items()},
    })

fastgen_df = pd.DataFrame(records)
fastgen_df.to_csv(f'ICLR/fastgen_{task}.csv', index=False)

# Scissorhands: heavy_hitter runs with attn_thresholding=True, one per max_cache_length.
strategy = 'scissorhands'
root = 'ICLR/results_scissorhands/results/Meta-Llama-3-8B-Instruct/heavy_hitter/'
subdir_template = 'attn_thresholding=True__cache_bits=None__cache_length_pattern=tile__cache_strategy_pattern=tile__global_tokens=4__history_window_size=400__max_cache_length={frac}__recent_window=10'

# CSV column name -> key in the metrics JSON, listed in output column order.
metric_keys = {
    'string_match': 'StringMatch_score',
    'compress_ratio_avg': 'compression_ratio_avg',
    'cache_mem_gb': 'cache_memory_gb',
    'decode_toks_per_sec_top_10p': 'decode_toks_per_sec_top_10p',
}

records = []
for frac in (0.1, 0.3, 0.5, 0.7, 0.9):
    metrics_path = os.path.join(root, subdir_template.format(frac=frac), f'{task}_metrics.json')
    with open(metrics_path) as fh:
        data = json.load(fh)
    records.append({
        'strategy': strategy,
        'cache_size': frac,
        **{column: data[key] for column, key in metric_keys.items()},
    })
scissorhands_df = pd.DataFrame(records)
scissorhands_df.to_csv(f'ICLR/scissorhands_{task}.csv', index=False)

# H2O: heavy_hitter runs with attn_thresholding=False, one per max_cache_length.
strategy = 'h2o'
root = 'ICLR/results_h2o/results/Meta-Llama-3-8B-Instruct/heavy_hitter/'
subdir_template = 'attn_thresholding=False__cache_bits=None__cache_length_pattern=tile__cache_strategy_pattern=tile__global_tokens=4__history_window_size=1__max_cache_length={frac}__recent_window=10'

# CSV column name -> key in the metrics JSON, listed in output column order.
metric_keys = {
    'string_match': 'StringMatch_score',
    'compress_ratio_avg': 'compression_ratio_avg',
    'cache_mem_gb': 'cache_memory_gb',
    'decode_toks_per_sec_top_10p': 'decode_toks_per_sec_top_10p',
}

records = []
for frac in (0.1, 0.3, 0.5, 0.7, 0.9):
    metrics_path = os.path.join(root, subdir_template.format(frac=frac), f'{task}_metrics.json')
    with open(metrics_path) as fh:
        data = json.load(fh)
    records.append({
        'strategy': strategy,
        'cache_size': frac,
        **{column: data[key] for column, key in metric_keys.items()},
    })
h2o_df = pd.DataFrame(records)
h2o_df.to_csv(f'ICLR/h2o_{task}.csv', index=False)


FileNotFoundError: [Errno 2] No such file or directory: 'ICLR/results_nov16_2/results/Meta-Llama-3-8B-Instruct/hybrid/cache_bits=None__cache_length_pattern=tile__cache_strategy_pattern=tile__global_tokens=4__hybrid_strategies=specialx1,special_puncx1,special_punc_heavy_hitterx1,special_punc_heavy_hitter_windowx1,fullx1__max_cache_length=1__min_recovery_frac=0.1/rulercwe_metrics.json'

In [43]:
task = "rulerniah"

# H2O: heavy_hitter runs with attn_thresholding=False, one per max_cache_length.
strategy = 'h2o'
root = 'ICLR/results_nov_15/results/Meta-Llama-3-8B-Instruct/heavy_hitter/'
subdir_template = 'attn_thresholding=False__cache_bits=None__cache_length_pattern=tile__cache_strategy_pattern=tile__global_tokens=4__history_window_size=1__max_cache_length={frac}__recent_window=10'

# CSV column name -> key in the metrics JSON, listed in output column order.
metric_keys = {
    'string_match': 'StringMatch_score',
    'compress_ratio_avg': 'compression_ratio_avg',
    'cache_mem_gb': 'cache_memory_gb',
    'decode_toks_per_sec_top_10p': 'decode_toks_per_sec_top_10p',
}

records = []
for frac in (0.1, 0.3, 0.5, 0.7, 0.9):
    metrics_path = os.path.join(root, subdir_template.format(frac=frac), f'{task}_metrics.json')
    with open(metrics_path) as fh:
        data = json.load(fh)
    records.append({
        'strategy': strategy,
        'cache_size': frac,
        **{column: data[key] for column, key in metric_keys.items()},
    })
h2o_df = pd.DataFrame(records)
h2o_df.to_csv(f'ICLR/h2o_{task}.csv', index=False)

# Scissorhands: heavy_hitter runs with attn_thresholding=True, one per max_cache_length.
strategy = 'scissorhands'
root = 'ICLR/results_nov_15/results/Meta-Llama-3-8B-Instruct/heavy_hitter/'
subdir_template = 'attn_thresholding=True__cache_bits=None__cache_length_pattern=tile__cache_strategy_pattern=tile__global_tokens=4__history_window_size=400__max_cache_length={frac}__recent_window=10'

# CSV column name -> key in the metrics JSON, listed in output column order.
metric_keys = {
    'string_match': 'StringMatch_score',
    'compress_ratio_avg': 'compression_ratio_avg',
    'cache_mem_gb': 'cache_memory_gb',
    'decode_toks_per_sec_top_10p': 'decode_toks_per_sec_top_10p',
}

records = []
for frac in (0.1, 0.3, 0.5, 0.7, 0.9):
    metrics_path = os.path.join(root, subdir_template.format(frac=frac), f'{task}_metrics.json')
    with open(metrics_path) as fh:
        data = json.load(fh)
    records.append({
        'strategy': strategy,
        'cache_size': frac,
        **{column: data[key] for column, key in metric_keys.items()},
    })
scissorhands_df = pd.DataFrame(records)
scissorhands_df.to_csv(f'ICLR/scissorhands_{task}.csv', index=False)

# FastGen: gather the per-fraction metrics for `task` into ICLR/fastgen_<task>.csv.
strategy = 'fastgen'
root = 'ICLR/results_nov16_2/results/Meta-Llama-3-8B-Instruct/hybrid/'
subdir_template = 'cache_bits=None__cache_length_pattern=tile__cache_strategy_pattern=tile__global_tokens=4__hybrid_strategies=specialx1,special_puncx1,special_punc_heavy_hitterx1,special_punc_heavy_hitter_windowx1,fullx1__max_cache_length=1__min_recovery_frac={frac}'

# CSV column name -> key in the metrics JSON, listed in output column order.
metric_keys = {
    'string_match': 'StringMatch_score',
    'compress_ratio_avg': 'compression_ratio_avg',
    'cache_mem_gb': 'cache_memory_gb',
    'decode_toks_per_sec_top_10p': 'decode_toks_per_sec_top_10p',
}

records = []
for frac in (0.1, 0.3, 0.5, 0.7, 0.9):
    # Each fraction has its own run directory; `frac` fills min_recovery_frac.
    metrics_path = os.path.join(root, subdir_template.format(frac=frac), f'{task}_metrics.json')
    with open(metrics_path) as fh:
        data = json.load(fh)
    records.append({
        'strategy': strategy,
        'cache_size': frac,
        **{column: data[key] for column, key in metric_keys.items()},
    })
fastgen_df = pd.DataFrame(records)
fastgen_df.to_csv(f'ICLR/fastgen_{task}.csv', index=False)


In [38]:
task = "gsm"

# L2: gather the per-max_cache_length metrics for `task` into ICLR/l2_<task>.csv.
strategy = 'l2'
root = 'results_gsm_final/l2/'
subdir_template = 'cache_bits=None__cache_length_pattern=tile__cache_strategy_pattern=tile__global_tokens=4__max_cache_length={frac}__recent_window=10'

# CSV column name -> key in the metrics JSON, listed in output column order.
metric_keys = {
    'bert_precision': 'BertScore_precision',
    'bert_recall': 'BertScore_recall',
    'bert_f1': 'BertScore_f1',
    'rouge_1': 'Rouge_rouge1',
    'rouge_2': 'Rouge_rouge2',
    'rouge_L': 'Rouge_rougeL',
    'rouge_Lsum': 'Rouge_rougeLsum',
    'chatgpt_rouge': 'ChatGPT-Rouge_chatgpt_rouge',
    'chatgpt_coherent': 'ChatGPTJudge_coherent',
    'chatgpt_faithful': 'ChatGPTJudge_faithful',
    'chatgpt_helpful': 'ChatGPTJudge_helpful',
    'compress_ratio_avg': 'compression_ratio_avg',
    'cache_mem_gb': 'cache_memory_gb',
    'decode_toks_per_sec_top_10p': 'decode_toks_per_sec_top_10p',
}

records = []
for frac in (0.1, 0.3, 0.5, 0.7, 0.9):
    metrics_path = os.path.join(root, subdir_template.format(frac=frac), f'{task}_metrics.json')
    with open(metrics_path) as fh:
        data = json.load(fh)
    records.append({
        'strategy': strategy,
        'cache_size': frac,
        **{column: data[key] for column, key in metric_keys.items()},
    })
l2_df = pd.DataFrame(records)
l2_df.to_csv(f'ICLR/l2_{task}.csv', index=False)

# LSH: gather the per-max_cache_length metrics (lsh_dim=16 runs) into ICLR/lsh_<task>.csv.
strategy = 'lsh'
root = 'results_gsm_final/lsh/'
subdir_template = 'cache_bits=None__cache_length_pattern=tile__cache_strategy_pattern=tile__global_tokens=4__lsh_dim=16__max_cache_length={frac}__recent_window=10'

# CSV column name -> key in the metrics JSON, listed in output column order.
metric_keys = {
    'bert_precision': 'BertScore_precision',
    'bert_recall': 'BertScore_recall',
    'bert_f1': 'BertScore_f1',
    'rouge_1': 'Rouge_rouge1',
    'rouge_2': 'Rouge_rouge2',
    'rouge_L': 'Rouge_rougeL',
    'rouge_Lsum': 'Rouge_rougeLsum',
    'chatgpt_rouge': 'ChatGPT-Rouge_chatgpt_rouge',
    'chatgpt_coherent': 'ChatGPTJudge_coherent',
    'chatgpt_faithful': 'ChatGPTJudge_faithful',
    'chatgpt_helpful': 'ChatGPTJudge_helpful',
    'compress_ratio_avg': 'compression_ratio_avg',
    'cache_mem_gb': 'cache_memory_gb',
    'decode_toks_per_sec_top_10p': 'decode_toks_per_sec_top_10p',
}

records = []
for frac in (0.1, 0.3, 0.5, 0.7, 0.9):
    metrics_path = os.path.join(root, subdir_template.format(frac=frac), f'{task}_metrics.json')
    with open(metrics_path) as fh:
        data = json.load(fh)
    records.append({
        'strategy': strategy,
        'cache_size': frac,
        **{column: data[key] for column, key in metric_keys.items()},
    })
lsh_df = pd.DataFrame(records)
lsh_df.to_csv(f'ICLR/lsh_{task}.csv', index=False)

