In [1]:
import numpy as numpy
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import scienceplots
import copy


base_path = "./data/processed_data/"
path_counter = base_path + "counter/"
path_mean_std = base_path + "mean_std/"

base_plot_path = "./plots/"

base_name = "results_{}__{}_{}.pt"

dataset_list = ['c4', 'oscar', 'redpajama', 'wikitext2', 'pile', 'gsm8k', 'svamp', 'mawps', 'anli_r1', 'esnli', 'rte', 'boolq', 'commonsense_qa', 'race',
                'winogrande', 'wmt14', 'iwslt', 'opc', 'ds1000', 'mbpp']


model_list = ['meta-llama/Llama-2-7b-hf', 'mistralai/Mistral-7B-v0.1', 'microsoft/phi-2', 'baichuan-inc/Baichuan-7B', 'Qwen/Qwen2.5-7B' ]
models_names_print = ['Llama', 'Phi', 'Mistral', 'Baichuan', 'Qwen']
model_name = models_names_print #[name.split('/')[1] for name in model_list] #, 'Llama-3.2-1B']
rename_dictionary_model_names = {model_list[i]: models_names_print[i] for i in range(len(model_list))}

sparsity_list = [0.1, 0.3, 0.5, 0.7, 0.9]

dataset_list_print = ['C4', 'OSCAR', 'RedPajama', 'WikiText', 'Pile', 'GSM8K', 'SVAMP', 'MAWPS', 'ANLI', 'e-SNLI', 'RTE', 'BoolQ', 'CommonsenseQA', 'RACE', 'WinoGrande', 'WMT14', 'IWSLT', 'RefineCode', 'DS1000', 'MBPP']
rename_dictionary_dataset = {dataset_list[i]: dataset_list_print[i] for i in range(len(dataset_list))}

plt.style.use('science')


In [2]:
#Create a custom color palette for the plot

original_cmap = sns.color_palette("Spectral_r", as_cmap=True) 
colors = np.concatenate((original_cmap(np.linspace(0, 0.35, 4)), original_cmap(np.linspace(0.6, 1, 4))), axis=0) # Extract 10 colors
custom_palette = np.clip(colors, 0, 1) # Ensure valid RGB range
custom_palette = [tuple(c) for c in custom_palette]  # Convert array to list of 


palette_heatmap =  sns.color_palette("Blues", as_cmap=True) 
colors_blues = np.concatenate((palette_heatmap(np.linspace(0, 0.35, 1)), palette_heatmap(np.linspace(0.8, 1, 10))), axis=0) # Extract 10 colors
custom_blues = np.clip(colors_blues, 0, 1) # Ensure valid RGB range
custom_blues = [tuple(c) for c in custom_blues]  # Convert array to list of 

one_color = sns.color_palette("mako", as_cmap=True).colors[190]

In [3]:
#results = pd.read_csv("./only_completed.csv") # results.csv
#results = pd.read_csv("./only_completed_v2.csv") # results.csv
#results = pd.read_csv("./only_mistral_qwen.csv") # results.csv
results = pd.read_csv("./results.csv") # results.csv

metric_map = {
    'c4': 'ppl',
    'oscar': 'ppl',
    'redpajama': 'ppl',
    'wikitext2': 'ppl',
    'pile': 'ppl',
    'gsm8k': 'acc',
    'svamp': 'acc',
    'mawps': 'acc',
    'anli_r1': 'acc',
    'esnli': 'acc',
    'rte': 'acc',
    'boolq': 'acc',
    'commonsense_qa': 'acc',
    'race': 'acc',
    'winogrande': 'acc',
    'wmt14': 'bleu',
    'iwslt': 'bleu',
    'opc': 'f1',
    'ds1000': 'f1',
    'mbpp': 'f1',
}

task_map = {
    'none': 'Dense',
    'c4': 'Language Modeling',
    'oscar': 'Language Modeling',
    'redpajama': 'Language Modeling',
    'wikitext2': 'Language Modeling',
    'pile': 'Language Modeling',
    'gsm8k': 'Mathematical Reasoning',
    'svamp': 'Mathematical Reasoning',
    'mawps': 'Mathematical Reasoning',
    'anli_r1': 'NLI',
    'esnli': 'NLI',
    'rte': 'NLI',
    'boolq': 'Question Answering',
    'commonsense_qa': 'Question Answering',
    'race': 'Question Answering',
    'winogrande': 'Question Answering',
    'wmt14': 'Translation',
    'iwslt': 'Translation',
    'opc': 'Coding',
    'ds1000': 'Coding',
    'mbpp': 'Coding',
}

results["dataset_group"] = results["dataset"].map(task_map)
results["pruning_group"] = results["pruning_data"].map(task_map)

order_columns = ['Dense', 'C4', 'OSCAR', 'Pile', 'RedPajama', 'WikiText', 'ANLI', 'BoolQ', 'CommonsenseQA', 'e-SNLI', 'GSM8K', 'MAWPS', 'RACE',  'RTE',   'SVAMP',  'WinoGrande', 'IWSLT',  'WMT14', 'DS1000', 'MBPP', 'RefineCode']
order_columns_grouped = ['Dense', 'Language Modeling', 'Mathematical Reasoning', 'NLI', 'Question Answering', 'Translation', 'Coding']

headers = {
    'selector': 'th.col_heading',
    'props': 'background-color: #5E17EB; color: white;'
}

index_style = {
    'selector': 'th.index_name',
    'props': 'background-color: #5E17EB; color: white;'
}

#none is the dense model
results['pruning_data'] = results['pruning_data'].replace({'none': 'dense'})

results['metric'] = results['dataset'].map(metric_map)
print(results['metric'].unique())

print(results['dataset'].unique())

results['value'] = results.apply(lambda row: row[row['metric']], axis=1)
precision = 3
results['value'] = results['value'].round(precision)


results['model'] = results['model'].replace(rename_dictionary_model_names)
results['dataset'] = results['dataset'].replace(rename_dictionary_dataset)
results['dataset'] = results['dataset'].replace({'dense': 'Dense'})

results['pruning_data'] = results['pruning_data'].replace(rename_dictionary_dataset)
results['pruning_data'] = results['pruning_data'].replace({'dense': 'Dense'})

['acc' 'ppl' 'f1' 'bleu']
['anli_r1' 'boolq' 'c4' 'commonsense_qa' 'ds1000' 'esnli' 'gsm8k' 'iwslt'
 'mawps' 'mbpp' 'opc' 'oscar' 'pile' 'race' 'redpajama' 'rte' 'svamp'
 'wikitext2' 'winogrande' 'wmt14']


In [4]:
"""#Group bymodel, sparsity, pruning_data, datset -> if there are multiple values for the same pruning_data and dataset, return an error

print(len(results))
grouped = results.groupby(['model', 'sparsity', 'pruning_data', 'dataset']).aggregate({'acc': 'first', 'bleu': 'first', 'ppl': 'first', 'f1': 'first'}).reset_index()
print(grouped)

#'DataFrameGroupBy' object has no attribute 'to_csv'

grouped.to_csv('unique_results.csv', index=False)"""

"#Group bymodel, sparsity, pruning_data, datset -> if there are multiple values for the same pruning_data and dataset, return an error\n\nprint(len(results))\ngrouped = results.groupby(['model', 'sparsity', 'pruning_data', 'dataset']).aggregate({'acc': 'first', 'bleu': 'first', 'ppl': 'first', 'f1': 'first'}).reset_index()\nprint(grouped)\n\n#'DataFrameGroupBy' object has no attribute 'to_csv'\n\ngrouped.to_csv('unique_results.csv', index=False)"

In [5]:
def generate_table(data, metric, highlight_min=False, order_columns=order_columns):
    data_metric = data[data['metric'] == metric]
    
    pivot_table = (
        pd.pivot_table(data = data_metric.round(precision),
            columns='pruning_data',
            index='dataset',
            values='value'
            ).reindex(order_columns, axis=1)
    )
    
    #print(pivot_table.style.format(precision=3).to_latex())
    
    if highlight_min:
        pivot_style = (
            pivot_table
                .style
                    .set_table_styles([headers,index_style])
                    .set_properties(**{'background-color': '#ECE3FF','color': 'black'})
                    .highlight_min(order_columns[1:], axis=1, color="yellow")
                    .highlight_min(axis=1, color="red")
                    .format(precision=precision)
        )
    else:
        pivot_style = (
            pivot_table
                .style
                    .set_table_styles([headers,index_style])
                    .set_properties(**{'background-color': '#ECE3FF','color': 'black'})
                    .highlight_max(order_columns[1:], axis=1, color="yellow")
                    .highlight_max(axis=1, color="red")
                    .format(precision=precision)
        )
    
    return pivot_style

In [6]:
"""for i in range(len(model_list)):
    
    for sparsity in [0.9]: #sparsity_list:
        data_model = results[(results['model'] == model_list[i]) & ((results['sparsity'] == sparsity) | (results['sparsity'] == 0.0))]

        tmp_pivot_style_ppl = generate_table(data_model, 'ppl', highlight_min=True)
        tmp_pivot_style_acc = generate_table(data_model, 'acc')
        tmp_pivot_style_bleu = generate_table(data_model, 'bleu')
        tmp_pivot_style_f1 = generate_table(data_model, 'f1')

        print(model_name[i] + ", sparsity: " + str(sparsity) + ", PPL")
        display(tmp_pivot_style_ppl)

        print(model_name[i] + ", sparsity: " + str(sparsity) + ", ACC")
        display(tmp_pivot_style_acc)

        print(model_name[i] + ", sparsity: " + str(sparsity) + ", BLEU")
        display(tmp_pivot_style_bleu)

        print(model_name[i] + ", sparsity: " + str(sparsity) + ", F1")
        display(tmp_pivot_style_f1)
        
        print("--------------------\n\n\n\n\n")
    
"""

'for i in range(len(model_list)):\n    \n    for sparsity in [0.9]: #sparsity_list:\n        data_model = results[(results[\'model\'] == model_list[i]) & ((results[\'sparsity\'] == sparsity) | (results[\'sparsity\'] == 0.0))]\n\n        tmp_pivot_style_ppl = generate_table(data_model, \'ppl\', highlight_min=True)\n        tmp_pivot_style_acc = generate_table(data_model, \'acc\')\n        tmp_pivot_style_bleu = generate_table(data_model, \'bleu\')\n        tmp_pivot_style_f1 = generate_table(data_model, \'f1\')\n\n        print(model_name[i] + ", sparsity: " + str(sparsity) + ", PPL")\n        display(tmp_pivot_style_ppl)\n\n        print(model_name[i] + ", sparsity: " + str(sparsity) + ", ACC")\n        display(tmp_pivot_style_acc)\n\n        print(model_name[i] + ", sparsity: " + str(sparsity) + ", BLEU")\n        display(tmp_pivot_style_bleu)\n\n        print(model_name[i] + ", sparsity: " + str(sparsity) + ", F1")\n        display(tmp_pivot_style_f1)\n        \n        print("------

<h1>Percentage results</h1>

In [7]:
def get_percentage_values(data, metric):
    
    list_this__dataset = data['dataset'].unique()
    analyze_dataset = []
    for datasetname in list_this__dataset: #Voglio solo i dataset, non anche le medie tra i vari datset (all, all_prototype, ...)
        if datasetname in dataset_list_print:
            analyze_dataset.append(datasetname)
    
    for group_value in analyze_dataset: #analyze_dataset:
        group_mask = data['dataset'] == group_value
        dense_mask = group_mask & (data['pruning_data'] == 'Dense')

        # Estrai valore di riferimento
        ref_rows = data.loc[dense_mask, metric]

        if not ref_rows.empty:
            ref_value = ref_rows.iloc[0]
            # Assegna i valori normalizzati direttamente tramite .loc (modo sicuro)
            if metric == "ppl":
                data.loc[group_mask, 'percentage'] = ref_value / data.loc[group_mask, metric]
            else:
                data.loc[group_mask, 'percentage'] = data.loc[group_mask, metric] / ref_value
        else:
            print(f"Nessuna riga 'dense' per a3 == {group_value}")
            
    return data

def generate_table_percentage(data, metric, order_columns=order_columns):
    data_metric = data[data['metric'] == metric]
    data_metric = get_percentage_values(data_metric, metric)
    
    pivot_table = (
        pd.pivot_table(data = data_metric.round(precision),
            columns='pruning_data',
            index='dataset',
            values='percentage'
            ).reindex(order_columns, axis=1)
    )
    
    #print(pivot_table.style.format(precision=3).to_latex())
    
    pivot_style = (
        pivot_table
            .style
                .set_table_styles([headers,index_style])
                .set_properties(**{'background-color': '#ECE3FF','color': 'black'})
                .highlight_max(order_columns[1:], axis=1, color="yellow")
                .highlight_max(axis=1, color="red")
                .format(precision=precision)
    )
    
    return pivot_style

def get_table(data):
    pivot_table = (
        pd.pivot_table(data = data,
            columns='pruning_data',
            index='dataset',
            values='percentage'
            ).reindex(order_columns, axis=1)
    )
    
    pivot_style = (
        pivot_table
            .style
                .set_table_styles([headers,index_style])
                .set_properties(**{'background-color': '#ECE3FF','color': 'black'})
                .highlight_max(order_columns[1:], axis=1, color="yellow")
                .highlight_max(axis=1, color="red")
                .format(precision=precision)
    )
    
    return pivot_style


"""
for i in range(len(model_list)):
    
    for sparsity in sparsity_list:
        data_model = results[(results['model'] == model_list[i]) & ((results['sparsity'] == sparsity) | (results['sparsity'] == 0.0))]

        data_model['percentage'] = pd.NA
        
        tmp_pivot_style_ppl = generate_table_percentage(data_model, 'ppl')
        tmp_pivot_style_acc = generate_table_percentage(data_model, 'acc')
        tmp_pivot_style_bleu = generate_table_percentage(data_model, 'bleu')
        tmp_pivot_style_f1 = generate_table_percentage(data_model, 'f1')

        print(model_name[i] + ", sparsity: " + str(sparsity) + ", PPL")
        display(tmp_pivot_style_ppl)

        print(model_name[i] + ", sparsity: " + str(sparsity) + ", ACC")
        display(tmp_pivot_style_acc)

        print(model_name[i] + ", sparsity: " + str(sparsity) + ", BLEU")
        display(tmp_pivot_style_bleu)

        print(model_name[i] + ", sparsity: " + str(sparsity) + ", F1")
        display(tmp_pivot_style_f1)
        
        print("--------------------\n\n\n\n\n")
"""

'\nfor i in range(len(model_list)):\n    \n    for sparsity in sparsity_list:\n        data_model = results[(results[\'model\'] == model_list[i]) & ((results[\'sparsity\'] == sparsity) | (results[\'sparsity\'] == 0.0))]\n\n        data_model[\'percentage\'] = pd.NA\n        \n        tmp_pivot_style_ppl = generate_table_percentage(data_model, \'ppl\')\n        tmp_pivot_style_acc = generate_table_percentage(data_model, \'acc\')\n        tmp_pivot_style_bleu = generate_table_percentage(data_model, \'bleu\')\n        tmp_pivot_style_f1 = generate_table_percentage(data_model, \'f1\')\n\n        print(model_name[i] + ", sparsity: " + str(sparsity) + ", PPL")\n        display(tmp_pivot_style_ppl)\n\n        print(model_name[i] + ", sparsity: " + str(sparsity) + ", ACC")\n        display(tmp_pivot_style_acc)\n\n        print(model_name[i] + ", sparsity: " + str(sparsity) + ", BLEU")\n        display(tmp_pivot_style_bleu)\n\n        print(model_name[i] + ", sparsity: " + str(sparsity) + ", F1

<h1>Drop in task performance</h1>

In [8]:
def generate_table_percentage_task(data, metric, order_columns_grouped=order_columns_grouped):
    data_metric = data[data['metric'] == metric]
    data_metric = get_percentage_values(data_metric, metric)
    data_metric = data_metric.groupby(['dataset_group', 'pruning_group']).agg(percentage_group=('percentage', 'mean'))
    
    pivot_table = (
        pd.pivot_table(data = data_metric.round(precision),
            columns='pruning_group',
            index='dataset_group',
            values='percentage_group'
            ).reindex(order_columns_grouped, axis=1)
    )
    
    pivot_style = (
        pivot_table
            .style
                .set_table_styles([headers,index_style])
                .set_properties(**{'background-color': '#ECE3FF','color': 'black'})
                .highlight_max(order_columns_grouped[1:], axis=1, color="yellow")
                .highlight_max(axis=1, color="red")
                .format(precision=precision)
    )
    
    return pivot_style


def get_table_task(data):
    pivot_table = (
        pd.pivot_table(data = data,
            columns='pruning_group',
            index='dataset_group',
            values='percentage_group'
            ).reindex(order_columns_grouped, axis=1)
    )
    
    pivot_style = (
        pivot_table
            .style
                .set_table_styles([headers,index_style])
                .set_properties(**{'background-color': '#ECE3FF','color': 'black'})
                .highlight_max(order_columns_grouped[1:], axis=1, color="yellow")
                .highlight_max(axis=1, color="red")
                .format(precision=precision)
    )
    
    return pivot_style

"""
for i in range(len(model_list)):
    for sparsity in sparsity_list:
        data_model = results[(results['model'] == model_list[i]) & ((results['sparsity'] == sparsity) | (results['sparsity'] == 0.0))]
        data_model['percentage'] = pd.NA
        
        
        tmp_pivot_style_ppl = generate_table_percentage_task(data_model, 'ppl')
        tmp_pivot_style_acc = generate_table_percentage_task(data_model, 'acc')
        tmp_pivot_style_bleu = generate_table_percentage_task(data_model, 'bleu')
        tmp_pivot_style_f1 = generate_table_percentage_task(data_model, 'f1')

        print(model_name[i] + ", sparsity: " + str(sparsity) + ", PPL")
        display(tmp_pivot_style_ppl)

        print(model_name[i] + ", sparsity: " + str(sparsity) + ", ACC")
        display(tmp_pivot_style_acc)

        print(model_name[i] + ", sparsity: " + str(sparsity) + ", BLEU")
        display(tmp_pivot_style_bleu)

        print(model_name[i] + ", sparsity: " + str(sparsity) + ", F1")
        display(tmp_pivot_style_f1)
        
        
        print("--------------------\n\n\n\n\n")
        
"""

'\nfor i in range(len(model_list)):\n    for sparsity in sparsity_list:\n        data_model = results[(results[\'model\'] == model_list[i]) & ((results[\'sparsity\'] == sparsity) | (results[\'sparsity\'] == 0.0))]\n        data_model[\'percentage\'] = pd.NA\n        \n        \n        tmp_pivot_style_ppl = generate_table_percentage_task(data_model, \'ppl\')\n        tmp_pivot_style_acc = generate_table_percentage_task(data_model, \'acc\')\n        tmp_pivot_style_bleu = generate_table_percentage_task(data_model, \'bleu\')\n        tmp_pivot_style_f1 = generate_table_percentage_task(data_model, \'f1\')\n\n        print(model_name[i] + ", sparsity: " + str(sparsity) + ", PPL")\n        display(tmp_pivot_style_ppl)\n\n        print(model_name[i] + ", sparsity: " + str(sparsity) + ", ACC")\n        display(tmp_pivot_style_acc)\n\n        print(model_name[i] + ", sparsity: " + str(sparsity) + ", BLEU")\n        display(tmp_pivot_style_bleu)\n\n        print(model_name[i] + ", sparsity: " +

<h1>AVG over models</h1>

In [9]:
data_avg = results.copy()

#da data togli il valore di baichuan-7b in evaluation of boolq


data_avg = data_avg[~((data_avg['model'] == 'Baichuan') & (data_avg['dataset'] == 'BoolQ'))]
data_avg = data_avg[~((data_avg['model'] == 'Qwen') & (data_avg['dataset'] == 'RACE'))]



data_avg = data_avg[data_avg['dataset'] != 'GSM8K'] #Attualmente non ha valori sensati

#data_avg = results
data_avg['percentage'] = pd.NA


In [10]:
"""
for sparsity in sparsity_list:
    data_avg_sparsity = data_avg[(data_avg['sparsity'] == sparsity) | (data_avg['sparsity'] == 0.0)]
    tmp_pivot_style_ppl = generate_table(data_avg_sparsity, 'ppl', highlight_min=True)
    tmp_pivot_style_acc = generate_table(data_avg_sparsity, 'acc')
    tmp_pivot_style_bleu = generate_table(data_avg_sparsity, 'bleu')
    tmp_pivot_style_f1 = generate_table(data_avg_sparsity, 'f1')

    print("\n\n\n\nAVG sparsity " + str(sparsity) + ": PPL")
    display(tmp_pivot_style_ppl)

    print("AVG sparsity " + str(sparsity) + ": ACC")
    display(tmp_pivot_style_acc)

    print("AVG sparsity " + str(sparsity) + ": BLEU")
    display(tmp_pivot_style_bleu)

    print("AVG sparsity " + str(sparsity) + ": F1")
    display(tmp_pivot_style_f1)
    


tmp_pivot_style_ppl = generate_table(data_avg, 'ppl', highlight_min=True)
tmp_pivot_style_acc = generate_table(data_avg, 'acc')
tmp_pivot_style_bleu = generate_table(data_avg, 'bleu')
tmp_pivot_style_f1 = generate_table(data_avg, 'f1')

print("\n\n\n\n")
print("*"*100)
print("\n\n\n\nAVG all sparsity: PPL")
display(tmp_pivot_style_ppl)

print("AVG all sparsity: ACC")
display(tmp_pivot_style_acc)

print("AVG all sparsity: BLEU")
display(tmp_pivot_style_bleu)

print("AVG all sparsity: F1")
display(tmp_pivot_style_f1)"""

'\nfor sparsity in sparsity_list:\n    data_avg_sparsity = data_avg[(data_avg[\'sparsity\'] == sparsity) | (data_avg[\'sparsity\'] == 0.0)]\n    tmp_pivot_style_ppl = generate_table(data_avg_sparsity, \'ppl\', highlight_min=True)\n    tmp_pivot_style_acc = generate_table(data_avg_sparsity, \'acc\')\n    tmp_pivot_style_bleu = generate_table(data_avg_sparsity, \'bleu\')\n    tmp_pivot_style_f1 = generate_table(data_avg_sparsity, \'f1\')\n\n    print("\n\n\n\nAVG sparsity " + str(sparsity) + ": PPL")\n    display(tmp_pivot_style_ppl)\n\n    print("AVG sparsity " + str(sparsity) + ": ACC")\n    display(tmp_pivot_style_acc)\n\n    print("AVG sparsity " + str(sparsity) + ": BLEU")\n    display(tmp_pivot_style_bleu)\n\n    print("AVG sparsity " + str(sparsity) + ": F1")\n    display(tmp_pivot_style_f1)\n    \n\n\ntmp_pivot_style_ppl = generate_table(data_avg, \'ppl\', highlight_min=True)\ntmp_pivot_style_acc = generate_table(data_avg, \'acc\')\ntmp_pivot_style_bleu = generate_table(data_avg,

In [11]:

def get_avg_across_model_normalized(data_avg, sparsity=0):
    avg_results_ppl = pd.DataFrame()
    avg_results_acc = pd.DataFrame()
    avg_results_bleu = pd.DataFrame()
    avg_results_f1 = pd.DataFrame()

    for model in data_avg['model'].unique():
        data_this_model = data_avg[(data_avg['model'] == model)]
        
        data_ppl = get_percentage_values(data_this_model[data_this_model['metric'] == 'ppl'], 'ppl')
        data_acc = get_percentage_values(data_this_model[data_this_model['metric'] == 'acc'], 'acc')
        data_bleu = get_percentage_values(data_this_model[data_this_model['metric'] == 'bleu'], 'bleu')
        data_f1 = get_percentage_values(data_this_model[data_this_model['metric'] == 'f1'], 'f1')
        
        avg_results_ppl = pd.concat([avg_results_ppl, data_ppl[['dataset', 'pruning_data', 'percentage']]])
        avg_results_acc = pd.concat([avg_results_acc, data_acc[['dataset', 'pruning_data', 'percentage']]])
        avg_results_bleu = pd.concat([avg_results_bleu, data_bleu[['dataset', 'pruning_data', 'percentage']]])
        avg_results_f1 = pd.concat([avg_results_f1, data_f1[['dataset', 'pruning_data', 'percentage']]])
        
    mean_ppl = avg_results_ppl.groupby(['dataset', 'pruning_data']).mean().reset_index()
    mean_acc = avg_results_acc.groupby(['dataset', 'pruning_data']).mean().reset_index()
    mean_bleu = avg_results_bleu.groupby(['dataset', 'pruning_data']).mean().reset_index()
    mean_f1 = avg_results_f1.groupby(['dataset', 'pruning_data']).mean().reset_index()
    
    table_ppl = get_table(mean_ppl)
    table_acc = get_table(mean_acc)
    table_bleu = get_table(mean_bleu)
    table_f1 = get_table(mean_f1)



    print("\n\n\n\nAVG, sparsity " + str(sparsity) + ": PPL")
    display(table_ppl)

    print("AVG, sparsity " + str(sparsity) + ": ACC")
    display(table_acc)

    print("AVG, sparsity " + str(sparsity) + ": BLEU")
    display(table_bleu)

    print("AVG, sparsity " + str(sparsity) + ": F1")
    display(table_f1)

"""
for sparsity in sparsity_list:
    data_avg_sparsity = data_avg[(data_avg['sparsity'] == sparsity) | (data_avg['sparsity'] == 0.0)]
    get_avg_across_model_normalized(data_avg_sparsity, sparsity)
    
"""

print("\n\n\n\n")
print("*"*100)

data_exclude_zeros = copy.deepcopy(data_avg)
"""data_exclude_zeros = data_exclude_zeros[~((data_exclude_zeros['model'] == 'baichuan-inc/Baichuan-7B') & (data_exclude_zeros['sparsity'] == 0.7))]
data_exclude_zeros = data_exclude_zeros[~((data_exclude_zeros['model'] == 'microsoft/phi-2') & (data_exclude_zeros['sparsity'] == 0.7))]
data_exclude_zeros = data_exclude_zeros[~((data_exclude_zeros['model'] == 'meta-llama/Llama-2-7b-hf') & (data_exclude_zeros['sparsity'] == 0.7))]
"""
data_exclude_zeros = data_exclude_zeros[data_exclude_zeros['sparsity'] != 0.9]
#data_exclude_zeros = data_exclude_zeros[data_exclude_zeros['sparsity'] != 0.7]

get_avg_across_model_normalized(data_exclude_zeros, "All")








****************************************************************************************************




AVG, sparsity All: PPL


pruning_data,Dense,C4,OSCAR,Pile,RedPajama,WikiText,ANLI,BoolQ,CommonsenseQA,e-SNLI,GSM8K,MAWPS,RACE,RTE,SVAMP,WinoGrande,IWSLT,WMT14,DS1000,MBPP,RefineCode
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
C4,1.0,0.642,0.645,0.644,0.643,0.637,0.642,0.647,0.623,0.616,0.624,0.621,0.638,0.636,0.619,0.624,0.647,0.645,0.626,0.623,0.632
OSCAR,1.0,0.645,0.651,0.649,0.649,0.642,0.647,0.65,0.627,0.62,0.627,0.626,0.641,0.639,0.622,0.629,0.65,0.646,0.632,0.628,0.635
Pile,1.0,0.678,0.684,0.694,0.686,0.68,0.684,0.686,0.662,0.655,0.664,0.668,0.673,0.674,0.663,0.666,0.685,0.682,0.683,0.679,0.683
RedPajama,1.0,0.661,0.668,0.669,0.672,0.66,0.67,0.669,0.645,0.638,0.644,0.652,0.656,0.657,0.641,0.647,0.671,0.663,0.657,0.654,0.658
WikiText,1.0,0.613,0.618,0.623,0.621,0.626,0.627,0.628,0.597,0.592,0.598,0.604,0.613,0.614,0.596,0.6,0.627,0.621,0.601,0.599,0.607


AVG, sparsity All: ACC


pruning_data,Dense,C4,OSCAR,Pile,RedPajama,WikiText,ANLI,BoolQ,CommonsenseQA,e-SNLI,GSM8K,MAWPS,RACE,RTE,SVAMP,WinoGrande,IWSLT,WMT14,DS1000,MBPP,RefineCode
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ANLI,1.0,0.806,0.791,0.81,0.792,0.799,0.805,0.794,0.793,0.768,0.79,0.792,0.801,0.805,0.778,0.799,0.798,0.795,0.786,0.79,0.784
BoolQ,1.0,0.968,0.924,0.991,0.934,0.935,0.836,0.842,0.784,0.803,0.778,0.811,0.963,0.822,0.807,0.836,0.817,0.764,0.952,0.833,0.818
CommonsenseQA,1.0,0.736,0.746,0.74,0.752,0.725,0.738,0.751,0.757,0.73,0.751,0.759,0.743,0.737,0.761,0.741,0.721,0.715,0.726,0.75,0.718
MAWPS,1.0,0.662,0.664,0.684,0.658,0.663,0.652,0.65,0.626,0.592,0.676,0.652,0.65,0.644,0.678,0.614,0.626,0.627,0.681,0.657,0.642
RACE,1.0,0.717,0.72,0.724,0.73,0.726,0.716,0.729,0.712,0.712,0.721,0.7,0.742,0.719,0.721,0.725,0.729,0.708,0.713,0.709,0.695
RTE,1.0,0.798,0.81,0.818,0.803,0.789,0.823,0.806,0.8,0.83,0.798,0.802,0.812,0.833,0.808,0.803,0.815,0.794,0.823,0.796,0.8
SVAMP,1.0,0.683,0.673,0.693,0.671,0.671,0.654,0.659,0.647,0.623,0.694,0.686,0.66,0.651,0.699,0.638,0.631,0.639,0.704,0.674,0.667
WinoGrande,1.0,0.948,0.955,0.954,0.953,0.951,0.932,0.946,0.961,0.946,0.959,0.955,0.952,0.946,0.957,0.96,0.962,0.916,0.948,0.95,0.962
e-SNLI,1.0,0.786,0.817,0.841,0.827,0.772,0.793,0.77,0.759,0.768,0.777,0.778,0.79,0.792,0.777,0.779,0.775,0.779,0.775,0.782,0.787


AVG, sparsity All: BLEU


pruning_data,Dense,C4,OSCAR,Pile,RedPajama,WikiText,ANLI,BoolQ,CommonsenseQA,e-SNLI,GSM8K,MAWPS,RACE,RTE,SVAMP,WinoGrande,IWSLT,WMT14,DS1000,MBPP,RefineCode
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
IWSLT,1.0,0.615,0.62,0.625,0.62,0.607,0.617,0.607,0.599,0.597,0.596,0.616,0.6,0.611,0.604,0.607,0.652,0.651,0.611,0.605,0.611
WMT14,1.0,0.629,0.632,0.64,0.63,0.625,0.633,0.623,0.599,0.601,0.603,0.618,0.609,0.627,0.609,0.605,0.657,0.659,0.618,0.607,0.615


AVG, sparsity All: F1


pruning_data,Dense,C4,OSCAR,Pile,RedPajama,WikiText,ANLI,BoolQ,CommonsenseQA,e-SNLI,GSM8K,MAWPS,RACE,RTE,SVAMP,WinoGrande,IWSLT,WMT14,DS1000,MBPP,RefineCode
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
DS1000,1.0,0.931,0.938,0.943,0.929,0.929,0.904,0.917,0.894,0.934,0.919,0.883,0.905,0.907,0.9,0.912,0.901,0.91,0.943,0.938,0.938
MBPP,1.0,0.922,0.944,0.944,0.906,0.904,0.919,0.941,0.93,0.882,0.938,0.914,0.924,0.905,0.898,0.948,0.949,0.921,0.913,0.939,0.947
RefineCode,1.0,0.918,0.931,0.929,0.9,0.891,0.91,0.94,0.92,0.889,0.927,0.905,0.922,0.901,0.896,0.936,0.939,0.911,0.906,0.921,0.939


In [17]:

def get_avg_across_model_normalized_task(data_avg, sparsity=0):
    avg_results_ppl = pd.DataFrame()
    avg_results_acc = pd.DataFrame()
    avg_results_bleu = pd.DataFrame()
    avg_results_f1 = pd.DataFrame()

    for model in data_avg['model'].unique():
        data_this_model = data_avg[data_avg['model'] == model]
        
        data_ppl = get_percentage_values(data_this_model[data_this_model['metric'] == 'ppl'], 'ppl')
        data_acc = get_percentage_values(data_this_model[data_this_model['metric'] == 'acc'], 'acc')
        data_bleu = get_percentage_values(data_this_model[data_this_model['metric'] == 'bleu'], 'bleu')
        data_f1 = get_percentage_values(data_this_model[data_this_model['metric'] == 'f1'], 'f1')
        
        data_ppl = data_ppl.groupby(['dataset_group', 'pruning_group']).agg(percentage_group=('percentage', 'mean'))
        data_acc = data_acc.groupby(['dataset_group', 'pruning_group']).agg(percentage_group=('percentage', 'mean'))
        data_bleu = data_bleu.groupby(['dataset_group', 'pruning_group']).agg(percentage_group=('percentage', 'mean'))
        data_f1 = data_f1.groupby(['dataset_group', 'pruning_group']).agg(percentage_group=('percentage', 'mean'))
        
        avg_results_ppl = pd.concat([avg_results_ppl, data_ppl])
        avg_results_acc = pd.concat([avg_results_acc, data_acc])
        avg_results_bleu = pd.concat([avg_results_bleu, data_bleu])
        avg_results_f1 = pd.concat([avg_results_f1, data_f1])

    mean_ppl = avg_results_ppl.groupby(['dataset_group', 'pruning_group']).mean().reset_index()
    mean_acc = avg_results_acc.groupby(['dataset_group', 'pruning_group']).mean().reset_index()
    mean_bleu = avg_results_bleu.groupby(['dataset_group', 'pruning_group']).mean().reset_index()
    mean_f1 = avg_results_f1.groupby(['dataset_group', 'pruning_group']).mean().reset_index()

    table_ppl = get_table_task(mean_ppl)
    table_acc = get_table_task(mean_acc)
    table_bleu = get_table_task(mean_bleu)
    table_f1 = get_table_task(mean_f1)


    print("\n\n\n\nAVG, sparsity " + str(sparsity) + ": PPL")
    display(table_ppl)

    print("AVG, sparsity " + str(sparsity) + ": ACC")
    display(table_acc)

    print("AVG, sparsity " + str(sparsity) + ": BLEU")
    display(table_bleu)

    print("AVG, sparsity " + str(sparsity) + ": F1")
    display(table_f1)
    


"""
for sparsity in sparsity_list:
    data_avg_sparsity = data_avg[(data_avg['sparsity'] == sparsity) | (data_avg['sparsity'] == 0.0)]
    get_avg_across_model_normalized_task(data_avg_sparsity, sparsity)
"""


print("\n\n\n\n")
print("*"*100)


data_exclude_zeros = copy.deepcopy(data_avg)
"""data_exclude_zeros = data_exclude_zeros[~((data_exclude_zeros['model'] == 'baichuan-inc/Baichuan-7B') & (data_exclude_zeros['sparsity'] == 0.7))]
data_exclude_zeros = data_exclude_zeros[~((data_exclude_zeros['model'] == 'microsoft/phi-2') & (data_exclude_zeros['sparsity'] == 0.7))]
data_exclude_zeros = data_exclude_zeros[~((data_exclude_zeros['model'] == 'meta-llama/Llama-2-7b-hf') & (data_exclude_zeros['sparsity'] == 0.7))]"""
data_exclude_zeros = data_exclude_zeros[(data_exclude_zeros['sparsity'] != 0.9)]
#data_exclude_zeros = data_exclude_zeros[data_exclude_zeros['sparsity'] != 0.7]


get_avg_across_model_normalized_task(data_exclude_zeros, 0.0)
#get_avg_across_model_normalized_task(data_avg, 0.0)






****************************************************************************************************




AVG, sparsity 0.0: PPL


pruning_group,Dense,Language Modeling,Mathematical Reasoning,NLI,Question Answering,Translation,Coding
dataset_group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Language Modeling,1.0,0.652,0.631,0.641,0.641,0.654,0.64


AVG, sparsity 0.0: ACC


pruning_group,Dense,Language Modeling,Mathematical Reasoning,NLI,Question Answering,Translation,Coding
dataset_group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Mathematical Reasoning,1.0,0.672,0.681,0.636,0.643,0.631,0.671
NLI,1.0,0.804,0.789,0.802,0.792,0.793,0.792
Question Answering,1.0,0.84,0.812,0.804,0.822,0.796,0.818


AVG, sparsity 0.0: BLEU


pruning_group,Dense,Language Modeling,Mathematical Reasoning,NLI,Question Answering,Translation,Coding
dataset_group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Translation,1.0,0.624,0.608,0.614,0.606,0.655,0.611


AVG, sparsity 0.0: F1


pruning_group,Dense,Language Modeling,Mathematical Reasoning,NLI,Question Answering,Translation,Coding
dataset_group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Coding,1.0,0.924,0.91,0.906,0.924,0.922,0.932


<h1>Using multiple calibration dataset</h1>

In [13]:
#order_columns_with_mean = ['dense', 'random_sample', 'prototype_flatten', 'prototype_mean', 'most_different_flatten', 'most_different_mean']  + order_columns[1:] #Salto dense
order_columns_with_mean = ['dense', 'random_sample', 'prototype_flatten', 'prototype_mean', 'most_different_flatten', 'most_different_mean', 'all_prototype_iou', 'all_prototype_iou_count_occurrence', 'all_most_different_iou', 'all_most_different_iou_count_occurrence', 'all_prototype_st', 'all_most_different_st']  + order_columns[1:] #Salto dense


#results = pd.read_csv("./results.csv")

results_leave_calibration_out = results#[results['model'] == "meta-llama/Llama-2-7b-hf"] #meta-llama/Llama-2-7b-hf, mistralai/Mistral-7B-v0.1, microsoft/phi-2, baichuan-inc/Baichuan-7B, Qwen/Qwen2.5-7B
results_leave_calibration_out['percentage'] = pd.NA

results_leave_calibration_out = results_leave_calibration_out[~((results_leave_calibration_out['model'] == 'Baichuan') & (results_leave_calibration_out['dataset'] == 'BoolQ'))]
results_leave_calibration_out = results_leave_calibration_out[~((results_leave_calibration_out['model'] == 'Qwen') & (results_leave_calibration_out['dataset'] == 'RACE'))]
results_leave_calibration_out = results_leave_calibration_out[results_leave_calibration_out['dataset'] != 'GSM8K'] #Attualmente non ha valori sensati



In [14]:

def create_plot_avg_calib(data):
    
    pivot_table = (
    pd.pivot_table(
        data = data.round(precision),
        index='pruning_data',
        columns='data',
        values='percentage'
        ).reindex(order_columns_with_mean, axis=0).reindex(
            ['PPL', 'ACC', 'BLEU', 'F1', 'AVG_across_metrics', 'AVG_across_datasets'], axis=1)
    )
    #pivot_table = pivot_table.T
    
    
    pivot_table_style = (
    pivot_table
        .style
            .set_table_styles([headers,index_style])
            .set_properties(**{'background-color': '#ECE3FF','color': 'black'})
            .highlight_max(axis=0, color="red")
            #.highlight_max(pivot_table.index[1:], axis=0, color="yellow")
            .format(precision=precision)
    )
    
    return pivot_table_style



def get_avg_across_model_grouped_by_pruning_dataset(results_leave_calibration_out, sparsity=0):
    avg_results = pd.DataFrame()
    mean_across_dataset = pd.DataFrame()

    for model in results_leave_calibration_out['model'].unique():
        data_this_model = results_leave_calibration_out[(results_leave_calibration_out['model'] == model)]
        data_this_model = data_this_model[~data_this_model['pruning_data'].str.contains('_no_')]

        data_ppl = get_percentage_values(data_this_model[data_this_model['metric'] == 'ppl'], 'ppl')
        data_acc = get_percentage_values(data_this_model[data_this_model['metric'] == 'acc'], 'acc')
        data_bleu = get_percentage_values(data_this_model[data_this_model['metric'] == 'bleu'], 'bleu')
        data_f1 = get_percentage_values(data_this_model[data_this_model['metric'] == 'f1'], 'f1')
        
        data_ppl['data'] = 'PPL'
        data_acc['data'] = 'ACC'
        data_bleu['data'] = 'BLEU'
        data_f1['data'] = 'F1'

        data_ppl_groupped = data_ppl.groupby(['pruning_data', 'data']).agg({'percentage': 'mean'}).reset_index()
        data_acc_groupped = data_acc.groupby(['pruning_data', 'data']).agg({'percentage': 'mean'}).reset_index() 
        data_bleu_groupped = data_bleu.groupby(['pruning_data', 'data']).agg({'percentage': 'mean'}).reset_index() 
        data_f1_groupped = data_f1.groupby(['pruning_data', 'data']).agg({'percentage': 'mean'}).reset_index()

        avg_results = pd.concat([avg_results, data_ppl_groupped, data_acc_groupped, data_bleu_groupped, data_f1_groupped], ignore_index=True)
        mean_across_dataset = pd.concat([mean_across_dataset, data_ppl, data_acc, data_bleu, data_f1], ignore_index=True)


    mean_across_dataset = mean_across_dataset.groupby(['pruning_data']).agg({'percentage': 'mean'}).reset_index()
    mean_across_dataset['data'] = 'AVG_across_datasets'

    mean_across_tasks = avg_results.groupby('pruning_data').agg({'percentage': 'mean'}).reset_index()
    mean_across_tasks['data'] = 'AVG_across_metrics'

    mean = avg_results.groupby(['pruning_data', 'data']).agg({'percentage': 'mean'}).reset_index()

    final_results = pd.concat([mean, mean_across_tasks, mean_across_dataset], axis=0)
    table = create_plot_avg_calib(final_results)

    print("AVG, sparsity " + str(sparsity) + ": PPL, ACC, BLEU, F1")
    display(table)
    
    return mean


for sparsity in sparsity_list:
    data_avg_sparsity = results_leave_calibration_out[(results_leave_calibration_out['sparsity'] == sparsity) | (results_leave_calibration_out['sparsity'] == 0.0)]
    _ = get_avg_across_model_grouped_by_pruning_dataset(data_avg_sparsity, sparsity)
    

print("\n\n\n\n")
print("*"*100)
get_avg_across_model_grouped_by_pruning_dataset(results_leave_calibration_out, 0.5)




AVG, sparsity 0.1: PPL, ACC, BLEU, F1


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_ppl['data'] = 'PPL'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_acc['data'] = 'ACC'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_bleu['data'] = 'BLEU'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See 

data,PPL,ACC,BLEU,F1,AVG_across_metrics,AVG_across_datasets
pruning_data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
dense,,,,,,
random_sample,,,,,,
prototype_flatten,,,,,,
prototype_mean,,,,,,
most_different_flatten,,,,,,
most_different_mean,,,,,,
all_prototype_iou,,,,,,
all_prototype_iou_count_occurrence,,,,,,
all_most_different_iou,,,,,,
all_most_different_iou_count_occurrence,,,,,,


AVG, sparsity 0.3: PPL, ACC, BLEU, F1


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_ppl['data'] = 'PPL'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_acc['data'] = 'ACC'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_bleu['data'] = 'BLEU'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See 

data,PPL,ACC,BLEU,F1,AVG_across_metrics,AVG_across_datasets
pruning_data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
dense,,,,,,
random_sample,,,,,,
prototype_flatten,,,,,,
prototype_mean,,,,,,
most_different_flatten,,,,,,
most_different_mean,,,,,,
all_prototype_iou,,,,,,
all_prototype_iou_count_occurrence,,,,,,
all_most_different_iou,,,,,,
all_most_different_iou_count_occurrence,,,,,,


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_ppl['data'] = 'PPL'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_acc['data'] = 'ACC'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_bleu['data'] = 'BLEU'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See 

AVG, sparsity 0.5: PPL, ACC, BLEU, F1


data,PPL,ACC,BLEU,F1,AVG_across_metrics,AVG_across_datasets
pruning_data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
dense,,,,,,
random_sample,0.674,0.878,0.66,0.959,0.793,0.814
prototype_flatten,0.685,0.883,0.674,0.966,0.802,0.821
prototype_mean,0.675,0.875,0.653,0.952,0.789,0.81
most_different_flatten,0.672,0.87,0.657,0.945,0.786,0.807
most_different_mean,0.671,0.874,0.66,0.953,0.789,0.81
all_prototype_iou,,,,,,
all_prototype_iou_count_occurrence,,,,,,
all_most_different_iou,,,,,,
all_most_different_iou_count_occurrence,,,,,,


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_ppl['data'] = 'PPL'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_acc['data'] = 'ACC'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_bleu['data'] = 'BLEU'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See 

AVG, sparsity 0.7: PPL, ACC, BLEU, F1


data,PPL,ACC,BLEU,F1,AVG_across_metrics,AVG_across_datasets
pruning_data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
dense,,,,,,
random_sample,,,,,,
prototype_flatten,,,,,,
prototype_mean,,,,,,
most_different_flatten,,,,,,
most_different_mean,,,,,,
all_prototype_iou,,,,,,
all_prototype_iou_count_occurrence,,,,,,
all_most_different_iou,,,,,,
all_most_different_iou_count_occurrence,,,,,,


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_ppl['data'] = 'PPL'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_acc['data'] = 'ACC'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_bleu['data'] = 'BLEU'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See 

AVG, sparsity 0.9: PPL, ACC, BLEU, F1


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_ppl['data'] = 'PPL'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_acc['data'] = 'ACC'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_bleu['data'] = 'BLEU'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See 

data,PPL,ACC,BLEU,F1,AVG_across_metrics,AVG_across_datasets
pruning_data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
dense,,,,,,
random_sample,,,,,,
prototype_flatten,,,,,,
prototype_mean,,,,,,
most_different_flatten,,,,,,
most_different_mean,,,,,,
all_prototype_iou,,,,,,
all_prototype_iou_count_occurrence,,,,,,
all_most_different_iou,,,,,,
all_most_different_iou_count_occurrence,,,,,,







****************************************************************************************************


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_ppl['data'] = 'PPL'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_acc['data'] = 'ACC'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_bleu['data'] = 'BLEU'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See 

AVG, sparsity 0.5: PPL, ACC, BLEU, F1


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_ppl['data'] = 'PPL'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_acc['data'] = 'ACC'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_bleu['data'] = 'BLEU'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See 

data,PPL,ACC,BLEU,F1,AVG_across_metrics,AVG_across_datasets
pruning_data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
dense,,,,,,
random_sample,0.674,0.878,0.66,0.959,0.793,0.814
prototype_flatten,0.685,0.883,0.674,0.966,0.802,0.821
prototype_mean,0.675,0.875,0.653,0.952,0.789,0.81
most_different_flatten,0.672,0.87,0.657,0.945,0.786,0.807
most_different_mean,0.671,0.874,0.66,0.953,0.789,0.81
all_prototype_iou,,,,,,
all_prototype_iou_count_occurrence,,,,,,
all_most_different_iou,,,,,,
all_most_different_iou_count_occurrence,,,,,,


Unnamed: 0,pruning_data,data,percentage
0,ANLI,ACC,0.617791
1,ANLI,BLEU,0.500272
2,ANLI,F1,0.800157
3,ANLI,PPL,0.523306
4,BoolQ,ACC,0.616562
...,...,...,...
101,random,ACC,0.551822
102,random_sample,ACC,0.878425
103,random_sample,BLEU,0.659861
104,random_sample,F1,0.9588


In [15]:
def create_plot_avg_calib(data):
    
    pivot_table = (
    pd.pivot_table(
        data = data.round(precision),
        index='pruning_data',
        values=['PPL', 'ACC', 'BLEU', 'F1', 'MEAN']
        ).reindex(order_columns_with_mean, axis=0).reindex(
            ['PPL', 'ACC', 'BLEU', 'F1', 'MEAN'], axis=1)
    )
    #pivot_table = pivot_table.T
    
    
    pivot_table_style = (
    pivot_table
        .style
            .set_table_styles([headers,index_style])
            .set_properties(**{'background-color': '#ECE3FF','color': 'black'})
            .highlight_max(axis=0, color="red")
            #.highlight_max(pivot_table.index[1:], axis=0, color="yellow")
            .format(precision=precision)
    )
    
    return pivot_table_style


def get_avg_across_model_grouped_by_pruning_dataset_with_mean(results_leave_calibration_out, sparsity=0):
    avg_results_across_metrics = pd.DataFrame()

    for model in results_leave_calibration_out['model'].unique():
        data_this_model = results_leave_calibration_out[results_leave_calibration_out['model'] == model]
        data_this_model = data_this_model[~data_this_model['pruning_data'].str.contains('_no_')]
        
        data_ppl = get_percentage_values(data_this_model[data_this_model['metric'] == 'ppl'], 'ppl')
        data_acc = get_percentage_values(data_this_model[data_this_model['metric'] == 'acc'], 'acc')
        data_bleu = get_percentage_values(data_this_model[data_this_model['metric'] == 'bleu'], 'bleu')
        data_f1 = get_percentage_values(data_this_model[data_this_model['metric'] == 'f1'], 'f1')
        
        
        data_ppl = data_ppl.groupby(['pruning_data']).agg({'percentage': 'mean'}).reset_index()
        data_acc = data_acc.groupby(['pruning_data']).agg({'percentage': 'mean'}).reset_index() 
        data_bleu = data_bleu.groupby(['pruning_data']).agg({'percentage': 'mean'}).reset_index() 
        data_f1 = data_f1.groupby(['pruning_data']).agg({'percentage': 'mean'}).reset_index() 
        
        avg_results_across_metrics = pd.concat([avg_results_across_metrics, data_ppl, data_acc, data_bleu, data_f1])

    all_mean_scores = avg_results_across_metrics.groupby(['pruning_data']).agg(MEAN=('percentage', 'mean'))
    mean_ppl, mean_acc, mean_bleu, mean_f1 = get_avg_across_model_grouped_by_pruning_dataset(results_leave_calibration_out, sparsity)

    all_data = pd.concat([mean_ppl, mean_acc, mean_bleu, mean_f1, all_mean_scores], axis=1).reset_index()
    all_data.columns = ['pruning_data', 'PPL', 'ACC', 'BLEU', 'F1', 'MEAN']

    table = create_plot_avg_calib(all_data)

    print("AVG, sparsity " + str(sparsity) + ": PPL, ACC, BLEU, F1")
    display(table)
    
for sparsity in sparsity_list:
    data_avg_sparsity = results_leave_calibration_out[(results_leave_calibration_out['sparsity'] == sparsity) | (results_leave_calibration_out['sparsity'] == 0.0)]
    get_avg_across_model_grouped_by_pruning_dataset_with_mean(data_avg_sparsity, sparsity)


print("\n\n\n\n")
print("*"*100)
get_avg_across_model_grouped_by_pruning_dataset_with_mean(results_leave_calibration_out, 0.0)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_ppl['data'] = 'PPL'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_acc['data'] = 'ACC'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_bleu['data'] = 'BLEU'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See 

KeyError: 'PPL'