In [None]:
import json
import numpy as np
import os
import pandas as pd
import torch

from Levenshtein import distance
from utils import *


# Directory that is searched for per-model result folders.
ROOT_DIR = '.'
# Directory that holds one sub-folder per task with its `intents.txt`.
DATA_DIR = '.'

# Intent-classification datasets to evaluate.
tasks = ['atis', 'snips', 'clinic150', 'massive']
# Post-processing strategies applied to model output before scoring.
cleaners = ['raw', 'lmclean2']

# Quantisation settings evaluated for every model; each model adds its own
# unquantised/reference variant on top of these.
_SHARED_QUANT_LEVELS = [
    f'{bits}_bit_quant{suffix}'
    for bits in (4, 8)
    for suffix in ('', '_k5', '_k10')
]

# Model name -> list of quantisation settings to look for on disk.
# `+` builds a fresh list per model, so the entries do not alias each other.
models = {
    'llama-3.1-8b-instruct': _SHARED_QUANT_LEVELS + ['full'],
    'gemma-2-9b-it': _SHARED_QUANT_LEVELS + ['full'],
    # phi-3 uses a 16-bit variant as its reference instead of 'full'.
    'phi-3-medium-4k-instruct': _SHARED_QUANT_LEVELS + ['16_bit_quant'],
    'mistral-7b-instruct': _SHARED_QUANT_LEVELS + ['full'],
}

In [2]:
def map_to_intent(text, intents):
    """Return the entry of `intents` with the smallest edit distance to `text`.

    Args:
        text: String to match (e.g. a cleaned model output).
        intents: Sequence of candidate intent strings.

    Returns:
        The element of `intents` whose Levenshtein distance to `text` is
        minimal.
    """
    # One edit-distance score per candidate, in the same order as `intents`.
    edit_costs = [distance(text, candidate) for candidate in intents]
    # argmin yields a 0-dim tensor, which is usable directly as a list index.
    best_position = torch.tensor(edit_costs).argmin(dim=0)
    return intents[best_position]

In [4]:
def _load_task_results(results_path, task, model):
    """Load the result entries for one task from `results_path`.

    Looks for `<task>-<model>.json` first (a single JSON document) and falls
    back to `<task>-<model>.jsonl` (one JSON object per line).

    Returns:
        The parsed entries, or None when neither file exists.
    """
    json_path = f"{results_path}/{task}-{model}.json"
    if os.path.exists(json_path):
        # Context manager so the handle is closed deterministically.
        with open(json_path) as results_file:
            return json.load(results_file)

    jsonl_path = f"{results_path}/{task}-{model}.jsonl"
    if os.path.exists(jsonl_path):
        with open(jsonl_path) as results_file:
            return [json.loads(line) for line in results_file.read().splitlines()]

    return None


# The intent list depends only on the task, so each `intents.txt` is read once
# (lazily, only for tasks that actually have results) and then reused.
intents_by_task = {}

# One dict per (model, quantization, cleaner) combination found on disk.
records = []
for model, quantizations in models.items():
    for quant in quantizations:
        for cleaner in cleaners:
            results_path = f"{ROOT_DIR}/{MODEL_TO_NAME[model]}/{quant}/{cleaner}"
            if not os.path.exists(results_path):
                continue

            results_entry = {
                'model': model,
                'quantization': quant,
                'cleaner': cleaner
            }

            for task in tasks:
                results = _load_task_results(results_path, task, model)
                if results is None:
                    # No result file for this task: leave its columns absent.
                    continue

                if task not in intents_by_task:
                    with open(f"{DATA_DIR}/{task}/intents.txt") as intents_file:
                        intents_by_task[task] = intents_file.read().splitlines()
                intents = intents_by_task[task]

                labels = []
                preds = []
                for entry in results:
                    label = intents.index(entry['label'])
                    labels.append(label)
                    if cleaner == 'raw':
                        # Only the last space-separated token of the raw
                        # output is considered as the predicted intent.
                        model_out = entry['model_out'].strip()
                        model_out = model_out.split(' ')[-1]
                        if model_out in intents:
                            preds.append(intents.index(model_out))
                        else:
                            # Not a known intent: record an index that differs
                            # from the gold label so it is scored as wrong.
                            preds.append((label + 1) % len(intents))
                    elif cleaner == 'lmclean2':
                        if entry['cleaned'] in intents:
                            preds.append(intents.index(entry['cleaned']))
                        else:
                            # Snap unknown strings to the closest intent name.
                            preds.append(intents.index(map_to_intent(entry['cleaned'], intents)))
                    else:
                        # Without this, labels and preds would silently end up
                        # with different lengths.
                        raise ValueError(f"Unsupported cleaner: {cleaner!r}")

                labels = np.array(labels)
                preds = np.array(preds)
                # `score` comes from `utils`; it is unpacked as (accuracy, f1).
                acc, f1 = score(labels, preds)
                results_entry[f"{task}-acc"] = acc
                results_entry[f"{task}-f1"] = f1

            records.append(results_entry)

# One row per combination; columns for tasks without results are left as NaN.
df = pd.DataFrame(records)
df.to_excel('post_processing_effects.xlsx', index=False)

 acc: 0.7465 macro-f1: 0.5169
74.65 51.69
 acc: 0.8163 macro-f1: 0.8190
81.63 81.9
 acc: 0.8704 macro-f1: 0.8651
87.04 86.51
 acc: 0.7290 macro-f1: 0.7235
72.9 72.35
 acc: 0.7488 macro-f1: 0.5371
74.88 53.71
 acc: 0.8167 macro-f1: 0.8194
81.67 81.94
 acc: 0.8723 macro-f1: 0.8669
87.23 86.69
 acc: 0.7323 macro-f1: 0.7270
73.23 72.7
 acc: 0.7342 macro-f1: 0.5089
73.42 50.89
 acc: 0.8102 macro-f1: 0.8207
81.02 82.07
 acc: 0.8765 macro-f1: 0.8719
87.65 87.19
 acc: 0.7437 macro-f1: 0.7385
74.37 73.85
 acc: 0.7347 macro-f1: 0.5111
73.47 51.11
 acc: 0.8104 macro-f1: 0.8210
81.04 82.1
 acc: 0.8792 macro-f1: 0.8746
87.92 87.46
 acc: 0.7450 macro-f1: 0.7392
74.5 73.92
 acc: 0.7458 macro-f1: 0.4751
74.58 47.51
 acc: 0.7633 macro-f1: 0.7756
76.33 77.56
 acc: 0.8846 macro-f1: 0.8810
88.46 88.1
 acc: 0.7456 macro-f1: 0.7403
74.56 74.03
 acc: 0.7476 macro-f1: 0.4805
74.76 48.05
 acc: 0.7633 macro-f1: 0.7758
76.33 77.58
 acc: 0.8872 macro-f1: 0.8835
88.72 88.35
 acc: 0.7466 macro-f1: 0.7413
74.66 74.1