# **Spare Results**

In [1]:
import os
import json
import numpy as np
from pathlib import Path

### **1. Extract model's checkpoints and results**

In [2]:
# Project root: two directory levels above the notebook's current working directory.
PROJ_DIR = Path.cwd().parent.parent

In [6]:

def _load_results(file_names):
    """Load each named JSON result file from ``PROJ_DIR / "spare_outputs"``."""
    loaded = []
    for file_name in file_names:
        # A context manager closes the handle as soon as parsing finishes,
        # instead of leaving it open until garbage collection.
        with open(PROJ_DIR / "spare_outputs" / file_name, "r") as handle:
            loaded.append(json.load(handle))
    return loaded


def _mean_percentages(results, key):
    """Return, for each result dict, the mean of ``result[key]`` times 100."""
    return [sum(result[key]) / len(result[key]) * 100 for result in results]


def _print_summary(label, scores):
    """Print the label, the per-run scores, and their mean and std."""
    print(label)
    print(scores)
    print(f"avg: {np.mean(scores):.2f}")
    print(f"std: {np.std(scores):.2f}")


def get_results(model_name, data_name):
    """Print per-run scores and their mean/std for one model and dataset.

    For the given model, a fixed list of result files (one per seed) is read
    from ``PROJ_DIR / "spare_outputs"``. Each file is parsed as JSON and must
    contain the keys ``use_context_sub_scores``, ``use_context_org_scores``,
    ``use_parameter_sub_scores`` and ``use_parameter_org_scores``, each
    mapping to a non-empty list of numbers. Every list is averaged and scaled
    by 100, and the per-run values are printed together with their mean and
    standard deviation.

    Args:
        model_name: One of "Meta-Llama-3-8B", "Llama-3.1-8B",
            "Llama-2-7b-hf" or "gemma-2-9b".
        data_name: Dataset name interpolated into the result file names.

    Returns:
        None. The summary is written to standard output.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
        FileNotFoundError: If an expected result file does not exist.
        ZeroDivisionError: If a score list in a result file is empty.
    """
    run_suffix = "grouped_activations-mutual_information-True-True"

    if model_name == "Meta-Llama-3-8B":
        prefix = f"Meta-Llama-3-8B-{data_name}-12,13,14,15-0.07-2.0-{run_suffix}"
        use_context_files = [f"{prefix}-{seed}" for seed in (42,)]
        use_parameter_files = use_context_files

    elif model_name == "Llama-3.1-8B":
        prefix = f"Llama-3.1-8B-{data_name}-13,14,15,16-0.07-2.0-{run_suffix}"
        use_context_files = [f"{prefix}-{seed}" for seed in (42, 43, 44, 45, 46)]
        use_parameter_files = use_context_files

    elif model_name == "Llama-2-7b-hf":
        prefix = f"Llama-2-7b-hf-{data_name}-12,13,14,15-0.07-2.0-{run_suffix}"
        # seed = 45 exceeds context length
        use_context_files = [f"{prefix}-{seed}" for seed in (42, 43, 44, 46)]
        use_parameter_files = use_context_files

    elif model_name == "gemma-2-9b":
        # This model uses a different run configuration per file list.
        context_prefix = f"gemma-2-9b-{data_name}-23,24,25,26-0.01-3.0-{run_suffix}"
        parameter_prefix = (
            f"gemma-2-9b-{data_name}-23,24,25,29,30,31-0.01-1.8-{run_suffix}"
        )
        seeds = (42, 43, 44, 45, 46)
        use_context_files = [f"{context_prefix}-{seed}" for seed in seeds]
        use_parameter_files = [f"{parameter_prefix}-{seed}" for seed in seeds]

    else:
        raise ValueError(
            f"Unsupported model_name {model_name!r}; expected one of "
            "'Meta-Llama-3-8B', 'Llama-3.1-8B', 'Llama-2-7b-hf', 'gemma-2-9b'"
        )

    use_context_results = _load_results(use_context_files)
    if use_parameter_files is use_context_files:
        # Same file list: reuse the parsed results rather than reading twice.
        use_parameter_results = use_context_results
    else:
        use_parameter_results = _load_results(use_parameter_files)

    # NOTE(review): both "*_sub_scores" metrics are read from the use-context
    # runs and both "*_org_scores" metrics from the use-parameter runs. This
    # only matters when the two file lists differ (gemma-2-9b). The pairing is
    # kept exactly as before; confirm that it is intended.
    use_context_sub_scores = _mean_percentages(
        use_context_results, "use_context_sub_scores"
    )
    use_context_org_scores = _mean_percentages(
        use_parameter_results, "use_context_org_scores"
    )
    use_parameter_sub_scores = _mean_percentages(
        use_context_results, "use_parameter_sub_scores"
    )
    use_parameter_org_scores = _mean_percentages(
        use_parameter_results, "use_parameter_org_scores"
    )

    _print_summary("use_context_sub_scores", use_context_sub_scores)
    _print_summary("use_context_org_scores", use_context_org_scores)
    _print_summary("use_parameter_sub_scores", use_parameter_sub_scores)
    _print_summary("use_parameter_org_scores", use_parameter_org_scores)

In [7]:
# Model whose runs are summarized; get_results accepts "Meta-Llama-3-8B",
# "Llama-3.1-8B", "Llama-2-7b-hf" or "gemma-2-9b".
MODEL_NAME = "Meta-Llama-3-8B"
# Dataset name; get_results interpolates it into each result file name.
DATASET = "nqswap"

In [8]:
# Print the per-run scores with their mean and std for the selected model and dataset.
get_results(MODEL_NAME, DATASET)

use_context_sub_scores
[74.01323294547115]
avg: 74.01
std: 0.00
use_context_org_scores
[2.6922199406798994]
avg: 2.69
std: 0.00
use_parameter_sub_scores
[12.092174309833448]
avg: 12.09
std: 0.00
use_parameter_org_scores
[42.93862651152179]
avg: 42.94
std: 0.00
