In [None]:
import os
import json
import matplotlib.pyplot as plt

# Folder with all ICM results. The ICM_RESULTS_DIR environment variable, when
# set, overrides the machine-specific default below so the notebook can run
# on other machines without editing this cell.
results_folder = os.environ.get(
    "ICM_RESULTS_DIR",
    r"C:\Users\soswo\OneDrive\Desktop\projects\Unsupervised-Elicitation\icm_results",
)


def list_result_files(folder):
    """Return the sorted names of all ``.jsonl`` files directly inside ``folder``.

    Sorting makes the processing order (and therefore which file wins when two
    files map to the same model name) deterministic. If ``folder`` is not an
    existing directory, a message is printed and an empty list is returned.
    """
    if not os.path.isdir(folder):
        print(f"Results folder not found: {folder!r} "
              "(set ICM_RESULTS_DIR to point at the icm_results directory)")
        return []
    return sorted(name for name in os.listdir(folder) if name.endswith(".jsonl"))


def model_name_from_filename(filename):
    """Derive the model name from a results file name.

    The third underscore-separated field of the name (extension removed) is
    used as the model name -- adjust depending on naming. When the name has
    fewer than three fields, or the third field is empty, the whole stem is
    returned instead of raising ``IndexError``.
    """
    stem = os.path.splitext(filename)[0]
    fields = stem.split("_")
    if len(fields) > 2 and fields[2]:
        return fields[2]
    return stem


def load_labeled_examples(path):
    """Parse a JSONL file into a list, one decoded JSON value per non-blank line."""
    examples = []
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            # Blank lines (e.g. a trailing newline at end of file) are not
            # valid JSON documents; skip them rather than crash.
            if line.strip():
                examples.append(json.loads(line))
    return examples


def compute_accuracy(examples):
    """Return the fraction of examples whose "label" equals their "ground_truth".

    Returns 0 for an empty list. Raises ``KeyError`` if an example lacks
    either key.
    """
    total = len(examples)
    if total == 0:
        return 0
    correct = sum(1 for ex in examples if ex["label"] == ex["ground_truth"])
    return correct / total


# Collect all JSONL results in the folder
results_files = list_result_files(results_folder)

model_scores = {}

for file in results_files:
    path = os.path.join(results_folder, file)
    model_name = model_name_from_filename(file)
    labeled_examples = load_labeled_examples(path)

    # Compute accuracy
    accuracy = compute_accuracy(labeled_examples)
    if model_name in model_scores:
        # Two files resolved to the same model name; the later file (in sorted
        # order) replaces the earlier score, so make that visible.
        print(f"Warning: multiple result files map to model {model_name!r}; "
              f"using {file!r}")
    model_scores[model_name] = accuracy

print(model_scores)


In [None]:
# Bar chart of per-model accuracy: one bar per entry in model_scores,
# in the dict's iteration order.
models = list(model_scores)
accuracies = list(model_scores.values())

bar_colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728']
fig, ax = plt.subplots(figsize=(8, 5))
bars = ax.bar(models, accuracies, color=bar_colors)

# Annotate each bar with its value (two decimals), centred just above the bar.
for rect in bars:
    value = rect.get_height()
    x_center = rect.get_x() + rect.get_width() / 2.0
    ax.text(x_center, value, f'{value:.2f}', ha='center', va='bottom')

# Accuracy is a fraction, so pin the y-axis to [0, 1].
ax.set_ylim(0, 1)
ax.set_ylabel("Accuracy")
ax.set_title("ICM In-Context Learning Results on TruthfulQA")
ax.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()
