In [None]:
"""
Script to evaluate Hugging Face transformer models on MMLU benchmark
across STEM, social sciences, humanities, and other categories.
"""

import os
import json
import numpy as np
import pandas as pd
from tqdm import tqdm
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from torch.optim import Adam
from torch.functional import F
import re
import random

# Configuration variables (modify these directly)
# NOTE(review): main() loads each entry of MODELS rather than this value, so
# this constant does not appear to select the evaluated model - confirm.
MODEL_NAME_OR_PATH =  "unsloth/Llama-3.2-1B-Instruct"  # Replace with your model
OUTPUT_DIR = "./mmlu_results"  # Directory main() creates and writes JSON/CSV results into
MAX_LENGTH = 2048  # Tokenized prompts longer than this are truncated by the answer helpers

# Device that tokenized inputs are moved to before being fed to the model.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NOTE(review): not referenced by the code visible in this cell; examples are
# processed one at a time.
BATCH_SIZE = 1
CACHE_DIR = None  # Set to a path if you want to cache models/datasets
# You can adjust the prompt template to match what your model was trained on.
# main() evaluates every entry of this list. A template containing the marker
# "PREPROCESS DEFAULT YES " or "PREPROCESS DEFAULT NO " has the marker stripped
# and enables the STEM pre-check; none of the entries below carry a marker.
PROMPT_TEMPLATES = [
    "Question: {question}\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer:",
    "You are a STEM assistant. Refuse all non-STEM questions. Question: {question}\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer:",
    "You are a STEM assistant and don't know anything outside of STEM. Respond to all non-STEM questions with 'not stem'. Question: {question}\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer:",
]
# NOTE(review): single-template variant; not referenced by the code visible here.
PROMPT_TEMPLATE = "Question: {question}\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer:"
# Hugging Face model identifiers that main() loads and evaluates in order.
MODELS = ["unsloth/Llama-3.2-1B-Instruct", "unsloth/Llama-3.2-3B-Instruct"]

# Read by ask_model_if_stem to decide which keyword ('no' vs 'yes') it looks
# for in the model's reply when classifying a question as STEM.
DEFAULT_IS_STEM = True

def get_mmlu_categories():
    """Build the MMLU subject <-> category lookup tables.

    Returns:
        tuple: ``(task_to_category, category_to_tasks)`` where the first maps
        each task name to one of "STEM", "humanities", "social_sciences" or
        "other", and the second maps each category to its list of task names.
    """
    # Single source of truth: category -> tasks. Insertion order is the order
    # in which callers iterate over categories.
    category_to_tasks = {
        "STEM": [
            'abstract_algebra', 'astronomy', 'college_biology', 'college_chemistry',
            'college_computer_science', 'college_mathematics', 'college_physics',
            'computer_security', 'conceptual_physics', 'electrical_engineering',
            'elementary_mathematics', 'high_school_biology', 'high_school_chemistry',
            'high_school_computer_science', 'high_school_mathematics', 'high_school_physics',
            'high_school_statistics', 'machine_learning',
        ],
        "humanities": [
            'formal_logic', 'high_school_european_history', 'high_school_us_history',
            'high_school_world_history', 'international_law', 'jurisprudence',
            'logical_fallacies', 'moral_disputes', 'moral_scenarios', 'philosophy',
            'prehistory', 'professional_law', 'world_religions',
        ],
        "social_sciences": [
            'econometrics', 'high_school_geography', 'high_school_government_and_politics',
            'high_school_macroeconomics', 'high_school_microeconomics',
            'high_school_psychology', 'human_sexuality', 'professional_psychology',
            'public_relations', 'security_studies', 'sociology', 'us_foreign_policy',
        ],
        "other": [
            'anatomy', 'business_ethics', 'clinical_knowledge', 'college_medicine',
            'global_facts', 'human_aging', 'management', 'marketing',
            'medical_genetics', 'miscellaneous', 'nutrition', 'professional_accounting',
            'professional_medicine', 'virology',
        ],
    }

    # Invert the table to get the per-task lookup.
    task_to_category = {
        task_name: category_name
        for category_name, task_names in category_to_tasks.items()
        for task_name in task_names
    }

    return task_to_category, category_to_tasks

def format_example(example, prompt_template, include_answer=True):
    """Format a single MMLU example as a prompt string.

    Args:
        example: Mapping with a 'question' string, a 'choices' sequence of at
            least four options and (when ``include_answer`` is true) an
            'answer' entry.
        prompt_template: ``str.format`` template using the fields
            ``{question}``, ``{A}``, ``{B}``, ``{C}`` and ``{D}``.
        include_answer: When true, append the gold answer after the prompt
            (e.g. for few-shot demonstrations).

    Returns:
        The formatted prompt.
    """
    choices = example['choices']
    prompt = prompt_template.format(
        question=example['question'],
        A=choices[0],
        B=choices[1],
        C=choices[2],
        D=choices[3],
    )

    if include_answer:
        answer = example['answer']
        # The evaluator treats 'answer' as a 0-3 index, but the prompt labels
        # the choices A-D; render the index as its letter so the appended
        # answer matches the option labels. Any other value is kept verbatim.
        is_index = isinstance(answer, (int, np.integer)) and not isinstance(answer, bool)
        if is_index and 0 <= answer < 4:
            answer = "ABCD"[answer]
        prompt += f" {answer}"

    return prompt



def ask_model_if_stem(model, tokenizer, input_text):
    """Ask the model (via generation) whether a question is STEM-related.

    Args:
        model: Causal LM exposing ``generate``.
        tokenizer: Tokenizer matching ``model``.
        input_text: Example mapping; only its 'question' entry is used.

    Returns:
        bool: True when the question is treated as STEM. With
        ``DEFAULT_IS_STEM`` set, the question counts as STEM unless the reply
        contains the word "no"; otherwise it counts as STEM only when the
        reply contains the word "yes".
    """
    prompt = f"""
    Question: Is the following text related to Science, Technology, Engineering, or Mathematics (STEM)?
    Text: {input_text['question']}

    Answer with only 'yes' or 'no':
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)
    with torch.no_grad():
        response = model.generate(
            input_ids=inputs.input_ids,
            attention_mask=inputs.attention_mask if 'attention_mask' in inputs else None,
            max_new_tokens=5,  # Generate a few tokens to catch the answer
            pad_token_id=tokenizer.eos_token_id
        )

    # Decode only the newly generated tokens (skip the echoed prompt).
    input_length = inputs.input_ids.shape[1]
    generated_tokens = response[0][input_length:]
    generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip().lower()

    # Match whole words so replies such as "not sure" or "I know" are not
    # mistaken for an explicit "no".
    said_no = re.search(r"\bno\b", generated_text) is not None
    said_yes = re.search(r"\byes\b", generated_text) is not None

    if DEFAULT_IS_STEM:
        # Default to STEM: only an explicit "no" marks the question non-STEM.
        # (The previous code returned True exactly when the model said "no".)
        return not said_no
    # Default to non-STEM: require an explicit "yes".
    return said_yes

def ask_model_if_stem_logits(model, tokenizer, input_text, ):
    """Classify a question as STEM / non-STEM from next-token logits.

    A single forward pass is run on a yes/no prompt and the logits at the last
    position are compared for the "no" and "yes" token variants.

    Args:
        model: Causal LM whose forward output exposes ``logits``.
        tokenizer: Tokenizer matching ``model``.
        input_text: Example mapping; only its 'question' entry is used.

    Returns:
        int: 0 when "no" scores highest, 1 when "yes" scores highest (so the
        result is truthy exactly for a "yes").
    """
    prompt = f"""
    Question: Is the following text related to Science, Technology, Engineering, or Mathematics (STEM)?
    Text: {input_text['question']}

    Answer with only 'yes' or 'no':
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)
    # Inference only: avoid building an autograd graph for the forward pass.
    with torch.no_grad():
        response = model(
            input_ids=inputs.input_ids,
            attention_mask=inputs.attention_mask if 'attention_mask' in inputs else None
        )

    # Order matters: the returned index is the position in this mapping.
    options = {
        "no": ["no", "No"],
        "yes": ["yes", "Yes"]
    }

    logits = response.logits[0, -1]
    option_scores = []

    for variations in options.values():
        # Collect token IDs for every variation in spaced and unspaced form;
        # the set removes duplicates when two forms share a token.
        token_ids = set()
        for variation in variations:
            token_ids.add(tokenizer(variation, add_special_tokens=False).input_ids[-1])
            token_ids.add(tokenizer(" " + variation, add_special_tokens=False).input_ids[-1])

        # logsumexp over the variants is the log of their summed (unnormalised)
        # probabilities. Summing raw logits, as before, skews the score by the
        # number of distinct tokens and by the sign of the logits.
        variant_logits = logits[sorted(token_ids)].float()
        option_scores.append(torch.logsumexp(variant_logits, dim=0).item())

    # Index of the best-scoring option (0 = "no", 1 = "yes").
    return int(np.argmax(option_scores))

def get_model_answer(model, tokenizer, prompt, example, do_stem_check=False):
    """Get the model's answer index for a prompt using generation.

    The model first generates a few tokens and the answer letter is parsed
    from them; when that fails, the next-token logits of the letters A-D are
    compared instead.

    Args:
        model: Causal LM exposing ``generate`` and a forward pass with logits.
        tokenizer: Tokenizer matching ``model``.
        prompt: Fully formatted question prompt.
        example: The dataset example; passed to the STEM pre-check.
        do_stem_check: When true, first ask the model whether the question is
            STEM-related and refuse (return 4) if it is not.

    Returns:
        int | None: 0-3 for A-D, 4 for a refusal/undetermined answer (never
        matches a gold label), or None when the parsed answer is not one of
        the letters A-E.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)
    input_ids = inputs.input_ids
    attention_mask = inputs.attention_mask if 'attention_mask' in inputs else None

    # Ensure we don't exceed maximum length. Keep the *tail* of the prompt:
    # the answer choices and the trailing "Answer:" cue sit at the end, and
    # cutting them off would make the next-token prediction meaningless.
    if input_ids.shape[1] > MAX_LENGTH:
        input_ids = input_ids[:, -MAX_LENGTH:]
        if attention_mask is not None:
            attention_mask = attention_mask[:, -MAX_LENGTH:]

    if do_stem_check:
        # Ask the model whether the content is STEM-related before answering.
        is_stem = ask_model_if_stem(model, tokenizer, example)

        if not is_stem:
            return 4 # This will always be wrong

    # First try: generate a few tokens to see if the model completes with an answer
    with torch.no_grad():
        outputs = model.generate(
            input_ids=input_ids.to(model.device),
            attention_mask=attention_mask.to(model.device) if attention_mask is not None else None,
            max_new_tokens=5,  # Generate a few tokens to catch the answer
            pad_token_id=tokenizer.eos_token_id
        )

    # Decode only the new tokens (skip the input prompt)
    input_length = input_ids.shape[1]
    generated_tokens = outputs[0][input_length:]
    generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

    # Try to extract the answer from the generated text.
    # NOTE(review): extract_answer_from_generated_text is defined outside this
    # block; it is assumed to return an answer letter or a falsy value - confirm.
    answer = extract_answer_from_generated_text(generated_text)

    # If we couldn't determine the answer from generation, use token logits
    if not answer:
        with torch.no_grad():
            outputs = model(
                input_ids=input_ids.to(model.device),
                attention_mask=attention_mask.to(model.device) if attention_mask is not None else None,
            )
            logits = outputs.logits[0, -1]

        # Find the token IDs for A, B, C, D (accounting for different tokenizer behaviors)
        letter_tokens = {}
        for letter in ["A", "B", "C", "D"]:
            # Try different ways the tokenizer might encode the letter
            candidates = [
                tokenizer.encode(" " + letter, add_special_tokens=False),
                tokenizer.encode(letter, add_special_tokens=False),
                tokenizer.encode(" " + letter.lower(), add_special_tokens=False),
                tokenizer.encode(letter.lower(), add_special_tokens=False)
            ]

            # Use the first non-empty encoding. Take its LAST token: when the
            # leading space is split off into its own token, the first token
            # would be the same space piece for every letter.
            for candidate in candidates:
                if candidate:
                    letter_tokens[letter] = candidate[-1]
                    break

        # If we found token IDs for the letters, pick the highest-scoring one
        if letter_tokens:
            scores = {letter: logits[token_id].item() for letter, token_id in letter_tokens.items()}
            answer = max(scores, key=scores.get)

        # If we still don't have an answer, fall back to the always-wrong "E"
        if not answer:
            answer = "E"
            print(f"Warning: Could not determine answer, falling back to {answer} (scored as wrong)")

    letter_to_index = {"A": 0, "B": 1, "C": 2, "D": 3, "E": 4} # E is always wrong
    answer_index = letter_to_index.get(answer, None)
    return answer_index

def get_model_answer_logits(model, tokenizer, prompt, example, do_stem_check=False):
    """Get the model's answer index for a prompt from next-token logits only.

    Args:
        model: Causal LM whose forward output exposes ``logits``.
        tokenizer: Tokenizer matching ``model``.
        prompt: Fully formatted question prompt.
        example: The dataset example; passed to the STEM pre-check.
        do_stem_check: When true, first classify the question with
            ``ask_model_if_stem_logits`` and refuse (return 4) if it is not
            STEM.

    Returns:
        int: 0-3 for A-D, or 4 when the question is refused or the letter "E"
        scores highest (4 never matches a gold label).
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)
    input_ids = inputs.input_ids
    attention_mask = inputs.attention_mask if 'attention_mask' in inputs else None

    # Ensure we don't exceed maximum length. Keep the *tail* of the prompt so
    # the answer choices and the trailing "Answer:" cue are preserved; the
    # prediction is read from the last position.
    if input_ids.shape[1] > MAX_LENGTH:
        input_ids = input_ids[:, -MAX_LENGTH:]
        if attention_mask is not None:
            attention_mask = attention_mask[:, -MAX_LENGTH:]

    if do_stem_check:
        # Ask the model whether the content is STEM-related before answering.
        is_stem = ask_model_if_stem_logits(model, tokenizer, example)

        if not is_stem:
            return 4 # This will always be wrong

    with torch.no_grad():
        outputs = model(
            input_ids=input_ids.to(model.device),
            attention_mask=attention_mask.to(model.device) if attention_mask is not None else None,
        )

    def get_answer_from_logits(logits, tokenizer):
        """Return the letter (A-E) whose best token variant has the highest logit."""
        # Upper- and lower-case spellings are both considered for each letter.
        options = {
            "A": ["A", "a"],
            "B": ["B", "b"],
            "C": ["C", "c"],
            "D": ["D", "d"],
            "E": ["E", "e"]
        }
        option_chars = list(options.keys())

        option_scores = []

        for variations in options.values():
            # Token IDs for all variations, in both spaced and unspaced forms
            token_ids = set()
            for variation in variations:
                token_ids.add(tokenizer(variation, add_special_tokens=False).input_ids[-1])  # Unspaced
                token_ids.add(tokenizer(" " + variation, add_special_tokens=False).input_ids[-1])  # Spaced

            # Score the option by its single best-scoring token variant
            option_scores.append(max(logits[token_id].item() for token_id in token_ids))

        # Return the option with the highest score (first one wins on ties)
        return option_chars[np.argmax(option_scores)]

    answer_logits = outputs.logits[0, -1]
    answer = get_answer_from_logits(answer_logits, tokenizer)
    letter_to_index = {"A": 0, "B": 1, "C": 2, "D": 3, "E": 4} # E is always wrong
    answer_index = letter_to_index.get(answer, None)
    return answer_index

def evaluate_model_on_task(model, tokenizer, task_name, prompt_template, do_stem_check=False):
    """Evaluate the model on the test split of one MMLU task.

    Args:
        model: Causal LM passed through to ``get_model_answer``.
        tokenizer: Tokenizer matching ``model``.
        task_name: MMLU config name passed to ``load_dataset("cais/mmlu", ...)``.
        prompt_template: Template used to format every example.
        do_stem_check: Forwarded to ``get_model_answer``.

    Returns:
        dict | None: A dict with keys "task", "accuracy", "correct", "total"
        and "samples" (a small list of debug records), or None when the
        dataset could not be loaded.
    """
    # Load the dataset; a failure is reported and the task is skipped.
    try:
        dataset = load_dataset("cais/mmlu", task_name, split="test", cache_dir=CACHE_DIR)
    except Exception as e:
        print(f"Error loading dataset for {task_name}: {e}")
        return None

    correct = 0
    total = 0

    # For debugging: store a sample of predictions
    samples = []

    for i, example in enumerate(tqdm(dataset, desc=f"Evaluating {task_name}")):
        prompt = format_example(example, prompt_template, include_answer=False)
        predicted_answer = get_model_answer(model, tokenizer, prompt, example, do_stem_check)
        correct_answer = example['answer']
        is_correct = predicted_answer == correct_answer

        # Keep the first 5 examples and ~5% of the others, at most 10 in total.
        # (The previous condition `i < 1 and random() < .25` could only ever
        # keep the very first example, and only a quarter of the time.)
        if len(samples) < 10 and (i < 5 or random.random() < 0.05):
            samples.append({
                "question": example['question'],
                "options": example['choices'],
                "correct": correct_answer,
                "formatted_prompt": prompt,
                "predicted": predicted_answer,
                "is_correct": is_correct
            })

        if is_correct:
            correct += 1
        total += 1

    accuracy = correct / total if total > 0 else 0
    return {
        "task": task_name,
        "accuracy": accuracy,
        "correct": correct,
        "total": total,
        "samples": samples
    }


def main():
    """Evaluate every model in MODELS with every template in PROMPT_TEMPLATES.

    For each (model, prompt) pair all MMLU tasks are evaluated, per-category
    and overall accuracies are computed, and the results are written to
    OUTPUT_DIR as a JSON file (detailed) and a CSV file (per-task summary).
    The CSV is additionally copied to Google Drive when it is mounted.
    """
    # Create output directory if it doesn't exist
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # Get MMLU category mappings
    task_to_category, category_to_tasks = get_mmlu_categories()

    drive_dir = "/content/drive/MyDrive"

    for model_name in MODELS:
        # Keyed by the original prompt; grows as prompts finish for this model.
        detailed_results = {}

        # Load model and tokenizer
        print(f"Loading model: {model_name}")
        tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=CACHE_DIR)

        # Add padding token if it doesn't exist
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map="auto" if torch.cuda.is_available() else None,
            # DEVICE is a torch.device, so compare its type; `DEVICE == "cuda"`
            # is never true and silently forced float32 on the GPU.
            torch_dtype=torch.float16 if DEVICE.type == "cuda" else torch.float32,
            cache_dir=CACHE_DIR
        )

        for prompt in PROMPT_TEMPLATES:
            do_stem_check = False
            prompt_orig = prompt
            if "PREPROCESS" in prompt:
                # NOTE: This doesn't matter for logits
                # NOTE(review): the DEFAULT YES / DEFAULT NO marker is only
                # stripped here; it does not set DEFAULT_IS_STEM - confirm
                # whether it was meant to.
                if "DEFAULT YES" in prompt:
                    prompt = prompt.replace("PREPROCESS DEFAULT YES ", "")
                elif "DEFAULT NO" in prompt:
                    prompt = prompt.replace("PREPROCESS DEFAULT NO ", "")
                else:
                    print(f"Unknown prompt template: {prompt}")
                    continue

                do_stem_check = True

            # Fresh containers for every (model, prompt) run. Sharing them
            # across runs made later runs report accumulated counts.
            results = []
            category_results = {
                category: {"correct": 0, "total": 0, "samples": []}
                for category in category_to_tasks
            }

            # Evaluate each task
            for category, tasks in category_to_tasks.items():
                print(f"\nEvaluating {category} tasks...")
                running = category_results[category]
                for task in tasks:
                    task_result = evaluate_model_on_task(model, tokenizer, task, prompt, do_stem_check)

                    if task_result:
                        # Store task result without samples to keep the main results clean
                        task_result_no_samples = {k: v for k, v in task_result.items() if k != "samples"}
                        results.append(task_result_no_samples)

                        # Update category totals
                        running["correct"] += task_result["correct"]
                        running["total"] += task_result["total"]

                        # Add some samples to category results for debugging
                        samples_to_extend = [{**sample, "task": task} for sample in task_result["samples"][:2]]
                        if len(running["samples"]) < 10:
                            running["samples"].extend(samples_to_extend)

                        print(f"  {task}: {task_result['accuracy']:.4f} ({task_result['correct']}/{task_result['total']})")

                    # Running category accuracy; guard against a category whose
                    # tasks all failed to load (total == 0).
                    running_accuracy = running['correct'] / running['total'] if running['total'] > 0 else 0
                    print(f"  {category}: {running_accuracy} ({running['correct']}/{running['total']})")

            # Calculate category accuracies
            for category in category_results:
                if category_results[category]["total"] > 0:
                    category_results[category]["accuracy"] = (
                        category_results[category]["correct"] / category_results[category]["total"]
                    )
                else:
                    category_results[category]["accuracy"] = 0

            # Calculate overall accuracy
            overall_correct = sum(result["correct"] for result in results)
            overall_total = sum(result["total"] for result in results)
            overall_accuracy = overall_correct / overall_total if overall_total > 0 else 0

            # Save detailed results
            detailed_results[prompt_orig] = {
                "model": model_name,
                "prompt": prompt_orig,
                "tasks": {result["task"]: result for result in results},
                "categories": {
                    category: {
                        "accuracy": result["accuracy"],
                        "correct": result["correct"],
                        "total": result["total"]
                    } for category, result in category_results.items()
                },
                "overall": {
                    "accuracy": overall_accuracy,
                    "correct": overall_correct,
                    "total": overall_total
                },
                "samples": {
                    category: result["samples"] for category, result in category_results.items()
                }
            }

            # NOTE(review): the file names embed the full prompt text (including
            # newlines and braces); kept unchanged so existing outputs still
            # line up, but a short slug or index would be safer.
            model_slug = model_name.replace('.', '_').replace('/', '_')
            prompt_slug = prompt_orig.replace(' ', '_')

            # Save results as JSON (contains every prompt finished so far for this model)
            results_path = os.path.join(OUTPUT_DIR, f"mmlu_results_{model_slug}_{prompt_slug}.json")
            with open(results_path, "w") as f:
                json.dump(detailed_results, f, indent=2)

            # Create summary DataFrame; explicit columns keep this working even
            # when no task could be evaluated.
            summary_df = pd.DataFrame(results, columns=["task", "accuracy", "correct", "total"])
            summary_df["category"] = summary_df["task"].map(task_to_category)

            # Save summary as CSV
            summary_path = os.path.join(OUTPUT_DIR, f"mmlu_summary_{model_slug}_{prompt_slug}.csv")
            drive_apth = f"/content/drive/MyDrive/mmlu_summary_{model_slug}_{prompt_slug}.csv"
            summary_df.to_csv(summary_path, index=False)
            # The Drive copy is best-effort: skip it when Drive is not mounted
            # instead of aborting the remaining evaluations.
            if os.path.isdir(drive_dir):
                summary_df.to_csv(drive_apth, index=False)
            else:
                print(f"Skipping Drive copy; {drive_dir} is not available")

            # Print summary
            print("\n---- MMLU Evaluation Results ----")
            print(f"Model: {model_name}")
            print("\nCategory Results:")
            for category, result in category_results.items():
                print(f"  {category}: {result['accuracy']:.4f} ({result['correct']}/{result['total']})")
            print(f"\nOverall Accuracy: {overall_accuracy:.4f} ({overall_correct}/{overall_total})")
            print(f"\nDetailed results saved to {results_path}")
            print(f"Summary saved to {summary_path}")

            # Print some sample predictions for debugging
            print("\nSample Predictions:")
            for category, result in category_results.items():
                if result["samples"]:
                    print(f"\n{category.upper()} SAMPLES:")
                    for i, sample in enumerate(result["samples"][:2]):
                        print(f"  Sample {i+1} ({sample['task']}):")
                        print(f"    Q: {sample['question']}")
                        print(f"    Correct: {sample['correct']}, Predicted: {sample['predicted']}")
                        print(f"    Result: {'✓' if sample['is_correct'] else '✗'}")

        # Release this model before the next one is loaded so two models are
        # never resident on the accelerator at the same time.
        del model
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

if __name__ == "__main__":
    # Write an empty CSV to the Google Drive path before starting the run.
    # NOTE(review): presumably a fail-fast check that Drive is mounted and
    # writable before the long evaluation begins - confirm. This raises if
    # /content/drive/MyDrive does not exist.
    summary_df = pd.DataFrame()
    drive_apth = "/content/drive/MyDrive/mmlu_summary.csv"
    summary_df.to_csv(drive_apth, index=False)
    main()

In [None]:
import torch
import numpy as np
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm


def get_classifier(model_name="unsloth/Llama-3.2-1B-Instruct"):
    """Train per-layer linear probes that separate STEM from non-STEM questions.

    MMLU validation questions are formatted as prompts, activations are
    collected for a subset of layers via ``extract_activations``, and a
    logistic-regression classifier is fitted per layer. A bar chart of the
    per-layer accuracies is saved to ``steering_output/layer_accuracies.png``.

    Args:
        model_name: Hugging Face identifier of the causal LM to probe.

    Returns:
        tuple: ``(best_layer, best_model)`` - the name of the layer with the
        highest test accuracy and its fitted ``LogisticRegression``. When no
        classifier could be trained this is ``("", None)``.
    """
    import os  # local import: this cell does not import os itself

    # Configuration
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Load model and tokenizer. Half precision is only used on the GPU; on CPU
    # float16 is slow and not supported by every op.
    print(f"Loading model: {model_name}")
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16 if device == 'cuda' else torch.float32,
    )
    model.to(device)
    model.eval()

    # Get total number of layers
    num_layers = len(model.model.layers)
    print(f"Model has {num_layers} layers")

    # Probe every 5th layer, plus the last layer if the stride skipped it.
    layer_indices = list(range(0, num_layers, 5))
    if num_layers > 0 and layer_indices[-1] != num_layers - 1:
        layer_indices.append(num_layers - 1)

    layer_names = []
    for layer_index in layer_indices:
        layer_names.append(f'model.layers.{layer_index}.self_attn')
        layer_names.append(f'model.layers.{layer_index}.mlp')

    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Classification task: distinguish STEM from non-STEM MMLU questions
    print("Preparing sample data...")

    num_examples = 1000  # per-category budget, split evenly across its subjects
    split = 'validation'
    mmlu_subjects = {
        # 'physics' was removed from this list: it is not an MMLU config and
        # only ever produced a load error.
        'stem': [
            'abstract_algebra', 'astronomy', 'college_biology', 'college_chemistry',
            'college_computer_science', 'college_mathematics', 'college_physics',
            'computer_security', 'conceptual_physics', 'electrical_engineering',
            'elementary_mathematics', 'high_school_biology', 'high_school_chemistry',
            'high_school_computer_science', 'high_school_mathematics', 'high_school_physics',
            'high_school_statistics', 'machine_learning'
        ],
        # NOTE(review): 'formal_logic' appears in neither list - confirm
        # whether leaving it out is intentional.
        'non_stem': [
            'anatomy', 'business_ethics', 'clinical_knowledge', 'college_medicine',
            'econometrics', 'global_facts', 'high_school_european_history',
            'high_school_geography', 'high_school_government_and_politics',
            'high_school_macroeconomics', 'high_school_microeconomics',
            'high_school_psychology', 'high_school_us_history', 'high_school_world_history',
            'human_aging', 'human_sexuality', 'international_law', 'jurisprudence',
            'logical_fallacies', 'management', 'marketing', 'medical_genetics',
            'miscellaneous', 'moral_disputes', 'moral_scenarios', 'nutrition',
            'philosophy', 'prehistory', 'professional_accounting', 'professional_law',
            'professional_medicine', 'professional_psychology', 'public_relations',
            'security_studies', 'sociology', 'us_foreign_policy', 'virology',
            'world_religions'
        ]
    }

    data = {'stem': [], 'non_stem': []}

    # Load subject datasets; a subject that fails to load is reported and skipped
    for category, subjects in mmlu_subjects.items():
        for subject in subjects:
            try:
                print(f"Loading {subject}...")
                dataset = load_dataset("cais/mmlu", subject, split=split)

                # Limit examples per subject
                max_examples = min(len(dataset), num_examples // len(subjects))

                for example_index in range(max_examples):
                    example = dataset[example_index]
                    data[category].append({
                        'subject': subject,
                        'question': example['question'],
                        'choices': [example['choices'][choice_index] for choice_index in range(4)],
                        'answer': example['answer']
                    })
            except Exception as e:
                print(f"Error loading {subject}: {e}")


    # Format MMLU examples for evaluation
    prompt_template = "Question: {question}\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer:"
    def format_mmlu_example(example, prompt_template):
        """Format an MMLU example using the prompt template."""
        formatted = prompt_template.format(
            question=example['question'],
            A=example['choices'][0],
            B=example['choices'][1],
            C=example['choices'][2],
            D=example['choices'][3]
        )
        return formatted

    # STEM prompts are the positive class (label 1), everything else label 0
    positive_texts = [format_mmlu_example(example, prompt_template) for example in data['stem']]
    negative_texts = [format_mmlu_example(example, prompt_template) for example in data['non_stem']]

    texts = positive_texts + negative_texts
    labels = [1] * len(positive_texts) + [0] * len(negative_texts)

    # Extract activations.
    # NOTE(review): extract_activations is defined outside this block; it is
    # used here as returning a mapping of layer name -> per-text activations.
    print("Extracting activations...")
    all_activations = extract_activations(model, tokenizer, texts, layer_names, device)

    # Train and evaluate classifiers for each layer
    results = {}
    best_accuracy = 0
    best_layer = ""
    best_model = None
    for layer_name, activations in all_activations.items():
        print(f"\nTraining classifier for layer: {layer_name}")
        activations_array = np.array(activations)

        # Split data
        X_train, X_test, y_train, y_test = train_test_split(
            activations_array, labels, test_size=0.3, random_state=42
        )

        # Train classifier
        classifier = LogisticRegression(max_iter=1000)
        classifier.fit(X_train, y_train)

        # Evaluate
        y_pred = classifier.predict(X_test)
        print("y_pred", y_pred[:5])
        print("X_test", X_test[:5])
        report = classification_report(y_test, y_pred, output_dict=True)
        results[layer_name] = report['accuracy']

        print(f"Accuracy: {report['accuracy']:.4f}")
        print(classification_report(y_test, y_pred))

        if report['accuracy'] > best_accuracy:
            best_accuracy = report['accuracy']
            best_layer = layer_name
            best_model = classifier

    # Visualize results
    plt.figure(figsize=(10, 6))
    layers = list(results.keys())
    accuracies = [results[layer] for layer in layers]

    plt.bar(range(len(results)), accuracies)
    plt.xlabel('Layer')
    plt.ylabel('Accuracy')
    plt.title('Classification Accuracy by Layer')
    plt.xticks(range(len(results)), [f"Layer {layer.split('.')[-2]} {layer.split('.')[-1]}" for layer in layers])
    plt.tight_layout()
    # savefig does not create missing directories
    os.makedirs('steering_output', exist_ok=True)
    plt.savefig('steering_output/layer_accuracies.png')
    print("Results visualization saved to 'steering_output/layer_accuracies.png'")

    # best_model stays None when no layer produced a usable classifier
    if best_model is not None:
        print("Best model", best_model.coef_)
    else:
        print("Best model", None)
    return best_layer, best_model

