# Validating LLM lie classifier

Instantiate the LLM used as the lie classifier.

In [1]:
import os
from langchain_ollama import ChatOllama
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage

# Initialize the LLM used to classify the lie described by the user.
# temperature=0.0 keeps the classification as repeatable as the backend allows.
golf_classifier_llm = ChatOllama(
    model="llama3.2:3b-instruct-fp16",
    temperature=0.0,
    # ChatOllama caps the generated length with `num_predict`.
    # NOTE(review): the previous `max_tokens=256` is not a documented ChatOllama
    # field, so the 256-token cap was most likely never applied — confirm against
    # the installed langchain-ollama version.
    num_predict=256,
)

List of the possible situations (lies) the classifier can return.

In [2]:
# Closed set of lie labels. `call_llm_classifier` reports a label when it appears
# verbatim (case-sensitive substring) in the LLM output, in the order listed here.
situations = [
    "BALL ABOVE FEET", "BALL BELOW FEET",
    "GREEN SIDE BUNKER SHOT",
    "DOWNHILL LIE", "UPHILL LIE",
    "BALL IN ROUGH",
    "FAIRWAY BUNKER",
]

Define the LLM call that detects which lie the user is describing. The detected lie is then used to retrieve the correct context.

In [3]:
def call_llm_classifier(
    user_message,
    system_message_file,
    log_file="validation_datasets/lie_classification/second_prompt/llm_classifier_log.txt",
):
    """Classify the lie described in ``user_message`` with the classifier LLM.

    The system prompt is read from ``system_message_file`` and sent together with
    the user message to ``golf_classifier_llm``. The exchange is appended to
    ``log_file`` and the raw answer is scanned for the known labels in
    ``situations``.

    Args:
        user_message (str): Free-text description of the shot situation.
        system_message_file (str): Path to the text file holding the system prompt.
        log_file (str): Path of the text log the exchange is appended to. Missing
            parent directories are created. Defaults to the log of the
            "second_prompt" experiment, which was previously hard-coded.

    Returns:
        list[str]: Every entry of ``situations`` that occurs verbatim
        (case-sensitive substring match) in the LLM answer, in the order of
        ``situations``. Empty when no label is found.
    """
    # Explicit UTF-8 so the prompt is decoded the same way on every platform.
    with open(system_message_file, "r", encoding="utf-8") as file:
        system_message_content = file.read()

    classifier_messages = [
        SystemMessage(content=system_message_content),
        HumanMessage(content=user_message),
    ]

    # Get the classification from the LLM
    classifier_response = golf_classifier_llm.invoke(classifier_messages)
    response_text = classifier_response.content

    # Create the log directory on demand so a missing folder does not discard
    # the answer of an LLM call that has already been made.
    log_directory = os.path.dirname(log_file)
    if log_directory:
        os.makedirs(log_directory, exist_ok=True)

    # Append the input and the raw response to the log file.
    with open(log_file, "a", encoding="utf-8") as log:
        log.write(f"User Message: {user_message}\n")
        log.write(f"LLM Output: {response_text}\n")
        log.write("--------------------\n")

    # Keep every known situation that is mentioned in the answer.
    return [situation for situation in situations if situation in response_text]





Try the classifier on a single example, then iterate over the validation dataset.

In [4]:
# Smoke test: classify one hand-written message before running the full dataset.
user_message = "The ball is in a bunker near the green."
call_llm_classifier(
    user_message=user_message,
    system_message_file="system_messages/lie_classification/second_prompt.txt",
)

['GREEN SIDE BUNKER SHOT']

In [6]:
import json
import time

def process_validation_file(file_path, system_message_file, delay_seconds=2):
    """Run the lie classifier over every entry of a validation JSON file.

    The file is expected to contain a JSON list of objects, each with a
    "User Message" key (the text to classify) and an "Output" key (the expected
    labels).

    Args:
        file_path (str): Path to the validation JSON file.
        system_message_file (str): Path to the system prompt passed on to
            ``call_llm_classifier``.
        delay_seconds (float): Pause inserted between two consecutive classifier
            calls. Defaults to 2, the value that was previously hard-coded.
            Use 0 to disable the pause.

    Returns:
        list[dict]: One dictionary per entry with the keys "User Message",
        "Expected Output" and "Classifier Result", in file order.
    """
    # Explicit UTF-8: the messages contain non-ASCII characters (e.g. typographic
    # apostrophes) that a platform-dependent default encoding could garble.
    with open(file_path, "r", encoding="utf-8") as file:
        validation_data = json.load(file)

    results = []

    for index, entry in enumerate(validation_data):
        # Throttle only between calls; there is nothing to wait for before the
        # first call or after the last one.
        if index > 0 and delay_seconds > 0:
            time.sleep(delay_seconds)

        user_message = entry["User Message"]
        expected_output = entry["Output"]

        # Call the LLM classifier with the user message
        classifier_result = call_llm_classifier(user_message, system_message_file)

        results.append({
            "User Message": user_message,
            "Expected Output": expected_output,
            "Classifier Result": classifier_result,
        })

    return results

In [7]:
# Classify every entry of the validation set (one LLM call per entry).
validation_result = process_validation_file(
    file_path="validation_datasets/lie_classification/lie_classification.json",
    system_message_file="system_messages/lie_classification/second_prompt.txt",
)

In [8]:
# Show the first record to check the structure of the collected results.
first_validation_record = validation_result[0]
print(first_validation_record)

{'User Message': 'The ball is resting below my feet, and I’m worried it might slice when I hit it.', 'Expected Output': ['BALL BELOW FEET'], 'Classifier Result': ['BALL BELOW FEET']}


## Results

In [9]:
import json

def calculate_correctness(results):
    """
    Score each classifier output against its expected output.

    The score is the size of the intersection of the expected and predicted
    label sets divided by the size of their union, so both extra and missing
    labels lower it. Duplicated labels and label order are ignored because both
    sides are converted to sets.

    When both sets are empty the score is 1.0: nothing was expected, nothing was
    predicted, so there is no extra or missing label to penalize.

    Args:
        results (list): A list of dictionaries, each containing "Expected Output" and "Classifier Result".

    Returns:
        list: The same list object; each dictionary is updated in place with a
        "Correctness" key holding a float between 0.0 and 1.0.
    """
    for result in results:
        expected = set(result["Expected Output"])
        predicted = set(result["Classifier Result"])

        # The union holds correct, extra and missing labels exactly once each.
        all_labels = expected | predicted

        if all_labels:
            correctness = len(expected & predicted) / len(all_labels)
        else:
            # Correctly predicting "no situation" is a perfect answer, not a failure.
            correctness = 1.0

        # Add the correctness metric to the result
        result["Correctness"] = correctness

    return results

def calculate_global_correctness(results):
    """
    Average the per-record "Correctness" values.

    Args:
        results (list): Dictionaries that each carry a "Correctness" key.

    Returns:
        float: Mean of all "Correctness" values, or 0.0 when ``results`` is empty.
    """
    # Guard first: an empty input has no mean and would divide by zero.
    if not results:
        return 0.0

    scores = [record["Correctness"] for record in results]
    return sum(scores) / len(results)

def save_results_to_json(results, output_file):
    """
    Save results with correctness to a JSON file.

    Missing parent directories of ``output_file`` are created first, so the
    outcome of a long validation run is not lost to a missing folder. An existing
    file is overwritten.

    Args:
        results (list): A list of dictionaries with "Correctness" key.
        output_file (str): Path to the output JSON file.
    """
    output_directory = os.path.dirname(output_file)
    if output_directory:
        os.makedirs(output_directory, exist_ok=True)

    # json.dump escapes non-ASCII characters by default; the explicit encoding
    # just removes any dependence on the platform default.
    with open(output_file, "w", encoding="utf-8") as file:
        json.dump(results, file, indent=4)

In [10]:
# Score every record. calculate_correctness annotates the dictionaries in place,
# so this name refers to the same list as `validation_result`.
validation_result_with_correctness = calculate_correctness(results=validation_result)

In [11]:
# Average the per-record scores into a single figure for the whole validation set.
global_correctness = calculate_global_correctness(validation_result_with_correctness)
print(f"The global correctness across the validation set is: {global_correctness}")

The global correctness across the validation set is: 0.9338235294117647


In [12]:
#TODO AUTOMATICALLY PUT RIGHT DIRECTORY

# Persist the scored records next to the other outputs of this prompt version.
save_results_to_json(
    results=validation_result_with_correctness,
    output_file="validation_datasets/lie_classification/second_prompt/validation_results.json",
)