# Evaluating ChatBot Caddy 

## Is it answering using faithful information coming from the knowledge base?

#### Yes, but first we ask the LLM to answer given the provided context.

In [8]:
import os
from langchain_ollama import ChatOllama
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage


# Chat model that plays the golf caddy; a moderate temperature keeps answers varied.
golf_llm_caddy = ChatOllama(
    model="llama3.2:3b-instruct-fp16",
    temperature=0.5,
    # ChatOllama caps generation length with `num_predict`; it has no
    # `max_tokens` field, so the previous keyword never limited the output.
    num_predict=500,
)

In [9]:
import json
from langchain.schema import HumanMessage, SystemMessage

def process_user_messages(
    json_file,
    system_message_file,
    context_folder,
    llm=None,
    log_path="validation_datasets/golf_caddy/first_prompt/llm_classifier_log.txt",
):
    """
    Answers every user message of a labelled dataset with the golf caddy chatbot.

    For each dataset entry, the `.txt` files named after its "Output" labels are read
    from `context_folder`, appended to the user message, and sent to the LLM together
    with the system message. Every exchange is appended to `log_path`.

    Args:
        json_file (str): Path to the JSON file holding a list of entries with
            "User Message" and "Output" keys.
        system_message_file (str): Path to the text file containing the system message.
        context_folder (str): Path to the folder containing context `.txt` files.
        llm (object, optional): Chat model exposing `invoke(messages)`. When omitted,
            the module-level `golf_llm_caddy` is used.
        log_path (str, optional): Text file the exchanges are appended to. Its parent
            directory is created if it does not exist.

    Returns:
        list: One dictionary per entry with the keys "User Message", "Output" and "Response".
    """
    chat_model = golf_llm_caddy if llm is None else llm

    # Load the dataset from the JSON file
    with open(json_file, "r", encoding="utf-8") as file:
        dataset = json.load(file)

    # The system message is the same for every entry, so read it once up front
    with open(system_message_file, "r", encoding="utf-8") as file:
        system_message_content = file.read()

    # Make sure the log can be written before spending time on LLM calls
    log_dir = os.path.dirname(log_path)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    results = []

    for entry in dataset:
        user_message = entry["User Message"]
        output_labels = entry["Output"]

        # A bare string would otherwise be iterated character by character
        labels = [output_labels] if isinstance(output_labels, str) else output_labels

        # Initialize context for this message
        context = "\nCONTEXT ON HOW TO PLAY SHOT: \n"

        # Retrieve context from the text file matching each label
        for label in labels:
            file_name = label.lower().replace(" ", "_") + ".txt"
            file_path = f"{context_folder}/{file_name}"

            try:
                with open(file_path, "r", encoding="utf-8") as context_file:
                    context += context_file.read() + "\n"
            except FileNotFoundError:
                # Keep going: the placeholder makes the gap visible to the LLM and in the log
                context += f"[Context file {file_name} not found.]\n"

        # The retrieved context travels inside the human message
        human_message = HumanMessage(content=user_message + context)

        print(human_message)

        caddy_messages = [SystemMessage(content=system_message_content), human_message]

        golf_answer = chat_model.invoke(caddy_messages)

        # Append the exchange to the log file
        with open(log_path, "a", encoding="utf-8") as log_file:
            log_file.write(f"User Message: {user_message}\n")
            log_file.write(f"LLM Output: {golf_answer.content}\n")
            log_file.write("--------------------\n")

        # Store the processed information
        results.append({
            "User Message": user_message,
            "Output": output_labels,
            "Response": golf_answer.content,
        })

    return results

In [None]:
# Run the golf caddy over the lie-classification validation set
results = process_user_messages(
    json_file="validation_datasets/lie_classification/lie_classification.json",
    system_message_file="system_messages/golf_caddy/first_prompt.txt",
    context_folder="documents",
)

In [11]:
# File path to save the JSON
file_path = "validation_datasets/golf_caddy/first_prompt/answers.json"

# Serialise before opening the file: opening in "w" mode truncates it, so a
# failed dump would otherwise wipe previously saved answers.
serialized_results = json.dumps(results, indent=4)

with open(file_path, "w", encoding="utf-8") as json_file:
    json_file.write(serialized_results)

print(f"Data saved to {file_path}")

Data saved to validation_datasets/golf_caddy/first_prompt/answers.json


### Now we perform the hallucination detection using an LLM as a judge

In [12]:
import os
from langchain_ollama import ChatOllama
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage


# Judge model for hallucination detection; temperature 0 keeps verdicts repeatable.
judge_llm = ChatOllama(
    model="llama3.2:3b-instruct-fp16",
    temperature=0.0,
    # ChatOllama caps generation length with `num_predict`; it has no
    # `max_tokens` field, so the previous keyword never limited the output.
    num_predict=500,
)

In [17]:
import json
from langchain.schema import HumanMessage

def perform_hallucination_detection(
    results_file,
    system_message_file,
    judge_llm,
    context_folder,
    log_path="validation_datasets/hallucination_detection/answer_faithfullness/hallucination_log.txt",
):
    """
    Iterates over existing results, rebuilds the context of each one, and asks a judge LLM
    whether the stored answer is supported by that context.

    Args:
        results_file (str): Path to the JSON file holding a list of entries with
            "User Message", "Output" and "Response" keys.
        system_message_file (str): Path to the system message file for the hallucination detection judge.
        judge_llm (object): LangChain LLM instance for hallucination detection; only
            `invoke(messages)` is called on it.
        context_folder (str): Path to the folder containing the context `.txt` files
            named after the "Output" labels.
        log_path (str, optional): Text file the judge exchanges are appended to. Its
            parent directory is created if it does not exist.

    Returns:
        list: One dictionary per entry with the keys "User Message", "Output", "Facts",
        "Response" and "Hallucination Detection".
    """
    # Load the results dataset
    with open(results_file, "r", encoding="utf-8") as file:
        results = json.load(file)

    # Read the system message for the hallucination judge
    with open(system_message_file, "r", encoding="utf-8") as file:
        system_message_content = file.read()

    # Make sure the log can be written before spending time on LLM calls
    log_dir = os.path.dirname(log_path)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    # Placeholder for new structure with hallucination detection results
    new_structure = []

    # Number the rows from 1 so the progress print identifies the entry being judged
    for row_number, entry in enumerate(results, start=1):
        user_message = entry["User Message"]
        output_labels = entry["Output"]
        llm_answer = entry["Response"]

        # A bare string would otherwise be iterated character by character
        labels = [output_labels] if isinstance(output_labels, str) else output_labels

        # Retrieve context from the text file matching each label
        context = ''
        for label in labels:
            file_name = label.lower().replace(" ", "_") + ".txt"
            file_path = f"{context_folder}/{file_name}"

            try:
                with open(file_path, "r", encoding="utf-8") as context_file:
                    context += context_file.read() + "\n"
            except FileNotFoundError:
                # Keep going: the placeholder makes the gap visible to the judge and in the log
                context += f"[Context file {file_name} not found.]\n"

        # Construct the input for the hallucination judge
        hallucination_message_content = (
            f"FACTS: {context.strip()}\n\n"
            f"STUDENT ANSWER: {llm_answer.strip()}\n\n"
        )
        hallucination_messages = [
            SystemMessage(content=system_message_content),
            HumanMessage(content=hallucination_message_content),
        ]

        print(f"This is the row {row_number}: {hallucination_message_content}")

        # Perform hallucination detection with the judge LLM
        hallucination_judge_response = judge_llm.invoke(hallucination_messages)

        # Log the hallucination detection process
        with open(log_path, "a", encoding="utf-8") as log_file:
            log_file.write(f"User Message: {user_message}\n")
            log_file.write(f"Facts: {context.strip()}\n")
            log_file.write(f"LLM Answer: {llm_answer.strip()}\n")
            log_file.write(f"Hallucination Judge Response: {hallucination_judge_response.content}\n")
            log_file.write("--------------------\n")

        # Append the full information to the new structure
        new_structure.append({
            "User Message": user_message,
            "Output": output_labels,
            "Facts": context,
            "Response": llm_answer,
            "Hallucination Detection": hallucination_judge_response.content,
        })

    return new_structure

In [None]:
# Judge every stored caddy answer against the documents it was given
hallucination_results = perform_hallucination_detection(
    results_file="validation_datasets/golf_caddy/first_prompt/answers.json",
    system_message_file="system_messages/hallucination_detection/answer_faithfullness.txt",
    judge_llm=judge_llm,
    context_folder="documents",
)