RAG Configuration in this notebook:

Embedding model: sentence-transformers/all-mpnet-base-v2

Chunk size: 1000

Chunk overlap: 100

Generation Model: meta-llama/Meta-Llama-3-8B-Instruct (loaded with 4-bit quantization)

Retriever: Pinecone vector store (similarity search, k=5)

Embedding Size: 768

This notebook also includes RAG response generation and evaluation.

In [None]:
import pickle
from pinecone import Pinecone, ServerlessSpec
import getpass
import os
import time
from transformers import AutoTokenizer, AutoModelForCausalLM
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
import pinecone
import torch
from langchain.text_splitter import RecursiveCharacterTextSplitter
from pinecone import Index
from langchain_pinecone import PineconeVectorStore
from transformers import BitsAndBytesConfig
import bitsandbytes
from langchain.llms import HuggingFacePipeline
from transformers import pipeline
from tqdm import tqdm
from langchain.prompts import PromptTemplate
import pandas as pd

In [2]:
# Import the Pinecone client under an alias. The imports cell also runs
# `from langchain.vectorstores import Pinecone`, which rebinds the bare name
# `Pinecone` to LangChain's vector-store class on a fresh top-to-bottom run,
# so `Pinecone(api_key=...)` would no longer construct the client.
from pinecone import Pinecone as PineconeClient

# Prompt for the key only when it is not already in the environment, so the
# secret never has to be written into the notebook.
if not os.getenv("PINECONE_API_KEY"):
    os.environ["PINECONE_API_KEY"] = getpass.getpass("Enter your Pinecone API key: ")

pinecone_api_key = os.environ.get("PINECONE_API_KEY")

pc = PineconeClient(api_key=pinecone_api_key)

Enter your Pinecone API key: ··········


In [None]:
# Embedding model: placed on the GPU when torch can see one, otherwise on the CPU.
model_name = "sentence-transformers/all-mpnet-base-v2"
if torch.cuda.is_available():
    model_kwargs = {"device": "cuda"}
else:
    model_kwargs = {"device": "cpu"}
# Embeddings are requested without normalization.
encode_kwargs = {"normalize_embeddings": False}

hf_embeddings = HuggingFaceEmbeddings(
    encode_kwargs=encode_kwargs,
    model_kwargs=model_kwargs,
    model_name=model_name,
)

  hf_embeddings = HuggingFaceEmbeddings(
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [3]:
# Create the index if it doesn't exist, then connect to it.
# The guard keeps the cell safe to re-run: an existing index is reused as-is.
INDEX_NAME = "rag-llm-1000"

if INDEX_NAME not in pc.list_indexes().names():
    pc.create_index(
        name=INDEX_NAME,
        dimension=768,  # must match the embedding size of all-mpnet-base-v2
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1",
        ),
    )
    # A freshly created index is not usable immediately; wait until it reports ready.
    while not pc.describe_index(INDEX_NAME).status["ready"]:
        time.sleep(1)

pc_index = pc.Index(INDEX_NAME)

In [None]:
# Interactive Hugging Face Hub login; renders the login widget in the cell output.
# NOTE(review): presumably needed because the Llama 3 checkpoint loaded later
# requires an authenticated account — confirm.
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
# Load the document
# `documents` is later passed to `text_splitter.split_documents`, so it is expected
# to be a collection of document objects (presumably LangChain `Document`s — confirm).
# NOTE(security): pickle.load can execute arbitrary code while deserializing;
# only open this file if it comes from a trusted source.
with open("data_5983_updated.pkl", "rb") as file:
    documents = pickle.load(file)

In [None]:
# Chunking: split every loaded document with chunk_size=1000 and chunk_overlap=100.
splitter_settings = {"chunk_size": 1000, "chunk_overlap": 100}
text_splitter = RecursiveCharacterTextSplitter(**splitter_settings)

chunks = text_splitter.split_documents(documents)


In [None]:
len(chunks)

2644

In [None]:
# Summarize chunk sizes (in characters of page_content) as a sanity check on the split.
chunk_lengths = [len(chunk.page_content) for chunk in chunks]
total_size = sum(chunk_lengths)
if chunks:
    average_size = total_size / len(chunks)
else:
    # No chunks: report 0 rather than dividing by zero.
    average_size = 0

print(f"Total size of chunks: {total_size}")
print(f"Number of chunks: {len(chunks)}")
print(f"Average size of chunks: {average_size:.2f} characters")


Total size of chunks: 1669472
Number of chunks: 2644
Average size of chunks: 631.42 characters


In [None]:
from langchain_pinecone import PineconeVectorStore

# VectorStore: LangChain wrapper around the "rag-llm-1000" index that embeds
# text with `hf_embeddings`.
vectorstore = PineconeVectorStore(embedding=hf_embeddings, index_name="rag-llm-1000")

In [None]:
# One-off ingestion step, left commented out. The progress bar kept in this cell's
# output comes from the earlier run that uploaded the chunks.
# NOTE(review): presumably disabled so that re-running the notebook does not
# upload the same chunks a second time — confirm before re-enabling.
#for chunk in tqdm(chunks, desc="Adding documents to Pinecone", unit="chunk"):
#    vectorstore.add_documents([chunk])

Adding documents to Pinecone: 100%|██████████| 2644/2644 [13:22<00:00,  3.30chunk/s]


In [4]:
# Fetch and print the index statistics; the vector count can be compared with
# the number of chunks produced above.
index_stats = pc_index.describe_index_stats()

print("Index Stats:", index_stats)

Index Stats: {'dimension': 768,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 2644}},
 'total_vector_count': 2644}


In [None]:
# Retriever over the vector store: similarity search with k=5.
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5})

In [None]:
!pip install -U bitsandbytes



In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import bitsandbytes

# Single source of truth for the checkpoint, so model and tokenizer cannot drift apart.
llm_checkpoint = "meta-llama/Meta-Llama-3-8B-Instruct"

# 4-bit weights with float16 compute.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)

llm_model = AutoModelForCausalLM.from_pretrained(
    llm_checkpoint,
    quantization_config=bnb_config,
)
llm_tokenizer = AutoTokenizer.from_pretrained(llm_checkpoint)

`low_cpu_mem_usage` was None, now default to True since model is quantized.


model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/187 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/51.0k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

In [None]:
# Use the end-of-sequence token id as the padding token id.
# NOTE(review): presumably because this tokenizer ships without a pad token — confirm.
llm_tokenizer.pad_token_id = llm_tokenizer.eos_token_id

In [None]:
# Generation settings for the text-generation pipeline. return_full_text=False
# is passed so that only the newly generated text is returned, not the prompt.
generation_settings = {
    "do_sample": True,
    "temperature": 0.2,
    "top_p": 0.9,
    "top_k": 50,
    "repetition_penalty": 1.1,
    "max_new_tokens": 100,
    "return_full_text": False,
    "eos_token_id": llm_tokenizer.eos_token_id,
}

llm_pipeline = pipeline(
    "text-generation",
    model=llm_model,
    tokenizer=llm_tokenizer,
    **generation_settings,
)

In [None]:
from langchain.llms import HuggingFacePipeline

# Wrap the transformers pipeline in LangChain's LLM interface; this object is
# what the RetrievalQA chain receives as `llm`.
llm_final_model = HuggingFacePipeline(pipeline=llm_pipeline)

  llm_final_model = HuggingFacePipeline(pipeline=llm_pipeline)


In [None]:
from langchain.prompts import PromptTemplate
# NOTE(review): RunnablePassthrough and StrOutputParser are not used in the
# visible part of this notebook — confirm whether they are still needed.
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser

# Prompt template
# The string is a regular (non-raw) triple-quoted literal, so each trailing "\n"
# is a real newline in addition to the physical line break, giving a blank line
# after those sentences. {context} and {question} are the two placeholders
# declared in `input_variables` below.
template = """
You are a compassionate and knowledgeable mental health assistant that answers questions related to mental health.\n
Use the following pieces of retrieved context to provide a helpful and empathetic response to the user's question.\n
Use only the context provided and not any prior knowledge.\n
If you are unsure of the answer, tell that you do not know the answer.\n
Stick to the question and just answer the question in a short manner.\n
Avoid any additional greetings or elaborations.\n

Context: \n
------------------------------------------------------------------------------\n
{context}
------------------------------------------------------------------------------\n
Given the context and without any prior knowledge, answer the below question.\n
Question: {question}
Answer:
"""

# Wrap the raw string so the chain can fill in the retrieved context and the user question.
prompt = PromptTemplate(
    template=template,
    input_variables=["context", "question"]
)

In [None]:
# Retrieval-augmented QA chain: `retriever` supplies the context, `llm_final_model`
# generates the answer, and the custom `prompt` is handed to the underlying chain
# through `chain_type_kwargs`.
# return_source_documents=True asks the chain to include the retrieved documents
# in its output; the cells that follow read only the 'result' key.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm_final_model,
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": prompt}
)

In [None]:
# Smoke test: run one question through the chain before processing the whole evaluation set.
query = "What is the Four-Fold Breath technique?"

result= qa_chain.invoke(query)

In [None]:
answer = result['result']
print("Answer:", answer)

Answer: The Four-Fold Breath technique involves breathing in slowly to the count of 4 and breathing out even slower to the count of 6. This exercise is recommended to be practiced for about 5 minutes, three times a day, focusing solely on your breathing.


RAG Response Generation

In [None]:
df = pd.read_csv("U_eval_dataset_100q_with_context.csv")

In [None]:
df.head()

Unnamed: 0,Question,Ground_Truth,Source,Category,contexts
0,What triggers Seasonal Affective Disorder (SAD)?,"SAD is triggered by seasonal changes, especial...",https://www.samhsa.gov/mental-health/seasonal-...,Mental Illness,"['Seasonal affective disorder, or SAD, is a co..."
1,What are the health risks of bulimia nervosa?,"Risks include dental decay, dehydration, and s...",https://www.samhsa.gov/mental-health/eating-di...,Mental Illness,['It is common that binge eating will lead to ...
2,How can someone with mental health problems ma...,By finding trustworthy individuals who provide...,https://www.samhsa.gov/mental-health/how-to-ta...,Mental Health,"[""Anyone can experience mental health problems..."
3,What are the risk factors for schizophrenia?,Risk factors include genetic predisposition an...,https://www.samhsa.gov/mental-health/schizophr...,Mental Illness,['Schizophrenia is a serious brain disorder th...
4,How does schizophrenia impact daily functioning?,"It affects work, socialization, and completing...",https://www.samhsa.gov/mental-health/schizophr...,Mental Illness,['Schizophrenia is a serious brain disorder th...


In [None]:
# Columns copied unchanged from the evaluation dataset into every output record.
passthrough_columns = ["Question", "Ground_Truth", "Source", "Category"]

results = []
for _, row in tqdm(df.iterrows(), total=len(df), desc="Generating Responses"):
    record = {column: row[column] for column in passthrough_columns}
    # The dataset's own 'contexts' column is stored under the key 'Context'.
    record["Context"] = row["contexts"]
    # Only the generated answer text is kept from the chain output.
    record["rag_response"] = qa_chain.invoke(row["Question"])["result"]
    results.append(record)

# Build the frame once from the collected records.
results_df = pd.DataFrame(results)

In [None]:
# index=False: the default RangeIndex carries no information, and writing it makes
# the file come back with a spurious "Unnamed: 0" column when it is read again.
results_df.to_csv("all-mpnet-base-v2-1000-100-responses.csv", index=False)

Evaluation

In [None]:
eval_df = pd.read_csv("all-mpnet-base-v2-1000-100-responses.csv")

In [None]:
eval_df

Unnamed: 0,Question,Ground_Truth,Source,Category,Context,rag_response
0,What triggers Seasonal Affective Disorder (SAD)?,"SAD is triggered by seasonal changes, especial...",https://www.samhsa.gov/mental-health/seasonal-...,Mental Illness,"['Seasonal affective disorder, or SAD, is a co...",The context suggests that Seasonal Affective D...
1,What are the health risks of bulimia nervosa?,"Risks include dental decay, dehydration, and s...",https://www.samhsa.gov/mental-health/eating-di...,Mental Illness,['It is common that binge eating will lead to ...,The health risks of bulimia nervosa include ch...
2,How can someone with mental health problems ma...,By finding trustworthy individuals who provide...,https://www.samhsa.gov/mental-health/how-to-ta...,Mental Health,"[""Anyone can experience mental health problems...","Find someone who likes, respects, and trusts y..."
3,What are the risk factors for schizophrenia?,Risk factors include genetic predisposition an...,https://www.samhsa.gov/mental-health/schizophr...,Mental Illness,['Schizophrenia is a serious brain disorder th...,"According to the provided context, biological ..."
4,How does schizophrenia impact daily functioning?,"It affects work, socialization, and completing...",https://www.samhsa.gov/mental-health/schizophr...,Mental Illness,['Schizophrenia is a serious brain disorder th...,Schizophrenia can be extremely disruptive to a...
...,...,...,...,...,...,...
95,How does untreated OCD impact daily life?,It can lead to compulsions that dominate daily...,https://www.samhsa.gov/mental-health/anxiety-d...,Anxiety,"[""counseling can be very effective for ocd . i...","Left untreated, OCD can take over your life."
96,How can parents identify mental health concern...,"By observing signs like extreme sadness, withd...",https://www.samhsa.gov/mental-health/how-to-ta...,Mental Health,"[""As a parent or caregiver, you want the best ...",Parents can identify mental health concerns in...
97,Why do people develop social anxiety disorder?,It often develops due to genetics or negative ...,https://www.samhsa.gov/mental-health/anxiety-d...,Anxiety,['Signs & Symptoms of Social Anxiety Disorder\...,"According to the context, factors such as gene..."
98,What are signs a child may need mental health ...,"Signs include drastic mood swings, extreme fea...",https://www.samhsa.gov/mental-health/how-to-ta...,Mental Health,"[""As a parent or caregiver, you want the best ...","According to the provided context, signs a chi..."


In [None]:
# Convert the frame to a list of per-row dicts for the evaluation loop.
# The name `eval_df` is kept because the loop iterates over it; the isinstance
# guard makes this cell safe to re-run (a second run would otherwise call
# .to_dict on a list and raise AttributeError).
if isinstance(eval_df, pd.DataFrame):
    eval_df = eval_df.to_dict(orient='records')

In [None]:
# Evaluation prompt
# Filled with str.format using the fields Question, rag_response and Ground_Truth.
# Doubled braces ({{...}}) are format escapes: they reach the judge model as
# literal single-brace placeholders such as {feedback}.
# The score is later extracted from the line starting with "RAG Overall Score:",
# so keep that label in sync with the parsing regex if the format is edited.
evaluation_prompt = """
### Task Description:
You are provided with an instruction (or query), a response to evaluate, a reference answer that represents an ideal response, and evaluation criteria. Additionally, you are given responses from a RAG model: a RAG response. Your task is to evaluate and compare the RAG response with the reference answer based on the provided criteria.

1. Assess the quality of the response strictly based on the given rubric and criteria (relevance, semantic similarity, accuracy, factual correctness, completeness, clarity, and conciseness), not general impressions.
2. Write short feedback for each response based on the rubric, highlighting strengths and weaknesses for each criterion.
3. Provide feedback in the specified format.
4. Finally, assign an overall score out of 10 for RAG response.


### The instruction to evaluate:
{Question}

### RAG Response:
{rag_response}

### Reference Answer:
{Ground_Truth}

### Rubric for Individual Criteria:
- **Relevance**: How effectively does the response address the specific requirements or intent of the query?
- **Semantic Similarity**: To what extent does the response capture the meaning or intent of the reference answer, even if the phrasing differs?
- **Accuracy**: Does the response provide correct and logically sound information in relation to the query?
- **Factual Correctness**: Are the facts presented in the response verified and consistent with reliable sources or the reference answer?
- **Completeness**: Does the response thoroughly cover all essential aspects of the query without omitting critical information?
- **Clarity**: Is the response expressed in a manner that is straightforward, unambiguous, and easy to comprehend?
- **Conciseness**: Does the response convey the necessary information succinctly, avoiding extraneous details or redundancy?

### Output Format:
Feedback:
- RAG Response:
    - Relevance: {{feedback}}
    - Semantic Similarity: {{feedback}}
    - Accuracy: {{feedback}}
    - Factual Correctness: {{feedback}}
    - Completeness: {{feedback}}
    - Clarity: {{feedback}}
    - Conciseness: {{feedback}}

### Overall Scores:
- RAG Overall Score: {{integer between 1 and 10}}
"""

In [None]:
import openai

In [None]:
!pip install openai



In [None]:
!pip install httpx==0.27.2



In [None]:
from openai import OpenAI

# Never hardcode the key in the notebook: read it from the environment and
# prompt once if it is missing (same pattern as the Pinecone key above).
if not os.getenv("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")

client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

In [None]:
import re

# Matches the score line of the judge output. \W* tolerates punctuation and
# Markdown emphasis between the label and the number, e.g.
# "RAG Overall Score: 9", "**RAG Overall Score:** 9/10".
_RAG_SCORE_PATTERN = re.compile(r"RAG Overall Score\W*(\d+)", re.IGNORECASE)


def _parse_rag_score(evaluation_text):
    """Extract the integer that follows the "RAG Overall Score" label."""
    match = _RAG_SCORE_PATTERN.search(evaluation_text or "")
    if match is None:
        # Fail with a message that shows what the judge actually returned,
        # instead of an opaque AttributeError on None.group().
        raise ValueError(
            "Could not find 'RAG Overall Score: <int>' in the evaluation output: "
            f"{evaluation_text!r}"
        )
    return int(match.group(1))


def evaluate_responses(instruction, rag_response, reference_answer):
    """Score one RAG response against its reference answer with the judge model.

    The evaluation prompt is filled with the three arguments, sent to the chat
    completions endpoint, and the raw judge output is printed.

    Args:
        instruction: The question that was asked.
        rag_response: The answer produced by the RAG chain.
        reference_answer: The ground-truth answer to compare against.

    Returns:
        The overall score reported by the judge, as an int.

    Raises:
        ValueError: If the judge output contains no parsable overall score.
    """
    prompt = evaluation_prompt.format(
        Question=instruction,
        rag_response=rag_response,
        Ground_Truth=reference_answer
    )

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are a helpful assistant evaluating model responses."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=1000,
        temperature=0.5
    )

    s = response.choices[0].message.content
    print("Evaluation Output:")
    print(s)

    return _parse_rag_score(s)

# Score every generated answer with the judge and keep the score next to the
# original record fields.
evaluations = []
for sample in tqdm(eval_df, desc="Evaluating responses", unit="sample"):
    score = evaluate_responses(
        sample["Question"],
        sample["rag_response"],
        sample["Ground_Truth"],
    )

    evaluations.append({
        "Question": sample["Question"],
        "Ground_Truth": sample["Ground_Truth"],
        "Source": sample["Source"],
        "Category": sample["Category"],
        "Context": sample["Context"],
        "rag_response": sample["rag_response"],
        "RAG_Score": score,
    })

Evaluating responses:   1%|          | 1/100 [00:02<04:12,  2.55s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response effectively addresses the query by identifying the seasonal changes that trigger SAD. However, it could be more direct in linking the decrease in sunlight specifically to the condition.
    - Semantic Similarity: The response captures the essence of the reference answer, noting the importance of seasonal changes and reduced sunlight, though it elaborates further than necessary.
    - Accuracy: The information provided is accurate, explaining how decreased sunlight can disrupt circadian rhythms and affect serotonin levels.
    - Factual Correctness: The facts presented are consistent with reliable sources regarding SAD, confirming the role of sunlight in triggering the disorder.
    - Completeness: While the response includes relevant details about circadian rhythms and serotonin, it may include more information than necessary for the question, which primarily asks about triggers.
    - Clarity: The response is c

Evaluating responses:   2%|▏         | 2/100 [00:05<04:24,  2.70s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it directly addresses the health risks associated with bulimia nervosa, aligning well with the query.
    - Semantic Similarity: The response captures the intent of the reference answer but includes additional details, which slightly diverges from the brevity of the reference.
    - Accuracy: The information provided is accurate; it correctly identifies health risks associated with bulimia nervosa.
    - Factual Correctness: The facts presented are consistent with known health risks related to bulimia nervosa, making it factually correct.
    - Completeness: The response includes several health risks, which adds to its completeness compared to the reference answer, though it could have mentioned other risks for a more comprehensive overview.
    - Clarity: The response is clear and easy to understand, with no ambiguity in the expression of the health risks.
    - Conciseness: While the response is

Evaluating responses:   3%|▎         | 3/100 [00:08<04:39,  2.88s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it addresses the query about maintaining a support system for individuals with mental health problems. However, it could benefit from a broader perspective on the types of support available.
    - Semantic Similarity: The response captures some key elements of the reference answer, such as the importance of trust and respect, but it does not fully align with the emphasis on confidentiality and growth mentioned in the reference.
    - Accuracy: The advice given is generally sound, emphasizing the importance of mutual respect and trust, which is accurate in the context of building a support system.
    - Factual Correctness: The information presented does not contain any factual inaccuracies but lacks depth in explaining how to find and maintain these relationships.
    - Completeness: The response is somewhat incomplete as it does not cover all essential aspects, such as the importance of confident

Evaluating responses:   4%|▍         | 4/100 [00:10<04:11,  2.62s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it addresses the query about risk factors for schizophrenia, mentioning biological and life experience factors.
    - Semantic Similarity: The response captures the general idea of the reference answer but expands on it, which slightly diverges from the succinctness of the reference.
    - Accuracy: The information provided is accurate in identifying risk factors related to schizophrenia.
    - Factual Correctness: The facts presented align with general knowledge about schizophrenia risk factors, supporting its correctness.
    - Completeness: The response is somewhat complete, as it identifies key risk factors, but it could be more focused on the specific factors mentioned in the reference answer.
    - Clarity: The response is clear and easy to understand, presenting information in a straightforward manner.
    - Conciseness: The response is less concise than the reference answer, as it adds add

Evaluating responses:   5%|▌         | 5/100 [00:14<04:46,  3.01s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is highly relevant as it directly addresses how schizophrenia impacts daily functioning, covering various aspects of life affected by the condition.
    - Semantic Similarity: The response captures the intent of the reference answer but expands on it significantly. While it conveys similar meanings, it does not align closely in phrasing or brevity.
    - Accuracy: The information provided is accurate, detailing the challenges faced by individuals with schizophrenia in daily activities.
    - Factual Correctness: The facts presented are consistent with the understanding of schizophrenia and its effects on daily life, aligning with reliable sources.
    - Completeness: The response is complete, addressing multiple dimensions of daily functioning affected by schizophrenia, including work, socialization, and personal care.
    - Clarity: The response is clear and easy to understand, using straightforward language to

Evaluating responses:   6%|▌         | 6/100 [00:16<04:16,  2.73s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it addresses the management of social anxiety disorder, mentioning therapy, medication, and support groups, which are pertinent to the query.
    - Semantic Similarity: The response captures the essence of the reference answer but uses more elaborate language. It conveys similar ideas but is not as succinct as the reference.
    - Accuracy: The information provided in the response is accurate and aligns with common treatments for social anxiety disorder.
    - Factual Correctness: The facts presented are consistent with reliable sources regarding the management of social anxiety disorder, particularly the mention of therapy and medication.
    - Completeness: The response is fairly complete, covering several methods for managing symptoms, although it could be more concise.
    - Clarity: The response is generally clear, though the phrasing could be simplified to enhance understanding.
    - Concis

Evaluating responses:   7%|▋         | 7/100 [00:19<04:15,  2.75s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response effectively addresses the query by listing common symptoms of anxiety disorders, which is exactly what was asked.
    - Semantic Similarity: The response captures the essence of the reference answer, although it includes more symptoms and details, which slightly diverges from the succinctness of the reference.
    - Accuracy: The symptoms listed are accurate and align with recognized symptoms of anxiety disorders, making the information reliable.
    - Factual Correctness: The symptoms mentioned are factually correct and consistent with established medical understanding of anxiety disorders.
    - Completeness: The response is comprehensive as it includes a wide range of symptoms, providing a fuller picture than the reference answer.
    - Clarity: The structure of the response is clear, with bullet points enhancing readability and understanding of the symptoms.
    - Conciseness: While the response is complete,

Evaluating responses:   8%|▊         | 8/100 [00:21<03:53,  2.54s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it addresses how mental health impacts decision-making, which aligns with the query.
    - Semantic Similarity: The response captures the general meaning of the reference answer but expands on it significantly. It does not directly mirror the phrasing or specific points made in the reference.
    - Accuracy: The information provided about how mental health conditions can affect decision-making is accurate and reflects common understanding in psychology.
    - Factual Correctness: The facts presented are consistent with established knowledge regarding mental health and decision-making processes.
    - Completeness: The response is quite thorough, covering various aspects of how different mental health conditions can influence decision-making, which is more detailed than the reference answer.
    - Clarity: The response is clear and easy to understand, with logical organization of ideas.
    - Conci

Evaluating responses:   9%|▉         | 9/100 [00:25<04:35,  3.03s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it directly addresses the query about ADHD, mentioning key symptoms associated with the condition.
    - Semantic Similarity: The response captures the essence of the reference answer but includes additional details about behavior control that are not explicitly mentioned in the reference.
    - Accuracy: The information provided is accurate regarding ADHD symptoms, but the phrasing could be more precise.
    - Factual Correctness: The facts presented about ADHD align with general knowledge on the condition, but the description of symptoms could be more aligned with clinical definitions.
    - Completeness: The response covers the main symptoms of ADHD but lacks a mention of impulsivity, which is a critical aspect of the disorder.
    - Clarity: The response is mostly clear, but the phrase "not being able to control behavior" could be misleading without context.
    - Conciseness: The response is 

Evaluating responses:  10%|█         | 10/100 [00:27<04:09,  2.77s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it addresses the treatment options for Seasonal Affective Disorder (SAD), which is the main focus of the query.
    - Semantic Similarity: The response captures the essential meaning of the reference answer, although it elaborates slightly more on the treatments mentioned.
    - Accuracy: The information provided is accurate; it mentions light therapy, antidepressants, talk therapy, and vitamin D supplements, which are all valid treatments for SAD.
    - Factual Correctness: The facts presented align with established treatments for SAD and are consistent with the reference answer.
    - Completeness: The response is fairly complete as it includes multiple treatment options, although it could have mentioned the effectiveness of each treatment more clearly.
    - Clarity: The response is clear and easy to understand, with a logical flow of information.
    - Conciseness: The response is somewhat con

Evaluating responses:  11%|█         | 11/100 [00:30<04:06,  2.77s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it addresses the importance of mental health across different life stages, which aligns with the query.
    - Semantic Similarity: The response captures the essence of the reference answer by mentioning emotional and psychological aspects, though it lacks the explicit mention of "social well-being."
    - Accuracy: The information presented is accurate in stating that mental health is important throughout life.
    - Factual Correctness: The response is factually correct, as it aligns with general understanding of mental health's significance.
    - Completeness: The response lacks depth; it does not elaborate on how mental health affects emotional, psychological, and social well-being, which is crucial for completeness.
    - Clarity: The response is clear and easy to understand, conveying the message without ambiguity.
    - Conciseness: The response is concise, providing a brief statement witho

Evaluating responses:  12%|█▏        | 12/100 [00:32<03:54,  2.67s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Semantic Similarity: The RAG response captures the essence of the reference answer, though it includes additional points that enhance the understanding of the topic.
    - Factual Correctness: The facts are consistent with reliable sources and are generally accepted in the mental health field.
    - Conciseness: While the response is somewhat longer than the reference answer, it remains concise and does not include unnecessary details.

### Overall Scores:
- RAG Overall Score: 9


Evaluating responses:  13%|█▎        | 13/100 [00:35<03:46,  2.60s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it directly addresses the query about why people engage in self-harm, providing insight into the emotional aspects involved.
    - Semantic Similarity: The response captures the intent of the reference answer well, discussing the emotional relief self-harm can provide, though it expands slightly on the reasons.
    - Accuracy: The information presented is accurate, reflecting common psychological understanding regarding self-harm and its emotional context.
    - Factual Correctness: The response is factually correct, aligning with established knowledge about self-harm behaviors and their psychological motivations.
    - Completeness: The response is somewhat complete, covering key reasons for self-harm but could be enhanced by mentioning other factors like societal or environmental influences.
    - Clarity: The response is clear and easy to understand, effectively communicating the reasons behind

Evaluating responses:  14%|█▍        | 14/100 [00:37<03:40,  2.56s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is highly relevant as it addresses the role of communication in preventing suicide, which directly aligns with the query.
    - Semantic Similarity: The response captures the essence of the reference answer, emphasizing support and stigma reduction, but it elaborates further on the importance of effective communication.
    - Accuracy: The information presented is accurate and reflects the importance of communication in mental health and suicide prevention.
    - Factual Correctness: The facts are consistent with reliable sources regarding the role of communication in mental well-being and suicide prevention.
    - Completeness: The response is comprehensive, covering various aspects of how communication contributes to preventing suicide, including support, engagement in care, and fostering a nurturing environment.
    - Clarity: The response is clear and well-structured, making it easy to understand the points 

Evaluating responses:  15%|█▌        | 15/100 [00:40<03:47,  2.67s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it discusses therapy, specifically DBT, in the context of addressing self-harm. However, it could be more focused on the general benefits of therapy rather than just one type.
    - Semantic Similarity: The response captures the intent of the reference answer, which is about therapy helping with self-harm, but it does so by emphasizing a specific type of therapy rather than the broader concept of therapy as a whole.
    - Accuracy: The information regarding DBT as an effective intervention for self-harm is accurate. However, it could mislead readers into thinking DBT is the only option.
    - Factual Correctness: The mention of DBT is factually correct, but the suggestion to consult a local therapist may not be universally applicable or feasible for everyone.
    - Completeness: The response provides a specific approach (DBT) but lacks a broader discussion on how therapy, in general, can help with

Evaluating responses:  16%|█▌        | 16/100 [00:43<03:33,  2.54s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it directly addresses the emotional triggers linked to self-harm, which was the focus of the query.
    - Semantic Similarity: The response captures the essence of the reference answer, mentioning the same emotional triggers, although it adds a phrase "according to the text," which slightly alters the phrasing.
    - Accuracy: The information provided is accurate and aligns with common understanding regarding the emotional triggers of self-harm.
    - Factual Correctness: The facts presented are consistent with the reference answer and are generally accepted in discussions about self-harm.
    - Completeness: The response is somewhat complete as it lists the main emotional triggers but does not elaborate on them or mention any additional triggers that could also be relevant.
    - Clarity: The response is clear and easy to understand, effectively communicating the emotional triggers.
    - Concise

Evaluating responses:  17%|█▋        | 17/100 [00:46<04:05,  2.95s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response identifies signs that may indicate suicidal thoughts but does not directly address how to intervene, which is the core of the query. It lacks a clear connection to intervention strategies.
    - Semantic Similarity: The response does not closely align with the reference answer, which emphasizes directly asking about suicidal thoughts and connecting individuals to help. The RAG response focuses on signs rather than intervention.
    - Accuracy: The information regarding signs of potential suicide risk is accurate, but it does not provide actionable steps for intervention, which is critical for the query.
    - Factual Correctness: The signs mentioned are generally recognized indicators of suicidal ideation; however, the lack of intervention strategies diminishes the overall factual correctness in the context of the query.
    - Completeness: The response fails to cover essential aspects of the query, particularly

Evaluating responses:  18%|█▊        | 18/100 [00:48<03:39,  2.68s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it discusses various challenges faced by people with schizophrenia, aligning with the query's intent.
    - Semantic Similarity: The response captures the general intent of the reference answer but goes into more detail, which slightly diverges from the concise nature of the reference.
    - Accuracy: The information presented is accurate regarding the challenges associated with schizophrenia, including difficulties in communication and functioning.
    - Factual Correctness: The facts are consistent with established knowledge about schizophrenia and are presented correctly.
    - Completeness: The response is quite comprehensive, detailing various challenges, but it may be overly detailed compared to the reference answer.
    - Clarity: The response is clear and easy to understand, with a logical flow of information.
    - Conciseness: The response is less concise than the reference answer, provi

Evaluating responses:  19%|█▉        | 19/100 [00:51<03:23,  2.51s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response does not address the query at all, as it is simply "nan". Therefore, it scores very low on relevance.
    - Semantic Similarity: There is no semantic similarity to evaluate since the response does not contain any meaningful content.
    - Accuracy: The response cannot be assessed for accuracy because it lacks any information.
    - Factual Correctness: There are no facts presented in the response, making it impossible to determine factual correctness.
    - Completeness: The response is completely lacking in content, so it fails to cover any aspects of the query.
    - Clarity: The response is not clear at all, as it provides no information or context.
    - Conciseness: While the response is concise in terms of length, it is entirely uninformative, which negates any potential positive aspect of conciseness.

### Overall Scores:
- RAG Overall Score: 1


Evaluating responses:  20%|██        | 20/100 [00:53<03:07,  2.35s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response directly addresses the query by listing physical symptoms linked to panic disorder, making it highly relevant.
    - Semantic Similarity: The response captures the meaning of the reference answer well, although it includes additional symptoms that were not mentioned in the reference.
    - Accuracy: The symptoms listed are accurate and commonly associated with panic disorder, aligning with established medical understanding.
    - Factual Correctness: All symptoms mentioned are factually correct and consistent with reliable sources regarding panic disorder.
    - Completeness: The response is more complete than the reference answer as it provides a broader list of symptoms, which enhances the overall understanding.
    - Clarity: The response is clear and easy to read, with each symptom presented in a straightforward manner.
    - Conciseness: While the response is slightly longer than the reference, it maintains

Evaluating responses:  21%|██        | 21/100 [00:55<03:03,  2.32s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response effectively addresses the query by explaining why panic attacks are frightening, focusing on the emotional and physical aspects involved. 
    - Semantic Similarity: The response captures the essence of the reference answer, discussing intense fear and physical symptoms, but expands on them in a way that adds depth rather than just paraphrasing.
    - Accuracy: The information provided about panic attacks is accurate and aligns with common psychological understanding, highlighting both physical symptoms and emotional experiences.
    - Factual Correctness: The facts presented are consistent with established knowledge about panic attacks and are logically sound.
    - Completeness: The response is thorough, covering various aspects of panic attacks, including the physical sensations and the emotional impact, which provides a more complete picture than the reference answer.
    - Clarity: The response is clear and

Evaluating responses:  22%|██▏       | 22/100 [00:57<02:55,  2.25s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it directly addresses the query about the target populations of the Zero Suicide framework.
    - Semantic Similarity: The response captures some meaning from the reference answer but lacks the broader context of targeting individuals across healthcare systems.
    - Accuracy: The information provided is accurate in stating that the framework targets adults 18 and older; however, it is too narrow.
    - Factual Correctness: The fact that the framework targets adults is correct, but it does not encompass the full scope of the reference answer.
    - Completeness: The response is incomplete as it does not mention the broader populations or healthcare systems that the framework aims to address, which is a key aspect of the reference answer.
    - Clarity: The response is clear and easy to understand, but it is overly simplistic.
    - Conciseness: The response is concise, but this brevity comes at th

Evaluating responses:  23%|██▎       | 23/100 [00:59<02:59,  2.33s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it directly addresses the relationship between insomnia, depression, and feelings of worthlessness, fulfilling the query's intent.
    - Semantic Similarity: The response captures the essence of the reference answer by mentioning racing thoughts and their impact on sleep and emotions, although it expands on these ideas.
    - Accuracy: The information provided is accurate in describing how insomnia can be related to depression and feelings of worthlessness, aligning with psychological understanding.
    - Factual Correctness: The response presents facts that are consistent with established knowledge about depression and insomnia, making it factually correct.
    - Completeness: The response is complete as it covers the connection between insomnia, depression, and feelings of worthlessness while also suggesting potential solutions, which adds depth.
    - Clarity: The response is clear and well-str

Evaluating responses:  24%|██▍       | 24/100 [01:01<02:51,  2.25s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Conciseness: While the response is detailed, it could be considered slightly lengthy. However, it still manages to avoid unnecessary redundancy.

### Overall Scores:
- RAG Overall Score: 9


Evaluating responses:  25%|██▌       | 25/100 [01:04<02:44,  2.19s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is highly relevant as it directly addresses the symptoms of bulimia nervosa, which is the query's intent.
    - Semantic Similarity: The RAG response captures the essence of the reference answer but goes into greater detail, which is somewhat different in focus. However, it still conveys the overall meaning.
    - Accuracy: The information provided is accurate regarding the symptoms associated with bulimia nervosa.
    - Factual Correctness: The facts presented are consistent with known medical information regarding bulimia nervosa and its physical symptoms.
    - Completeness: The RAG response is more complete than the reference answer as it lists multiple symptoms, providing a broader understanding of the condition.
    - Clarity: The response is clear and easy to understand, with symptoms listed in a straightforward manner.
    - Conciseness: While the response is informative, it could be considered less conc

Evaluating responses:  26%|██▌       | 26/100 [01:06<02:49,  2.29s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response somewhat addresses the query by mentioning resources for parents, but it focuses heavily on faith-based organizations and personal advice, which may not be directly relevant to all parents seeking help. 
    - Semantic Similarity: The RAG response captures some of the intent of the reference answer but diverges significantly in content. The reference answer is more straightforward and focused on specific resources.
    - Accuracy: The mention of the 988 Suicide & Crisis Lifeline is accurate; however, the advice about “sweet-talking” counselors lacks professionalism and may not be appropriate.
    - Factual Correctness: The information about the 988 Lifeline is factually correct, but the suggestion to engage counselors informally lacks clarity regarding professional ethics.
    - Completeness: The response includes some resources but misses other relevant options such as healthcare providers and school counselors

Evaluating responses:  27%|██▋       | 27/100 [01:08<02:46,  2.28s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is highly relevant as it directly addresses the symptoms of Generalized Anxiety Disorder, which is the query's focus.
    - Semantic Similarity: The response captures the essence of the reference answer but includes more symptoms. While it maintains the core meaning, the phrasing differs slightly.
    - Accuracy: The symptoms listed are accurate and align with common understandings of Generalized Anxiety Disorder.
    - Factual Correctness: All symptoms mentioned are factually correct and consistent with established medical information about Generalized Anxiety Disorder.
    - Completeness: The response is complete as it includes a comprehensive list of symptoms, going beyond the reference answer to provide additional relevant details.
    - Clarity: The response is clear and easy to understand, presenting the symptoms in a straightforward manner.
    - Conciseness: While the response is complete, it could be co

Evaluating responses:  28%|██▊       | 28/100 [01:10<02:35,  2.15s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it discusses treatment options for schizophrenia, directly addressing the query.
    - Semantic Similarity: The response captures the intent of the reference answer but focuses more on medication and ECT, while the reference mentions therapy and family support, which are also important.
    - Accuracy: The information provided about antipsychotic medications and ECT is accurate and aligns with standard treatment practices.
    - Factual Correctness: The facts presented are consistent with reliable sources regarding schizophrenia treatment.
    - Completeness: The response lacks mention of individual therapy and family support, which are critical components of effective treatment, making it less complete than the reference answer.
    - Clarity: The response is clear and easy to understand, providing a straightforward explanation of treatment methods.
    - Conciseness: The response is succinct and

Evaluating responses:  29%|██▉       | 29/100 [01:13<02:46,  2.35s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response effectively addresses the query by differentiating between panic disorder and occasional panic attacks, which is the main focus of the question.
    - Semantic Similarity: The RAG response captures the essence of the reference answer, discussing both panic disorder and occasional panic attacks, although it could be phrased more similarly to the reference.
    - Accuracy: The information provided is accurate, as it correctly states that panic disorder involves repeated panic attacks while occasional panic attacks may not lead to a disorder.
    - Factual Correctness: The facts presented align with established definitions and descriptions of panic disorder and occasional panic attacks, making it factually correct.
    - Completeness: The response covers the key differences but could be improved by explicitly mentioning the frequency of panic attacks in both conditions, as highlighted in the reference answer.
    -

Evaluating responses:  30%|███       | 30/100 [01:15<02:44,  2.34s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response partially addresses the query by mentioning dialectical behavior therapy (DBT) as a therapeutic approach for eating disorders, but it lacks a direct connection to how psychotherapy aids recovery specifically, which is the focus of the question.
    - Semantic Similarity: The response does not capture the essence of the reference answer well. While it discusses therapy techniques, it does not clearly convey the idea of addressing underlying emotional issues or managing symptoms as stated in the reference.
    - Accuracy: The mention of DBT is accurate in the context of psychotherapy for eating disorders, but the explanation is somewhat vague and does not detail how DBT specifically aids recovery.
    - Factual Correctness: The information about DBT is generally correct, but the lack of clarity in the explanation may lead to misunderstandings about its application in eating disorder recovery.
    - Completeness: T

Evaluating responses:  31%|███       | 31/100 [01:18<02:41,  2.34s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it discusses various treatments for anxiety disorders, which directly addresses the query.
    - Semantic Similarity: The response captures the intent of the reference answer but provides more detail. However, it does not directly mention "stress management techniques," which is included in the reference.
    - Accuracy: The information provided is accurate regarding the treatments mentioned, including therapy options and medication.
    - Factual Correctness: The mention of Acceptance and Commitment Therapy (ACT), Cognitive Behavioral Therapy (CBT), and SSRIs aligns with established treatments for anxiety disorders, making it factually correct.
    - Completeness: The response is somewhat complete as it lists multiple therapies and medication; however, it lacks a mention of stress management techniques, which is an essential aspect of treating anxiety.
    - Clarity: The response is clear and eas

Evaluating responses:  32%|███▏      | 32/100 [01:20<02:47,  2.47s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it directly addresses the role of CBT in managing depression and self-esteem issues. It discusses coping skills and the influence of thought processes on emotions and behaviors, which are key aspects of CBT's role.
    - Semantic Similarity: The response captures the essence of the reference answer well, emphasizing awareness of thought processes and their impact on beliefs and actions, although it expands on this with additional details.
    - Accuracy: The information presented is accurate regarding CBT's role in managing depression and self-esteem issues. It correctly highlights the importance of thought processes and coping skills.
    - Factual Correctness: The facts provided are consistent with established knowledge about CBT and its applications. There are no inaccuracies present in the response.
    - Completeness: The response is comprehensive, covering various aspects of CBT, including i

Evaluating responses:  33%|███▎      | 33/100 [01:23<02:53,  2.60s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response effectively addresses the health risks associated with self-harm, mentioning both psychological and physical risks. However, it could have been more aligned with the specific risks listed in the reference answer.
    - Semantic Similarity: The response captures some of the meaning of the reference answer but expands on it significantly. While it includes relevant information, it does not closely mirror the core points of the reference.
    - Accuracy: The information provided is accurate regarding the risks of self-harm, including the potential for suicide and physical injuries. However, the mention of endorphins and neural mapping, while interesting, may not directly relate to the immediate health risks.
    - Factual Correctness: The facts presented are generally correct and align with established knowledge about self-harm. The inclusion of psychological aspects adds depth but strays from the straightforward r

Evaluating responses:  34%|███▍      | 34/100 [01:26<02:45,  2.51s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it addresses the causes of social anxiety disorder, which is the specific requirement of the query.
    - Semantic Similarity: The response captures the essential meaning of the reference answer, though it includes additional phrasing that slightly alters the directness of the comparison.
    - Accuracy: The information presented is accurate, as it mentions genetics and learned behavior as contributing factors, which aligns with established understanding of social anxiety disorder.
    - Factual Correctness: The response is factually correct, consistent with reliable sources regarding the causes of social anxiety disorder.
    - Completeness: The response is somewhat complete but could benefit from a more explicit mention of "past social traumas" as stated in the reference answer, which would enhance its thoroughness.
    - Clarity: The response is clear and easy to understand, presenting the info

Evaluating responses:  35%|███▌      | 35/100 [01:28<02:47,  2.57s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it addresses the Four-Fold Breath technique and its purpose, aligning with the query's intent.
    - Semantic Similarity: The response captures the essence of the reference answer by discussing a breathing technique, but it diverges in focus, emphasizing practice duration and stress relief rather than relaxation and aiding sleep.
    - Accuracy: The information provided is generally accurate regarding the technique's breathing pattern and its intended benefits.
    - Factual Correctness: The details about the breathing counts and the practice frequency are correct, but the reference answer's mention of aiding sleep is not addressed in the RAG response.
    - Completeness: The response provides a good amount of detail about the technique but omits the mention of its application for relaxation and sleep, which is a key aspect of the reference answer.
    - Clarity: The response is clear and easy to 

Evaluating responses:  36%|███▌      | 36/100 [01:31<02:45,  2.58s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it addresses the influence of family history on mental health, but it could be more focused on the specific aspect of family history rather than broadly discussing other factors.
    - Semantic Similarity: The meaning is somewhat captured, but the response lacks the directness of the reference answer, which succinctly states the influence of family history.
    - Accuracy: The information provided is accurate regarding the influence of family history on mental health, but it introduces additional factors that may dilute the focus.
    - Factual Correctness: The facts presented are correct and align with general knowledge about mental health influences; however, the emphasis on other factors could mislead the reader about the primary focus.
    - Completeness: While the response mentions various contributing factors, it does not fully explore the significance of family history specifically, which i

Evaluating responses:  37%|███▋      | 37/100 [01:33<02:36,  2.48s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response effectively addresses the query about how phobias affect daily life by discussing their impact on daily activities and routines.
    - Semantic Similarity: The response captures the intent of the reference answer, though it provides more detail. It aligns with the idea of limiting daily activities due to fear.
    - Accuracy: The information presented is accurate, illustrating how phobias can lead to avoidance behaviors that disrupt daily life.
    - Factual Correctness: The examples given (phobia of bridges and elevators) are valid and reflect common phobias, confirming the factual correctness of the response.
    - Completeness: The response is complete as it not only mentions avoidance but also provides examples, enhancing understanding of the impact of phobias.
    - Clarity: The response is clear and easy to understand, with straightforward language and structure.
    - Conciseness: While the response is so

Evaluating responses:  38%|███▊      | 38/100 [01:36<02:34,  2.49s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is somewhat relevant as it addresses the benefits of NCCA temperament therapy, but it does not fully capture the intent of the query regarding specific benefits.
    - Semantic Similarity: The response captures some of the intent of the reference answer but lacks the specific focus on self-acceptance and joy, which are key elements in the reference.
    - Accuracy: The information about gaining spiritual insight is accurate, but it does not encompass the broader benefits mentioned in the reference answer.
    - Factual Correctness: The response is factually correct in stating that the therapy provides insight; however, it misses other critical aspects of the therapy's benefits.
    - Completeness: The response is incomplete as it does not address all essential aspects of how NCCA temperament therapy can benefit individuals, particularly self-acceptance and joy.
    - Clarity: The response is clear and easy to un

Evaluating responses:  39%|███▉      | 39/100 [01:38<02:25,  2.38s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response partially addresses the query by discussing how physical health can influence depression, but it lacks a direct connection to the emotional aspects of depression as indicated in the reference answer.
    - Semantic Similarity: There is a low level of semantic similarity with the reference answer as the RAG response focuses more on risk factors rather than the relationship between physical health and emotional symptoms.
    - Accuracy: The information provided is generally accurate regarding the relationship between physical health and depression, but it could be more nuanced in discussing the emotional impact.
    - Factual Correctness: The response presents correct associations between physical health issues and depression, but it does not fully align with the notion of physical structures affecting emotional states as mentioned in the reference answer.
    - Completeness: The response is somewhat incomplete as

Evaluating responses:  40%|████      | 40/100 [01:40<02:26,  2.44s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it addresses the impact of lack of sunlight on mood, which is directly related to Seasonal Affective Disorder (SAD). However, it could be more explicitly tied to SAD.
    - Semantic Similarity: The response captures some of the key ideas from the reference answer, particularly regarding mood and serotonin, but it lacks the mention of melatonin and the direct connection to sleep.
    - Accuracy: The information presented is generally accurate, linking lack of sunlight to serotonin levels and circadian rhythms, although it does not explicitly mention melatonin, which is important in the context of SAD.
    - Factual Correctness: The response is factually correct regarding the effects of sunlight on serotonin and circadian rhythms, but it misses the comprehensive view provided in the reference answer, particularly the role of melatonin.
    - Completeness: The response lacks completeness as it does n

Evaluating responses:  41%|████      | 41/100 [01:43<02:28,  2.52s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it addresses the concern of social withdrawal in youth and its implications.
    - Semantic Similarity: The response captures some of the intent of the reference answer but introduces additional concepts (mental health conditions) that are not present in the reference.
    - Accuracy: The information about social withdrawal indicating underlying mental health conditions is accurate; however, it could be seen as overly specific compared to the reference.
    - Factual Correctness: The facts presented are generally correct and align with common understandings of the topic.
    - Completeness: The response provides a broader view by mentioning specific mental health conditions, but it lacks the depth of emotional states like "hopelessness" or "distress" mentioned in the reference.
    - Clarity: The response is clear and easy to understand, effectively communicating the concern.
    - Conciseness: Th

Evaluating responses:  42%|████▏     | 42/100 [01:45<02:17,  2.37s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it addresses the query about ADHD treatment, mentioning key components of treatment plans.
    - Semantic Similarity: The response captures the general intent of the reference answer, though it uses slightly different phrasing and includes "trainings" which is less common terminology in this context.
    - Accuracy: The information provided is accurate as it aligns with common treatment approaches for ADHD.
    - Factual Correctness: The response contains correct information regarding treatment options, but "trainings" may not be as widely recognized or specific as "behavior management strategies."
    - Completeness: The response is somewhat complete but lacks specific mention of behavior management strategies, which are critical in ADHD treatment.
    - Clarity: The response is clear and easy to understand, presenting the information in a straightforward manner.
    - Conciseness: The response i

Evaluating responses:  43%|████▎     | 43/100 [01:47<02:12,  2.33s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it directly addresses the importance of early treatment for schizophrenia, aligning with the query's intent.
    - Semantic Similarity: The response captures the essence of the reference answer, discussing recovery and quality of life, which are key components of the original response.
    - Accuracy: The information presented is accurate; it correctly states that early treatment is beneficial for recovery and quality of life.
    - Factual Correctness: The facts are consistent with general knowledge about schizophrenia and early intervention, aligning well with established medical understanding.
    - Completeness: The response is somewhat complete but lacks specific details about how early treatment affects symptoms or the mechanisms behind improved outcomes, which could enhance its depth.
    - Clarity: The response is clear and easy to comprehend, effectively communicating the main point witho

Evaluating responses:  44%|████▍     | 44/100 [01:50<02:12,  2.37s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response effectively addresses the query by providing signs of binge eating disorder. It includes relevant details about the disorder, aligning well with the question asked.
    - Semantic Similarity: The response captures the core meaning of the reference answer but expands on it with additional context. While it is semantically similar, it introduces more detail than the reference.
    - Accuracy: The information presented is accurate and reflects the characteristics of binge eating disorder appropriately.
    - Factual Correctness: The facts provided are consistent with reliable sources regarding binge eating disorder, aligning well with the reference answer.
    - Completeness: The response is complete, covering multiple signs of binge eating disorder, including feelings associated with the behavior, which adds depth to the understanding.
    - Clarity: The response is clear and easy to understand, with a logical flo

Evaluating responses:  45%|████▌     | 45/100 [01:52<02:05,  2.29s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it directly addresses the impact of binge eating disorder on mental health, discussing related feelings and issues.
    - Semantic Similarity: The response captures the core ideas of the reference answer, particularly the feelings of guilt and shame, but it expands slightly beyond the reference, which may dilute the focus.
    - Accuracy: The information presented is accurate, as it correctly identifies the relationship between binge eating disorder and mental health issues such as anxiety and despair.
    - Factual Correctness: The facts align well with established knowledge about binge eating disorder and its psychological effects, confirming the response's reliability.
    - Completeness: The response is somewhat complete, covering feelings of guilt, shame, loneliness, and isolation but does not explicitly mention reduced self-esteem, which is a key aspect of the reference answer.
    - Clarity

Evaluating responses:  46%|████▌     | 46/100 [01:54<01:59,  2.21s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is highly relevant as it directly addresses the goal of the Garrett Lee Smith Campus program, focusing on mental health services for students.
    - Semantic Similarity: The response captures the core intent of the reference answer but includes additional details that expand on the program's focus, making it slightly less similar in phrasing.
    - Accuracy: The information provided is accurate and logically sound, aligning well with the objectives of the program.
    - Factual Correctness: The response is factually correct and consistent with known information about the Garrett Lee Smith Campus program.
    - Completeness: The response is complete, covering various aspects of the program's goals, including mental health services and specific populations at risk.
    - Clarity: The response is clear and easy to understand, effectively communicating the program's objectives without ambiguity.
    - Conciseness: W

Evaluating responses:  47%|████▋     | 47/100 [01:56<01:56,  2.20s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response effectively addresses the query regarding how obsessive-compulsive disorder manifests by listing specific symptoms and behaviors associated with OCD.
    - Semantic Similarity: While the RAG response provides a broader range of manifestations, it does not closely capture the essence of the reference answer, which emphasizes the connection between obsessions, compulsions, and anxiety reduction.
    - Accuracy: The information presented is accurate and aligns with common knowledge about OCD symptoms.
    - Factual Correctness: The facts in the RAG response are consistent with reliable sources regarding OCD and its manifestations.
    - Completeness: The response is comprehensive, covering various aspects of OCD manifestations, including behaviors in children, which adds depth.
    - Clarity: The response is clear and easy to understand, with a straightforward presentation of symptoms.
    - Conciseness: While the 

Evaluating responses:  48%|████▊     | 48/100 [01:59<01:58,  2.28s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it addresses the query about resources for self-harm prevention, listing various types of therapeutic options. However, it fails to mention crisis helplines, which are also important resources.
    - Semantic Similarity: The response captures the general intent of the reference answer but does not align closely with its phrasing or specific content, particularly the mention of crisis helplines.
    - Accuracy: The information provided is accurate regarding the types of therapies and interventions available for self-harm prevention.
    - Factual Correctness: The response presents correct information about various therapeutic approaches, but it lacks the mention of crisis helplines, which are a critical component of self-harm prevention resources.
    - Completeness: While the response covers several types of therapy, it is not complete as it omits important resources like crisis helplines and supp

Evaluating responses:  49%|████▉     | 49/100 [02:01<02:00,  2.36s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response effectively addresses the query by explaining the community's role in suicide prevention, emphasizing its impact on the community. However, it could more directly connect to the idea of providing support and resources as highlighted in the reference answer.
    - Semantic Similarity: The response captures the general intent of the reference answer but does not fully align with its focus on support, awareness, and resources. The phrasing differs significantly, which diminishes the semantic similarity.
    - Accuracy: The information presented in the response is accurate; it correctly identifies the communal impact of suicide, which aligns with the understanding of community responsibility.
    - Factual Correctness: The points made are factually correct regarding the emotional and social implications of suicide on a community, though it lacks specific references to actionable measures.
    - Completeness: The res

Evaluating responses:  50%|█████     | 50/100 [02:03<01:56,  2.33s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response effectively addresses the query about the main symptoms of schizophrenia by listing relevant symptoms. However, it could be more focused on the main symptoms rather than providing detailed descriptions.
    - Semantic Similarity: The response captures the core symptoms mentioned in the reference answer but expands on them, which may slightly diverge from the succinct nature of the reference.
    - Accuracy: The information provided is accurate and aligns with established knowledge about schizophrenia symptoms.
    - Factual Correctness: The symptoms listed are factually correct and are consistent with reliable sources on schizophrenia.
    - Completeness: The response is more comprehensive than the reference answer, covering additional aspects of symptoms, but it may include some details that are not essential for a basic understanding.
    - Clarity: The response is clear and understandable, with well-structure

Evaluating responses:  51%|█████     | 51/100 [02:05<01:48,  2.21s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response effectively addresses the query by outlining ways family members can support a loved one with schizophrenia, making it relevant to the instruction.
    - Semantic Similarity: The response captures the essence of the reference answer, emphasizing emotional support and treatment adherence, though it expands on these ideas.
    - Accuracy: The information provided is accurate and aligns well with general knowledge about supporting individuals with schizophrenia.
    - Factual Correctness: The response presents correct information regarding the support strategies for schizophrenia, consistent with reliable sources.
    - Completeness: The response is comprehensive, covering multiple aspects of support, including emotional support, education, and treatment adherence, which adds depth beyond the reference answer.
    - Clarity: The response is clearly articulated and easy to understand, with no ambiguous language.
   

Evaluating responses:  52%|█████▏    | 52/100 [02:08<01:46,  2.21s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it addresses the treatment of eating disorders, aligning with the query's intent.
    - Semantic Similarity: There is some semantic similarity, but the RAG response elaborates more than the reference answer, which is more concise. The core concepts are present but not in the same succinct manner.
    - Accuracy: The treatments mentioned (Behavioral Analysis and DBT) are accurate and recognized methods for treating eating disorders, but they are not the only options available.
    - Factual Correctness: The information provided is factually correct, as both DBT and professional help are valid components of treatment for eating disorders.
    - Completeness: The response provides a more detailed view of treatments, including specific therapeutic approaches, which adds to its completeness compared to the reference answer.
    - Clarity: The response is generally clear, but the use of technical terms 

Evaluating responses:  53%|█████▎    | 53/100 [02:12<02:10,  2.78s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response effectively addresses the query by discussing how panic disorder affects relationships, particularly through avoidance behaviors. This is relevant to the question asked.
    - Semantic Similarity: The response captures the essence of the reference answer regarding the tension and avoidance in relationships, though it expands on the concept with additional details.
    - Accuracy: The information presented is accurate in describing the effects of panic disorder on relationships, particularly the avoidance aspect.
    - Factual Correctness: The response aligns with general knowledge about panic disorder and its impact on social interactions, thus maintaining factual correctness.
    - Completeness: The response offers a more comprehensive view of the issue by explaining the potential consequences of avoidance, such as isolation and disconnection, which adds depth to the answer.
    - Clarity: The response is clear

Evaluating responses:  54%|█████▍    | 54/100 [02:15<02:19,  3.02s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Semantic Similarity: The response captures some elements of support but does not closely reflect the specific actions mentioned in the reference answer, such as listening without judgment or removing means of self-harm. 
    - Accuracy: The mention of medication is accurate in the context of mental health treatment but does not directly address immediate support strategies for someone in crisis, which could lead to misunderstanding the urgency of the situation.
    - Factual Correctness: While the response contains accurate information regarding self-care and support, it lacks critical information about direct intervention strategies, making it incomplete in context.
    - Completeness: The response does not cover essential aspects such as asking about suicidal thoughts or staying with the person, which are crucial in providing immediate help. 
    - Clarity: The response is clear in its language but could be misleading as it emphasize

Evaluating responses:  55%|█████▌    | 55/100 [02:18<02:11,  2.92s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response effectively addresses the query by outlining how parents should approach conversations about mental health with their children. It provides practical steps that are relevant to the topic.
    - Semantic Similarity: The response captures the essence of the reference answer by emphasizing the importance of creating a safe environment and using appropriate language, though it expands on the details more than the reference.
    - Accuracy: The information provided is accurate and logically sound, as it reflects common practices for discussing mental health with children.
    - Factual Correctness: The advice given is consistent with widely accepted guidelines for mental health conversations, making it factually correct.
    - Completeness: The response is quite comprehensive, covering several essential aspects of the topic, such as communication style and emotional awareness during discussions.
    - Clarity: The re

Evaluating responses:  56%|█████▌    | 56/100 [02:20<02:03,  2.81s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response effectively addresses the query about treatments for phobias by mentioning exposure therapy and self-help resources. However, it could have benefited from including cognitive-behavioral techniques, which are also significant treatments.
    - Semantic Similarity: The response captures the essence of the reference answer by discussing exposure therapy, but it lacks the mention of cognitive-behavioral techniques, which is a key component of the reference.
    - Accuracy: The information provided about exposure therapy is accurate and reflects common therapeutic practices for phobias.
    - Factual Correctness: The mention of seeking professional help aligns with established practices in treating phobias, making the response factually correct.
    - Completeness: While the response includes exposure therapy and self-help resources, it omits cognitive-behavioral techniques, which are critical for a comprehensive und

Evaluating responses:  57%|█████▋    | 57/100 [02:22<01:50,  2.58s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it addresses the purpose of the Native Connections program, focusing on suicide prevention and mental health support for youth.
    - Semantic Similarity: The response captures the essence of the reference answer, highlighting the focus on suicide prevention and trauma, although it uses different phrasing.
    - Accuracy: The information provided in the response is accurate and aligns with the intended goals of the Native Connections program.
    - Factual Correctness: The details about the program's objectives are consistent with reliable sources, confirming the focus on youth and mental health.
    - Completeness: The response is somewhat complete but could benefit from explicitly mentioning "AI/AN youth" to fully align with the reference answer.
    - Clarity: The response is clear and easy to understand, effectively communicating the main points without ambiguity.
    - Conciseness: The respon

Evaluating responses:  58%|█████▊    | 58/100 [02:25<01:45,  2.51s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response effectively addresses the query by distinguishing between social anxiety disorder and shyness, focusing on the impact of social anxiety on daily life.
    - Semantic Similarity: The response captures the essence of the reference answer, though it provides a more detailed explanation. It aligns well with the core idea of persistent fear and its interference with daily functioning.
    - Accuracy: The information presented is accurate; it correctly identifies the key differences between social anxiety disorder and shyness.
    - Factual Correctness: The facts are consistent with reliable sources and align with the reference answer, providing a correct understanding of both conditions.
    - Completeness: The response is thorough, covering essential aspects such as the emotional impact and behavioral consequences of social anxiety disorder compared to shyness.
    - Clarity: The response is clear and easy to unders

Evaluating responses:  59%|█████▉    | 59/100 [02:28<01:49,  2.66s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response effectively addresses the query by explaining why consistent treatment is important for individuals with schizophrenia. It stays focused on the topic.
    - Semantic Similarity: The response captures the essence of the reference answer but expands on it with additional details. While it does not directly mirror the phrasing, it conveys a similar meaning.
    - Accuracy: The information presented is accurate regarding the management of schizophrenia and the importance of consistent treatment.
    - Factual Correctness: The facts mentioned about managing symptoms, preventing relapses, and the importance of treatment adherence are consistent with established knowledge about schizophrenia.
    - Completeness: The response is comprehensive, covering various aspects of consistent treatment, including its impact on personal goals and relationships, which adds depth beyond the reference answer.
    - Clarity: The respon

Evaluating responses:  60%|██████    | 60/100 [02:30<01:41,  2.55s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it addresses the query about triggers for eating disorders, mentioning factors that could lead to such conditions.
    - Semantic Similarity: The response captures the essence of the reference answer, though it lacks some of the specific terms used in the reference, such as "societal pressures."
    - Accuracy: The information presented is accurate, as it correctly identifies factors that can trigger eating disorders.
    - Factual Correctness: The factors mentioned (genetics, hormones, co-morbid mental disorders) are factually correct and align with established understanding of eating disorders.
    - Completeness: The response is somewhat incomplete as it does not mention societal pressures, which is a significant trigger according to the reference answer.
    - Clarity: The response is clear and straightforward, making it easy to understand the factors listed.
    - Conciseness: The response is

Evaluating responses:  61%|██████    | 61/100 [02:33<01:48,  2.77s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response partially addresses the query by mentioning the interaction between biology and environment, but it fails to directly identify specific triggers of generalized anxiety disorder (GAD) as requested.
    - Semantic Similarity: The response captures some of the broader concepts related to GAD but does not align closely with the specific triggers mentioned in the reference answer.
    - Accuracy: The statement about the interaction between biology and environment is accurate, but it does not accurately reflect the specific triggers listed in the reference answer.
    - Factual Correctness: While the information regarding biology and environment is generally correct, it lacks the specificity needed to fully answer the question about triggers.
    - Completeness: The response is incomplete as it does not enumerate or detail the specific triggers that lead to GAD, which is a critical aspect of the query.
    - Clarity: 

Evaluating responses:  62%|██████▏   | 62/100 [02:36<01:45,  2.77s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is somewhat relevant as it mentions therapy, but it does not directly address NCCA temperament therapy, which is the focus of the query.
    - Semantic Similarity: The response does not capture the intent of the reference answer well, as it diverges into discussing Cognitive Behavioral Therapy rather than addressing temperament therapy specifically.
    - Accuracy: The mention of Cognitive Behavioral Therapy (CBT) does not accurately relate to NCCA temperament therapy, leading to a lack of accuracy in the context of the query.
    - Factual Correctness: The response fails to present correct information about NCCA temperament therapy, as it incorrectly associates it with CBT without explaining its actual focus.
    - Completeness: The response lacks completeness as it does not provide any information about NCCA temperament therapy itself, which is essential to the query.
    - Clarity: The response is clear in it

Evaluating responses:  63%|██████▎   | 63/100 [02:41<02:00,  3.27s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it addresses the topic of interventions for children facing mental health issues. It provides specific strategies that can be employed, which aligns with the query's intent.
    - Semantic Similarity: The response does not closely align with the reference answer, which is more concise and limited in scope. While it captures the general theme of interventions, it diverges significantly in detail and approach.
    - Accuracy: The information provided is generally accurate and reflects common practices in addressing mental health issues in children.
    - Factual Correctness: The interventions mentioned, such as promoting social and emotional competency and ensuring access to school-based mental health supports, are factually correct and supported by current mental health practices.
    - Completeness: The response is comprehensive and covers a wide range of interventions, including prevention and tr

Evaluating responses:  64%|██████▍   | 64/100 [02:43<01:51,  3.09s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it discusses the importance of observing behavior changes in children, but it does not directly address the core reason as succinctly as the reference answer.
    - Semantic Similarity: The response somewhat captures the intent of the reference answer by implying the need for early identification of issues, but it lacks the concise articulation of "timely interventions."
    - Accuracy: The information provided is generally accurate, highlighting the importance of recognizing behavioral changes and seeking help, but it could be more focused on the specific query.
    - Factual Correctness: The response presents correct information about consulting pediatricians and schools, which aligns with general practices in child development, but it does not specify mental health issues as clearly as the reference.
    - Completeness: While the response covers several aspects of observing behavior changes, it

Evaluating responses:  65%|██████▌   | 65/100 [02:46<01:43,  2.95s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response effectively addresses the query by listing relevant factors that contribute to mental health conditions. It is directly aligned with the instruction.
    - Semantic Similarity: The response captures the core meaning of the reference answer well, though the phrasing is slightly different. Overall, the intent is preserved.
    - Accuracy: The information provided is accurate and reflects common understanding of factors affecting mental health.
    - Factual Correctness: The facts presented are consistent with reliable sources and align well with the reference answer.
    - Completeness: The response includes the main factors mentioned in the reference answer, but it could benefit from a more detailed explanation or additional context for each factor.
    - Clarity: The response is clear and easy to understand, with no ambiguous language.
    - Conciseness: The response is concise and avoids unnecessary details, pr

Evaluating responses:  66%|██████▌   | 66/100 [02:48<01:33,  2.76s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response effectively addresses the query by discussing the importance of early and consistent treatment for mental health conditions, which is relevant to the question.
    - Semantic Similarity: The response captures the essence of the reference answer well, though it adds slightly more detail about the types of treatment involved.
    - Accuracy: The information provided is accurate in stating that early and consistent treatment can help manage mental health conditions.
    - Factual Correctness: The response is factually correct, as it aligns with common understanding of mental health treatment strategies.
    - Completeness: The response is somewhat complete, mentioning both medication and psychotherapy, but it could elaborate more on how these treatments help manage conditions.
    - Clarity: The response is clear and straightforward, making it easy to understand the main point being conveyed.
    - Conciseness: The

Evaluating responses:  67%|██████▋   | 67/100 [02:51<01:26,  2.61s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it addresses the causes of depression in Seasonal Affective Disorder (SAD), specifically mentioning the lack of sunlight.
    - Semantic Similarity: The response captures the intent of the reference answer by discussing the impact of seasonal changes on mood, although it does not use the same phrasing.
    - Accuracy: The information provided is accurate regarding the effects of sunlight on circadian rhythms and serotonin levels, which are related to mood regulation.
    - Factual Correctness: The facts presented are consistent with established knowledge about SAD and its relationship to sunlight exposure.
    - Completeness: The response could be seen as slightly incomplete, as it does not explicitly mention the seasonal changes or the specific time of year (fall and winter) that are typically associated with SAD.
    - Clarity: The response is clear and easy to understand, with no ambiguous lang

Evaluating responses:  68%|██████▊   | 68/100 [02:53<01:20,  2.53s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The RAG response is relevant as it addresses the specific aspects of social anxiety disorder, focusing on fear of embarrassment and avoidance behavior.
    - Semantic Similarity: The response captures the essence of the reference answer, particularly the focus on fear and impairment in social situations, though it lacks some nuances present in the reference.
    - Accuracy: The information presented is accurate regarding the characteristics of social anxiety disorder, aligning with recognized definitions.
    - Factual Correctness: The facts are consistent with reliable sources on social anxiety disorder, confirming the presence of excessive fear and its implications on social interactions.
    - Completeness: The response is somewhat complete but could benefit from mentioning the impact on work life, which is a significant aspect highlighted in the reference answer.
    - Clarity: The response is clear and easy to understan

Evaluating responses:  69%|██████▉   | 69/100 [02:55<01:17,  2.51s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response directly addresses the query by explaining how the 988 Suicide & Crisis Lifeline supports individuals in crisis, making it highly relevant.
    - Semantic Similarity: The response captures the core meaning of the reference answer, though it provides more detail. It aligns well with the intent of the reference.
    - Accuracy: The information provided about the Lifeline's services is accurate, detailing the availability of trained counselors and various communication methods.
    - Factual Correctness: The facts presented are consistent with known information about the 988 Lifeline and its operations, ensuring factual correctness.
    - Completeness: The response is comprehensive, covering multiple aspects of the Lifeline's support mechanisms, including the types of issues addressed and communication options.
    - Clarity: The response is clearly articulated, with straightforward language that makes it easy to u

Evaluating responses:  70%|███████   | 70/100 [02:58<01:15,  2.51s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is highly relevant as it directly addresses lifestyle changes that can help manage Seasonal Affective Disorder (SAD).
    - Semantic Similarity: The response captures the intent of the reference answer but expands on it significantly, which makes it less semantically similar despite both discussing the importance of light and sleep.
    - Accuracy: The information provided is accurate and reflects common recommendations for managing SAD.
    - Factual Correctness: The suggestions made in the response are consistent with recognized methods for managing SAD, such as exposure to natural light and maintaining healthy habits.
    - Completeness: The response is quite complete, offering a wide range of lifestyle changes, whereas the reference answer is quite limited in scope.
    - Clarity: The response is clear and easy to understand, with each suggestion presented in a straightforward manner.
    - Conciseness: Whil

Evaluating responses:  71%|███████   | 71/100 [03:00<01:13,  2.53s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it discusses self-harm behaviors and their initiation during teenage years, which aligns with the query about causes. However, it could more directly address the specific causes mentioned in the reference answer.
    - Semantic Similarity: The response captures the essence of emotional distress as a cause but does not explicitly link it to coping with negative feelings, which is a key aspect of the reference answer.
    - Accuracy: The information presented about self-harm behaviors is accurate, particularly regarding the age of onset and the coping mechanisms involved.
    - Factual Correctness: The facts provided are consistent with general understanding and knowledge about self-harm, making them reliable and valid.
    - Completeness: The response is somewhat complete, as it covers aspects of self-harm behaviors, but it lacks a concise mention of emotional distress as a cause, which is critical

Evaluating responses:  72%|███████▏  | 72/100 [03:03<01:10,  2.50s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it directly addresses the query about common self-harm behaviors.
    - Semantic Similarity: The response captures the intent of the reference answer well, though it provides a more extensive list of examples rather than a concise summary.
    - Accuracy: The behaviors listed are accurate indicators of self-harm and align with common understanding.
    - Factual Correctness: The information provided is factually correct and consistent with what is generally known about self-harm behaviors.
    - Completeness: The response is quite complete, offering a variety of behaviors that indicate self-harm, which enhances its thoroughness.
    - Clarity: The response is clear and easy to understand, with each behavior presented in a straightforward manner.
    - Conciseness: The response is somewhat lengthy, listing multiple examples, which could be seen as excessive given the instruction for common behavior

Evaluating responses:  73%|███████▎  | 73/100 [03:05<01:06,  2.45s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response effectively addresses the query about common symptoms of depression in individuals with Seasonal Affective Disorder (SAD), listing several pertinent symptoms.
    - Semantic Similarity: The response captures the essence of the reference answer, though it includes additional symptoms that are not mentioned in the reference. This demonstrates a broader understanding of the topic.
    - Accuracy: The symptoms listed are accurate and align well with established knowledge about SAD.
    - Factual Correctness: The facts presented are consistent with reliable sources regarding SAD and its symptoms, confirming the response's accuracy.
    - Completeness: The response is quite complete, covering a wide range of symptoms associated with SAD, including some that are not mentioned in the reference answer.
    - Clarity: The response is clear and straightforward, making it easy for the reader to understand the symptoms liste

Evaluating responses:  74%|███████▍  | 74/100 [03:08<01:07,  2.62s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Semantic Similarity: The response captures the intent of the reference answer well, although it provides a more extensive list of signs. It aligns with the core concepts presented in the reference.
    - Conciseness: While the response is detailed, it could be considered slightly less concise due to the length of the list. However, it effectively conveys necessary information.

### Overall Scores:
- RAG Overall Score: 9


Evaluating responses:  75%|███████▌  | 75/100 [03:10<01:02,  2.51s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response effectively addresses the query by defining mental health and its components, which is directly relevant to the question asked.
    - Semantic Similarity: The response captures the essence of the reference answer, though it expands on it with additional details. The core meaning is preserved.
    - Accuracy: The information provided is accurate and aligns well with established definitions of mental health.
    - Factual Correctness: The details included in the response are factually correct and consistent with reliable sources regarding mental health.
    - Completeness: The response is more complete than the reference answer as it elaborates on how mental health affects behavior and decision-making, providing a fuller picture.
    - Clarity: The response is clear and easy to understand, with no ambiguous language or complex jargon that might confuse the reader.
    - Conciseness: While the response is slightly 

Evaluating responses:  76%|███████▌  | 76/100 [03:13<01:01,  2.54s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it directly addresses how therapy, specifically CBT, helps individuals understand their thought processes.
    - Semantic Similarity: The response captures the essence of the reference answer but is somewhat more detailed. It reflects the same meaning but with added emphasis on CBT's role.
    - Accuracy: The information provided regarding CBT's role in understanding thought processes is accurate and aligns with established psychological principles.
    - Factual Correctness: The facts presented are consistent with reliable sources regarding CBT and its effects on thought processes.
    - Completeness: The response is fairly complete, covering the relationship between thoughts, beliefs, and behaviors, though it could briefly mention other forms of therapy for broader context.
    - Clarity: The response is clear and easy to understand, effectively communicating the main ideas without ambiguity.
  

Evaluating responses:  77%|███████▋  | 77/100 [03:16<00:59,  2.59s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response effectively addresses the query about overcoming self-harm tendencies by discussing professional help and alternative coping mechanisms. It is relevant to the question asked.
    - Semantic Similarity: The response captures the essence of the reference answer by mentioning therapy and coping mechanisms, but it elaborates more on the types of therapy, which slightly diverges from the brevity of the reference.
    - Accuracy: The information provided about DBT and the importance of recognizing self-harm as a temporary solution is accurate and aligns with established therapeutic practices.
    - Factual Correctness: The facts presented are consistent with reliable sources regarding self-harm treatment and coping strategies, making the response factually correct.
    - Completeness: The response is thorough and provides a comprehensive view of how to overcome self-harm tendencies, covering multiple aspects that the 

Evaluating responses:  78%|███████▊  | 78/100 [03:18<00:53,  2.45s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it directly addresses how the Native Connections initiative supports youth, focusing on suicide prevention and mental health, which aligns with the intent of the query.
    - Semantic Similarity: The response captures the essence of the reference answer by mentioning trauma and mental health, but it is more detailed and does not encapsulate the brevity of the reference answer.
    - Accuracy: The information provided is accurate and reflects the goals of the Native Connections initiative as it pertains to youth support.
    - Factual Correctness: The facts presented are consistent with the known objectives of the Native Connections initiative, making the response factually correct.
    - Completeness: The response is complete in that it outlines several aspects of support, such as community-based suicide prevention and culturally responsive models, thus covering more ground than the reference answ

Evaluating responses:  79%|███████▉  | 79/100 [03:20<00:49,  2.37s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Semantic Similarity: The response captures the essence of the reference answer by emphasizing early intervention, but it is more detailed and somewhat diverges from the succinctness of the reference.
    - Accuracy: The information provided about the symptoms of Major Depressive Disorder and the importance of early detection is accurate and aligns with common medical understanding.
    - Factual Correctness: The facts presented are consistent with reliable sources regarding depression and its diagnosis, making the response factually correct.
    - Completeness: The response is fairly complete, discussing the variability of symptoms and the benefits of early detection, although it could be more focused on the core point of preventing worsening symptoms.
    - Clarity: The response is clear and easy to understand, with no ambiguous language.
    - Conciseness: The response could be more concise; it includes some extraneous details about 

Evaluating responses:  80%|████████  | 80/100 [03:22<00:47,  2.36s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it directly addresses the aim of the Zero Suicide framework, focusing on suicide prevention strategies for adults.
    - Semantic Similarity: The response captures the intent of the reference answer well, though it introduces additional details about health equity principles that are not mentioned in the reference.
    - Accuracy: The information presented is accurate regarding the Zero Suicide framework's goals.
    - Factual Correctness: The response aligns well with known facts about the Zero Suicide framework, confirming its focus on comprehensive strategies.
    - Completeness: The response provides a more detailed view of the framework by mentioning health equity principles, which adds to its completeness compared to the reference answer.
    - Clarity: The response is clear and straightforward, making it easy to understand the framework's objectives.
    - Conciseness: While the response is

Evaluating responses:  81%|████████  | 81/100 [03:24<00:42,  2.25s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it directly addresses the query regarding how increased substance use can indicate suicide risk in adults.
    - Semantic Similarity: The response captures the core idea of the reference answer, focusing on the link between substance use and emotional pain, though it lacks the specific mention of coping mechanisms.
    - Accuracy: The information provided is accurate, correctly linking increased substance use to underlying mental health issues.
    - Factual Correctness: The facts presented align with common understandings in mental health discussions and are consistent with the reference answer.
    - Completeness: The response is somewhat incomplete as it does not explicitly mention the aspect of coping with emotional pain, which is a critical part of the reference answer.
    - Clarity: The response is clear and easy to understand, effectively communicating its message.
    - Conciseness: The r

Evaluating responses:  82%|████████▏ | 82/100 [03:30<00:58,  3.26s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Semantic Similarity: The response captures the intent of the reference answer but provides a broader list of signs, which may slightly deviate from the more concise nature of the reference.
    - Factual Correctness: The facts provided are consistent with reliable sources on mental health, aligning well with general knowledge in the field.
    - Completeness: The response is quite complete, listing multiple signs of mental health problems, though it could be argued that it includes some less common signs that may not be as universally recognized.
    - Clarity: The response is clear, but the length and complexity of the list may make it slightly overwhelming for some readers.
    - Conciseness: The response could be more concise; it presents many signs, which could be streamlined for better readability without losing essential information.

### Overall Scores:
- RAG Overall Score: 8


Evaluating responses:  83%|████████▎ | 83/100 [03:32<00:49,  2.93s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it discusses ways caregivers can communicate about mental health with children, aligning well with the query.
    - Semantic Similarity: The response does not capture the essence of the reference answer, which emphasizes simplicity and comfort. It is more detailed and deviates from the concise nature of the reference.
    - Accuracy: The information provided is accurate and logically sound, offering appropriate strategies for discussing mental health with children.
    - Factual Correctness: The response contains correct and reliable suggestions for discussing mental health, consistent with common advice for caregivers.
    - Completeness: The response is complete, covering various aspects of the discussion process, including age-appropriateness and emotional awareness.
    - Clarity: The response is clear and easy to understand, making it accessible for caregivers.
    - Conciseness: The response

Evaluating responses:  84%|████████▍ | 84/100 [03:35<00:48,  3.00s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response effectively addresses the query by differentiating phobias from general anxiety, which is the main focus of the question.
    - Semantic Similarity: The response captures the essence of the reference answer, emphasizing the irrational nature of phobias and the ongoing nature of general anxiety, though it includes additional details that slightly expand on the original meaning.
    - Accuracy: The information provided is accurate, correctly stating that phobias are irrational fears and that general anxiety involves excessive worry.
    - Factual Correctness: The facts presented align well with psychological definitions and concepts related to phobias and general anxiety, confirming its correctness.
    - Completeness: The response offers a thorough explanation, including treatment options for both conditions, which adds depth to the answer, although it is not strictly necessary for the comparison.
    - Clarity: 

Evaluating responses:  85%|████████▌ | 85/100 [03:38<00:42,  2.82s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response effectively addresses the query by explaining how the 988 Lifeline provides crisis support, making it relevant to the question asked.
    - Semantic Similarity: The response captures the essence of the reference answer by mentioning the connection to trained counselors and the 24/7 service, though it elaborates further than the reference.
    - Accuracy: The information provided is accurate regarding the services offered by the 988 Lifeline and aligns with known facts about the organization.
    - Factual Correctness: The details about the Lifeline being free, confidential, and available 24/7 are factually correct and consistent with reliable sources.
    - Completeness: The response is complete as it covers multiple aspects of the Lifeline's services, including specialized lines and additional resources, which adds depth to the answer.
    - Clarity: The response is clear and well-structured, making it easy to 

Evaluating responses:  86%|████████▌ | 86/100 [03:40<00:38,  2.74s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is somewhat relevant as it discusses therapy as a coping technique for social anxiety, but it lacks mention of other common techniques that are typically included, such as mindfulness and gradual exposure.
    - Semantic Similarity: The response captures some of the intent of the reference answer by discussing therapy, but it does not align well with the broader range of techniques mentioned in the reference.
    - Accuracy: The information about therapy is accurate; however, the response misses other effective coping techniques, which reduces its overall accuracy in addressing the query.
    - Factual Correctness: The mention of neurofeedback and biofeedback is accurate but not widely recognized as common coping techniques for social anxiety, which could mislead readers.
    - Completeness: The response is incomplete as it does not cover a variety of coping techniques, omitting key methods such as mindfulness a

Evaluating responses:  87%|████████▋ | 87/100 [03:43<00:35,  2.75s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response mentions Dialectical Behavior Therapy (DBT) as a support option, which is relevant to the query about support for binge eating disorder. However, it does not address other potential support options, making it partially relevant.
    - Semantic Similarity: The response does not capture the essence of the reference answer, which highlights support groups and tailored nutrition counseling. The focus on DBT does not align well with the simpler concepts presented in the reference.
    - Accuracy: The mention of DBT is accurate in the context of treatment for binge eating disorder, but it does not encompass the broader range of support options available.
    - Factual Correctness: The information regarding DBT is factually correct, but the lack of mention of other support options diminishes the overall factual coverage.
    - Completeness: The response is incomplete as it only discusses DBT and omits other critical su

Evaluating responses:  88%|████████▊ | 88/100 [03:46<00:32,  2.72s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response directly addresses the importance of early intervention in children's mental health, aligning well with the query's intent. However, it could be more focused on the specific reasons why early intervention is critical.
    - Semantic Similarity: The response captures the essence of the reference answer, emphasizing prevention and development, but it does so in a more detailed manner. The core message is similar, though the phrasing differs.
    - Accuracy: The information provided is accurate and logically sound, explaining the benefits of early mental health intervention in children.
    - Factual Correctness: The facts presented are consistent with established knowledge about mental health interventions and their benefits, thus maintaining factual correctness.
    - Clarity: The response is clear and easy to understand, with a straightforward explanation of the importance of early mental health intervention.
  

Evaluating responses:  89%|████████▉ | 89/100 [03:48<00:28,  2.60s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is highly relevant to the query as it directly addresses the role of extreme mood swings in suicide risk, making it pertinent to the question asked.
    - Semantic Similarity: The response captures the essence of the reference answer by discussing emotional pain and its implications but does not directly mention the impact on relationships, which is a key point in the reference.
    - Accuracy: The information provided is accurate regarding the association between mood swings and suicide risk, aligning well with psychological understanding.
    - Factual Correctness: The response presents factually correct information consistent with reliable sources about the signs of suicide risk.
    - Completeness: The response is comprehensive, covering multiple aspects related to mood swings and their implications for suicide risk, including associated signs and the importance of seeking help.
    - Clarity: The response i

Evaluating responses:  90%|█████████ | 90/100 [03:50<00:24,  2.49s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is highly relevant as it directly addresses the query about how schools can help prevent self-harm among students by outlining multiple strategies and interventions.
    - Semantic Similarity: The response captures the intent of the reference answer, though it provides a more detailed approach rather than a direct semantic match. The broader strategies mentioned align with the idea of providing mental health support.
    - Accuracy: The information provided is accurate and reflects common practices in schools regarding mental health support and intervention.
    - Factual Correctness: The facts presented are consistent with established understandings of mental health support in educational settings and are logically sound.
    - Completeness: The response is comprehensive, covering various aspects of prevention strategies, including training for educators, crisis support, and promoting help-seeking behavior, whi

Evaluating responses:  91%|█████████ | 91/100 [03:52<00:21,  2.42s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is highly relevant as it directly addresses the differences between SAD and general depression, aligning well with the query's intent.
    - Semantic Similarity: The response captures the essence of the reference answer, highlighting the seasonal aspect of SAD compared to general depression, though it elaborates more than the reference.
    - Accuracy: The information presented is accurate, correctly noting the seasonal triggers of SAD and the various factors contributing to general depression.
    - Factual Correctness: The facts are consistent with established knowledge about SAD and general depression, making the response factually correct.
    - Completeness: The response is complete, covering key aspects such as symptoms, triggers, and the seasonal pattern of SAD, which enhances understanding.
    - Clarity: The response is clear and easy to understand, with well-structured sentences that convey the differe

Evaluating responses:  92%|█████████▏| 92/100 [03:55<00:18,  2.36s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant to the query as it discusses practices that can improve emotional and spiritual well-being. However, it could be more aligned with the simplicity aspect mentioned in the instruction.
    - Semantic Similarity: The response does not capture the essence of the reference answer, which emphasizes accessibility in daily life practices, while the RAG response focuses on specific techniques.
    - Accuracy: The information provided is accurate and reflects common practices that contribute to emotional and spiritual well-being.
    - Factual Correctness: The facts presented, such as the benefits of meditation, sleep, and exercise, are consistent with established knowledge in well-being.
    - Completeness: The response covers several practices but could benefit from a more holistic view that aligns with the simplicity and accessibility highlighted in the reference answer.
    - Clarity: The response is clear

Evaluating responses:  93%|█████████▎| 93/100 [03:57<00:16,  2.31s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it addresses the psychological factors contributing to eating disorders, aligning well with the query.
    - Semantic Similarity: The response captures the intent of the reference answer but goes into more detail, which slightly diverges from the succinctness of the reference.
    - Accuracy: The information provided is accurate and logically sound, identifying various psychological and social factors related to eating disorders.
    - Factual Correctness: The response presents verified and consistent facts about the contributing factors to eating disorders, consistent with reliable sources.
    - Completeness: The response is comprehensive, covering a wide range of factors, including genetic, psychological, and sociocultural aspects, which enhances its completeness compared to the reference answer.
    - Clarity: The response is clear and easy to understand, with well-structured sentences that co

Evaluating responses:  94%|█████████▍| 94/100 [04:00<00:14,  2.46s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it addresses support strategies for individuals with schizophrenia, but it could be more aligned with the specific strategies mentioned in the reference answer.
    - Semantic Similarity: There is some semantic similarity in that both responses discuss support strategies, but the RAG response is broader and does not closely match the key points from the reference answer.
    - Accuracy: The information provided in the RAG response is accurate and reflects common strategies for managing schizophrenia.
    - Factual Correctness: The strategies mentioned are factually correct and commonly recommended for individuals with schizophrenia.
    - Completeness: The response covers a variety of strategies but lacks the mention of therapy and family involvement, which are critical components noted in the reference answer.
    - Clarity: The response is clear and easy to understand, presenting the strategies 

Evaluating responses:  95%|█████████▌| 95/100 [04:02<00:12,  2.47s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it addresses the concern about a child's mental health and suggests consulting professionals. However, it focuses heavily on specific behaviors rather than outlining steps for caregivers.
    - Semantic Similarity: The response captures some intent from the reference answer by suggesting consultation with professionals but lacks the broader steps mentioned in the reference, such as maintaining communication.
    - Accuracy: The information regarding behaviors indicating potential mental health issues is accurate but does not fully align with the broader context of caregiver steps.
    - Factual Correctness: The response correctly identifies behaviors that could indicate mental health issues in children; however, it lacks the broader context of how caregivers should respond.
    - Completeness: The response is incomplete as it does not cover all essential aspects of the query, particularly the impo

Evaluating responses:  96%|█████████▌| 96/100 [04:05<00:09,  2.44s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is somewhat relevant as it addresses the impact of untreated OCD on life, but it lacks specific details that directly relate to daily life activities.
    - Semantic Similarity: The response captures the general idea of the reference answer but does not convey the same depth of meaning regarding how OCD specifically affects daily activities.
    - Accuracy: The statement is accurate in that it implies untreated OCD can dominate life, but it does not elaborate on what that entails, which may lead to misunderstandings.
    - Factual Correctness: The response is factually correct but lacks the necessary context to fully understand the implications of untreated OCD.
    - Completeness: The response is incomplete as it does not provide specific examples or details about how OCD affects daily life, which is essential to fully address the query.
    - Clarity: The response is clear in its wording, but its vagueness reg

Evaluating responses:  97%|█████████▋| 97/100 [04:07<00:07,  2.51s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is highly relevant as it directly addresses how parents can identify mental health concerns in children by listing specific signs and behaviors to look for.
    - Semantic Similarity: The RAG response captures the intent of the reference answer, expanding on it with additional details, while still aligning with the core message regarding signs of mental health issues.
    - Accuracy: The information provided is accurate and aligns with established indicators of mental health concerns in children, such as prolonged sadness and behavioral changes.
    - Factual Correctness: The facts presented in the RAG response are consistent with reliable knowledge on the topic, making it factually correct.
    - Completeness: The response is comprehensive, covering a range of signs and suggesting consultation with professionals, which adds depth to the answer.
    - Clarity: The response is clear and easy to understand, provid

Evaluating responses:  98%|█████████▊| 98/100 [04:10<00:04,  2.45s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it addresses the query about the causes of social anxiety disorder, mentioning genetics and learned behavior.
    - Semantic Similarity: The response captures the essence of the reference answer, although it is slightly more verbose and includes additional details that were not present in the reference.
    - Accuracy: The information provided is accurate, as it correctly identifies genetics and negative social experiences as potential causes of social anxiety disorder.
    - Factual Correctness: The facts presented are consistent with common understandings in psychology regarding social anxiety disorder, aligning with the reference answer.
    - Completeness: The response is somewhat complete but could be improved by mentioning that negative experiences can include more specific examples, as suggested by the reference answer.
    - Clarity: The response is clear and understandable, though the phr

Evaluating responses:  99%|█████████▉| 99/100 [04:12<00:02,  2.55s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it directly addresses the signs that indicate a child may need mental health support, aligning well with the query.
    - Semantic Similarity: The response captures the intent of the reference answer but offers a more detailed list rather than a summary, which somewhat diverges from the more concise nature of the reference.
    - Accuracy: The information provided is accurate and aligns with common indicators of mental health issues in children.
    - Factual Correctness: The facts presented are consistent with recognized signs of mental health concerns and are generally supported by reliable sources.
    - Completeness: The response is thorough, covering multiple signs of mental health issues, which adds to its completeness compared to the reference answer.
    - Clarity: The response is clear and easy to understand, with each sign listed in a straightforward manner.
    - Conciseness: While the 

Evaluating responses: 100%|██████████| 100/100 [04:14<00:00,  2.55s/sample]

Evaluation Output:
Feedback:
- RAG Response:
    - Relevance: The response is relevant as it addresses the query about biological factors contributing to mental health conditions.
    - Semantic Similarity: The meaning of the RAG response closely aligns with the reference answer, although it is slightly less formal in phrasing.
    - Accuracy: The information provided is accurate, identifying genes and brain chemistry as biological factors.
    - Factual Correctness: The facts presented are correct and consistent with established knowledge in mental health.
    - Completeness: The response is somewhat limited in scope; while it mentions two important factors, it does not explore other potential biological factors that could be relevant.
    - Clarity: The response is clear and easy to understand, without ambiguity.
    - Conciseness: The response is concise, effectively communicating the necessary information without unnecessary elaboration.

### Overall Scores:
- RAG Overall Score: 7





In [None]:
import json

# Write `evaluations` to disk as JSON with a 4-space indent so the results
# outlive the notebook session.
with open("evaluation_results_rag_1000_100_sentence_transformers.json", "w") as results_file:
    results_file.write(json.dumps(evaluations, indent=4))

In [None]:
# Tabulate the evaluation results so they can be summarised with pandas.
results = pd.DataFrame(data=evaluations)

In [None]:
# Descriptive statistics for the RAG score column; kept as a one-column
# DataFrame so the notebook renders it as a table.
results.loc[:, ['RAG_Score']].describe()

Unnamed: 0,RAG_Score
count,100.0
mean,7.18
std,1.546452
min,1.0
25%,7.0
50%,8.0
75%,8.0
max,9.0


In [None]:
# Mean of the RAG_Score column over all rows in `results`.
average_rag_score = results.loc[:, 'RAG_Score'].mean()

print("Average RAG Score:", average_rag_score)

Average RAG Score: 7.18
