In [1]:
!wget https://raw.githubusercontent.com/alexeygrigorev/minsearch/main/minsearch.py

In [27]:
import numpy as np
import minsearch
import json
from tqdm.auto import tqdm

import pandas as pd
from langchain_core.prompts import PromptTemplate,ChatPromptTemplate
from pydantic import BaseModel, Field
from typing import List

from langchain.output_parsers import PydanticOutputParser,RetryOutputParser
from langchain_core.exceptions import OutputParserException

from langchain_community.llms import Ollama

from datasets import load_dataset
from collections import defaultdict
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import re

In [5]:
QUERY_GENERATED_FILE = '../data/queries_generated.parquet'
GEMMA2_QUERY_FILE = "../data/gemma2_queries.parquet"
LLAMA3_QUERY_FILE = "../data/llama3_queries.parquet"
QUERY_EVALUATED_FILE = '../data/queries_evaluated.parquet'
RAG_EVALUDATION_RESULT_FILE = '../data/Evaluation_results_2.csv'

## Ingestion

In [111]:
df_org = pd.read_parquet(QUERY_EVALUATED_FILE)
df_org['true_answer'] = df_org['answers'].apply(
    lambda answer_list: ' '.join([answer['answer'] for answer in answer_list[:2] if 'answer' in answer]))

def prepare_df(df_org):
    
    ground_truth_gemma_df = df_org[['question_id','gemma_queries']]
    ground_truth_gemma_df =  ground_truth_gemma_df.explode(['gemma_queries'])
    
    ground_truth_llama_df = df_org[['question_id','llama3_queries']]
    ground_truth_llama_df =  ground_truth_llama_df.explode(['llama3_queries'])
    
    quest_ans_df = df_org[['question_id','question_title','question','true_answer']]
    
    df = df_org.drop(['gemma_queries','gemma_queries_llama3_feedback','gemma_queries_llama3_score',
                      'llama3_queries','llama3_queries_gemma_score','llama3_queries_gemma_feedback',
                     'true_answer'], axis=1)

    df_exploded = df.assign(answers=df['answers']).explode('answers')

    # Filter to only consider the first 2 answers for each query
    df_exploded['answer_index'] = df_exploded.groupby('question_id').cumcount()
    df_filtered = df_exploded[df_exploded['answer_index'] < 2].drop('answer_index', axis=1)

    # Expand the answers dictionary into separate columns
    df_expanded = df_filtered['answers'].apply(pd.Series)

    # Concatenate the original DataFrame with the expanded answers
    raw_df = pd.concat([df_filtered.drop('answers', axis=1), df_expanded], axis=1)
    
    return raw_df, quest_ans_df, ground_truth_gemma_df, ground_truth_llama_df



df,quest_ans_df, ground_truth_gemma_df, ground_truth_llama_df, = prepare_df(df_org)
df.to_csv("../services/app/Mental_wellness_data.csv",index=False)
documents = df.to_dict(orient='records')
documents[0]

{'question_id': 0,
 'question_title': 'Do I have too many issues for counseling',
 'question': 'I have so many issues to address. I have a history of sexual abuse, I’m a breast cancer survivor and I am a lifetime insomniac. I have a long history of depression and I’m beginning to have anxiety. I have low self esteem but I’ve been happily married for almost 35 years. I’ve never had counseling about any of this. Do I have too many issues to address in counseling',
 'question_link': 'https://counselchat.com/questions/do-i-have-too-many-issues-for-counseling',
 'topic': 'depression',
 'answer': 'It is very common for people to have multiple issues that they want to (and need to) address in counseling. I have had clients ask that same question and through more exploration, there is often an underlying fear that they "can\'t be helped" or that they will "be too much for their therapist." I don\'t know if any of this rings true for you. But, most people have more than one problem in their liv

### Create embeddings using pretrained models

In [95]:
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("multi-qa-MiniLM-L6-cos-v1")
q = "How can i get emotional support?"
v = model.encode(q)
len(v)

384

## RAG flow with MINSEARCH (text search)

In [102]:
from minsearch import Index

index = Index(
    text_fields=["question_title","question", "answer"],
    keyword_fields=["question_id"]
)

index.fit(documents)

def minsearch_search(q, boost):
    if boost is None:
        boost = {}
        
    results = index.search(
        query = q,
        filter_dict = {},
        boost_dict = boost,
        num_results = 5
    )
    return results

### LLM Config / Embedding setup

In [92]:
import os
from dotenv import load_dotenv
import openai

load_dotenv()
openai.api_key = os.getenv('OPENAI_API_KEY')

In [None]:
def build_prompt(query, search_results):
    prompt_template = """
    You're a therapist AI assistant focusing on responding to depression related user queries.
    Use only the facts from the CONTEXT when answering the QUESTION.

    QUESTION: {question}

    CONTEXT:
    {context}
    """.strip()

    context = ""
    #print(search_results)
    for doc in search_results:
        context += f"""
        Question Title: {doc['question_title']}
        Question: {doc['question']}
        Answer: {doc['answer']}
        Therapist : {doc['therapist_info']}
        """ 

    prompt = prompt_template.format(question=query, context=context).strip()
    return prompt

def openai_4o(prompt):
    """
    Send the prompt to OpenAI and get the model's response. This uses detailed context
    to improve the quality of the AI's answer.
    """
    response = openai.chat.completions.create(
        model='gpt-4o-mini',
        messages=[{"role": "user", "content": prompt}]
    )
    return response.choices[0].message.content


def openai_3_5(prompt):
    """
    Send the prompt to OpenAI and get the model's response. This uses detailed context
    to improve the quality of the AI's answer.
    """
    response = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}]
    )
    return response.choices[0].message.content


def llama3(prompt):
    """
    Send the prompt to llama3 from ollama and get the model's response. This uses detailed context
    to improve the quality of the AI's answer.
    """
    model = Ollama(model="llama3", temperature=0.7)

    # Generate response
    response = model.invoke(prompt, max_tokens=512) 
    print(response)
    print("*****")
    answer = re.sub(r'.*answer:', '', response).strip()
    answer = answer.replace('*','')
    
    return answer

def gemma2(prompt):
    """
    Send the prompt to gemma2 from ollama and get the model's response. This uses detailed context
    to improve the quality of the AI's answer.
    """
    model = Ollama(model="gemma2:2b", temperature=0.7)

    # Generate response
    response = model.invoke(prompt, max_tokens=512)
    response = response.replace('*','')
    return response


def rag(query, retrieval_search_function_name=minsearch_search, llm_name=llama3,boost = {}):
    if(retrieval_search_function_name==minsearch_search):
        search_results = retrieval_search_function_name(query,boost)
    else:
        search_results = retrieval_search_function_name(query)
    prompt = build_prompt(query, search_results)
    answer = llm_name(prompt)
    return answer

In [45]:
query = "How can i come out of depression?"
rag(query, minsearch_search)

I understand that you're struggling with depression and are looking for ways to come out of it. From the context, I want to emphasize that acceptance is a crucial first step. As Sherry Katz mentioned earlier, "accept your nervousness and restless sleep" as well as "accept too about feeling down." It's essential to acknowledge and accept your emotions rather than trying to fight them.

In addition to acceptance, it might be helpful to focus on small, manageable changes in your daily life. This could include finding something that remains the same after a breakup, like a hobby or a routine, as Lauren Ostrowski suggested. You may also want to consider reaching out to friends or family members who are supportive and can provide emotional validation.

Remember that depression is not something you can simply "snap out of." It's a serious condition that requires understanding, empathy, and professional help. If you're struggling with suicidal thoughts or feelings of worthlessness, please know

'I understand that you\'re struggling with depression and are looking for ways to come out of it. From the context, I want to emphasize that acceptance is a crucial first step. As Sherry Katz mentioned earlier, "accept your nervousness and restless sleep" as well as "accept too about feeling down." It\'s essential to acknowledge and accept your emotions rather than trying to fight them.\n\nIn addition to acceptance, it might be helpful to focus on small, manageable changes in your daily life. This could include finding something that remains the same after a breakup, like a hobby or a routine, as Lauren Ostrowski suggested. You may also want to consider reaching out to friends or family members who are supportive and can provide emotional validation.\n\nRemember that depression is not something you can simply "snap out of." It\'s a serious condition that requires understanding, empathy, and professional help. If you\'re struggling with suicidal thoughts or feelings of worthlessness, pl

## RAG flow with elasticsearch (vector search)

In [96]:
from elasticsearch import Elasticsearch

def connect_to_es():
    for _ in range(10):  # Retry up to 10 times
        try:
            es = Elasticsearch("http://localhost:9200", basic_auth=('elastic', 'DkIedPPSCb'))
            if es.ping():
                return es
        except Exception as e:
            print(f"Connection failed, retrying... ({e})")
            time.sleep(10)
    raise Exception("Failed to connect to Elasticsearch after several retries")
    

index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    },
    "mappings": {
        "properties": {
            "answer": {"type": "text"},
            "question": {"type": "text"},
            "question_title": {"type": "text"},
            "therapist_info": {"type": "text"},
            "question_id": {"type": "keyword"},
            "question_title_vector": {
                "type": "dense_vector",
                "dims": 384,
                "index": True,
                "similarity": "cosine"
            },
            "question_vector": {
                "type": "dense_vector",
                "dims": 384,
                "index": True,
                "similarity": "cosine"
            },
            "answer_vector": {
                "type": "dense_vector",
                "dims": 384,
                "index": True,
                "similarity": "cosine"
            },
            "question_answer_vector": {
                "type": "dense_vector",
                "dims": 384,
                "index": True,
                "similarity": "cosine"
            },
        }
    }
}


def create_embeddings_index(es_client,index_name, filtered_documents):
    for doc in tqdm(documents):
        question_title = doc['question_title']
        question = doc['question']
        answer =  doc['answer']
        qa = question_title + '    ' + question + '    ' + answer
        
        doc['question_title_vector'] = model.encode(question_title)
        doc['question_vector'] = model.encode(question)
        doc['answer_vector'] = model.encode(answer)
        doc['question_answer_vector'] = model.encode(qa)

    for doc in tqdm(documents):
        es_client.index(index=index_name, document=doc)
        

In [97]:
es_client = connect_to_es()
es_client.info()

index_name = "mental_wellness_therapist"

es_client.indices.delete(index=index_name, ignore_unavailable=True)
es_client.indices.create(index=index_name, body=index_settings)

create_embeddings_index(es_client,index_name, documents)

  0%|          | 0/99 [00:00<?, ?it/s]

  0%|          | 0/99 [00:00<?, ?it/s]

### Vector Search ElasticSearch

In [98]:
def elastic_search_knn(field, vector):
    knn = {
        "field": field,
        "query_vector": vector,
        "k": 5,
        "num_candidates": 10000
    }
    
    search_query = {
    "knn": knn,
    "_source": ["answer","question", "question_title", "therapist_info", "question_id"]
    }
    
    es_results = es_client.search(
        index=index_name,
        body=search_query
    )
    
    result_docs = []
    
    for hit in es_results['hits']['hits']:
        result_docs.append(hit['_source'])
    return result_docs

def question_answer_vector_knn(question):
    v_q = model.encode(question)
    return elastic_search_knn('question_answer_vector', v_q)


def elastic_search_knn_combined(vector):
    search_query = {
        "size": 5,
        "query": {
            "bool": {
                "must": [
                    {
                        "script_score": {
                            "query": {
                                "match_all": {}  # No specific term, matching all documents
                            },
                            "script": {
                                "source": """
                                    cosineSimilarity(params.query_vector, 'question_title_vector') + 
                                    cosineSimilarity(params.query_vector, 'question_vector') + 
                                    cosineSimilarity(params.query_vector, 'answer_vector') + 
                                    cosineSimilarity(params.query_vector, 'question_answer_vector') + 
                                    1
                                """,
                                "params": {
                                    "query_vector": vector
                                }
                            }
                        }
                    }
                ]
            }
        },
        "_source": ["answer", "question", "question_title", "therapist_info", "question_id"] 
    }

    es_results = es_client.search(
        index=index_name,
        body=search_query
    )
    
    result_docs = []
    
    for hit in es_results['hits']['hits']:
        result_docs.append(hit['_source'])

    return result_docs

def question_answer_vector_knn_combined(question):
    v_q = model.encode(question)
    return elastic_search_knn_combined(v_q)

In [47]:
## Use elasticsearch for retrival and Generate answer with llama3
query = "How can i come out of depression?"
answer = rag(query,question_answer_vector_knn, llama3)
answer

I'm so glad you're reaching out for support! Based on our conversation, it seems like you've been experiencing a long list of difficulties, including feeling lonely, isolated, and struggling to cope with your emotions. It's understandable that you might be feeling overwhelmed and unsure about how to move forward.

In terms of coming out of depression, I want to emphasize the importance of self-care and taking things one step at a time. Remember that you're not alone in this process, and it's okay to ask for help when you need it. Consider reaching out to a therapist or counselor who can provide you with a safe and supportive space to work through your emotions.

It might also be helpful to focus on small, achievable goals and celebrate your successes along the way. This could include things like taking care of yourself physically (e.g., eating well, exercising), practicing mindfulness or meditation, or engaging in activities that bring you joy and fulfillment.

Remember, depression is 

"I'm so glad you're reaching out for support! Based on our conversation, it seems like you've been experiencing a long list of difficulties, including feeling lonely, isolated, and struggling to cope with your emotions. It's understandable that you might be feeling overwhelmed and unsure about how to move forward.\n\nIn terms of coming out of depression, I want to emphasize the importance of self-care and taking things one step at a time. Remember that you're not alone in this process, and it's okay to ask for help when you need it. Consider reaching out to a therapist or counselor who can provide you with a safe and supportive space to work through your emotions.\n\nIt might also be helpful to focus on small, achievable goals and celebrate your successes along the way. This could include things like taking care of yourself physically (e.g., eating well, exercising), practicing mindfulness or meditation, or engaging in activities that bring you joy and fulfillment.\n\nRemember, depress

In [48]:
## Use elasticsearch for retrival and Generate answer with gemma2
query = "How can i come out of depression?"
answer = rag(query,question_answer_vector_knn_combined, gemma2)
answer

"It's understandable to feel lost when struggling with depression.  While it's important to seek professional help, here are some steps you can take: \n\n Connect with others: While isolating may be tempting, try spending time with friends or family. Even small interactions can provide a sense of support and connection.\n Explore your interests: Rediscovering activities you enjoy can rekindle your motivation and spark joy in life.  Even if it's just for short periods. \n Practice self-care: Prioritize basic needs like sleep, nutrition, and exercise. These simple steps can have a significant impact on mood and energy levels. \n Reflect and seek professional help: Consider journaling about your thoughts and feelings. If you find yourself overwhelmed or unable to cope, don't hesitate to reach out to a therapist. They can provide guidance and support during this difficult time.\n\n\nRemember, reaching out for help is a sign of strength, not weakness. Be patient with yourself and know that 

In [99]:
## Use elasticsearch for retrival and Generate answer with openai
query = "How can i come out of depression?"
answer = rag(query,question_answer_vector_knn, openai_3_5)
answer

"To come out of depression, it's important to focus on self-care and find ways to rest and relax to regain emotional and physical strength. It's also beneficial to recognize your own strength in dealing with difficult situations and to consider seeking professional help such as counseling or talking to a doctor for further support. Remember that addressing depression is a process, and being gentle and kind towards yourself can help release tension and improve your overall well-being."

## Retrieval evaluation for minsearch and elasticsearch

In [49]:
def hit_rate(relevance_total):
    cnt = 0

    for line in relevance_total:
        if True in line:
            cnt = cnt + 1

    return cnt / len(relevance_total)
    
def mrr(relevance_total):
    total_score = 0.0

    for line in relevance_total:
        for rank in range(len(line)):
            if line[rank] == True:
                total_score = total_score + 1 / (rank + 1)

    return total_score / len(relevance_total)


def evaluate(ground_truth, search_function, col_name, boost=None):
    relevance_total = []
    for q in tqdm(ground_truth):
        doc_id = q['question_id']
        if(search_function ==minsearch_search):
            results = search_function(q[col_name], boost)
        else:
            results = search_function(q[col_name])
            
        relevance = [d['question_id'] == doc_id for d in results]
        relevance_total.append(relevance)

    return {
        'hit_rate': hit_rate(relevance_total),
        'mrr': mrr(relevance_total),
    }

In [50]:
ground_truth = ground_truth_gemma_df.to_dict(orient='records')

print(evaluate(ground_truth, minsearch_search,'gemma_queries'))
print(evaluate(ground_truth, question_answer_vector_knn,'gemma_queries'))
print(evaluate(ground_truth, question_answer_vector_knn_combined,'gemma_queries'))


ground_truth = ground_truth_llama_df.to_dict(orient='records')
print(evaluate(ground_truth, minsearch_search,'llama3_queries'))
print(evaluate(ground_truth, question_answer_vector_knn,'llama3_queries'))
print(evaluate(ground_truth, question_answer_vector_knn_combined,'llama3_queries'))

  0%|          | 0/214 [00:00<?, ?it/s]

{'hit_rate': 0.6121495327102804, 'mrr': 0.6551401869158877}


  0%|          | 0/214 [00:00<?, ?it/s]

{'hit_rate': 0.8598130841121495, 'mrr': 1.0060747663551404}


  0%|          | 0/214 [00:00<?, ?it/s]

{'hit_rate': 0.8598130841121495, 'mrr': 1.0137071651090346}


  0%|          | 0/260 [00:00<?, ?it/s]

{'hit_rate': 0.5038461538461538, 'mrr': 0.5165384615384616}


  0%|          | 0/260 [00:00<?, ?it/s]

{'hit_rate': 0.7846153846153846, 'mrr': 0.8545512820512824}


  0%|          | 0/260 [00:00<?, ?it/s]

{'hit_rate': 0.7884615384615384, 'mrr': 0.8842307692307694}


## RAG Answer Generation

### Generate response with Elasticsearch retrival, Gemma2 and llama3

In [58]:
# ## Use elasticsearch for retrival on each row and generate the answer with Gemma2 and llama3
tqdm.pandas()  # To use progress bar with pandas

quest_ans_df['answer_gamma2'] = quest_ans_df.progress_apply(
    lambda row: rag(row['question'], question_answer_vector_knn_combined, gemma2),  axis=1)


quest_ans_df['answer_llama3'] = quest_ans_df.progress_apply(
    lambda row: rag(row['question'], question_answer_vector_knn_combined, llama3),  axis=1)

quest_ans_df_doc = quest_ans_df.to_dict(orient = 'records')




## RAG Evaluation

### LLM as judge using Gemma2 and OpenAI

In [70]:
Evaluation_template_answer_answer = """
You are an expert evaluator for a Retrieval-Augmented Generation (RAG) system.
Your task is to analyze the relevance of the generated answer compared to the original answer provided.
Based on the relevance and similarity of the generated answer to the original answer, you will classify
it as "NON_RELEVANT", "PARTLY_RELEVANT", or "RELEVANT".

Here is the data for evaluation:

Original Answer: {true_answer}
Generated Question: {question}
Generated Answer: {answer_llm}

Please analyze the content and context of the generated answer in relation to the original
answer and provide your evaluation in parsable JSON without using code blocks:

{{
  "Relevance": "NON_RELEVANT" | "PARTLY_RELEVANT" | "RELEVANT",
  "Explanation": "[Provide a brief explanation for your evaluation]"
}}
""".strip()


Evaluation_template_quest_answer = """
You are an expert evaluator for a Retrieval-Augmented Generation (RAG) system.
Your task is to analyze the relevance of the generated answer to the given question.
Based on the relevance of the generated answer, you will classify it
as "NON_RELEVANT", "PARTLY_RELEVANT", or "RELEVANT".

Here is the data for evaluation:

Question: {question}
Generated Answer: {answer_llm}

Please analyze the content and context of the generated answer in relation to the question
and provide your evaluation in parsable JSON without using code blocks:

{{
  "Relevance": "NON_RELEVANT" | "PARTLY_RELEVANT" | "RELEVANT",
  "Explanation": "[Provide a brief explanation for your evaluation]"
}}
""".strip()

### Evaluation with OpenAI 

In [108]:
## Answer Genration and Evaluation with OpenAI for a single text query passeed
def extract_llm_response_relevance_score(query):
    answer_llm = rag(query,question_answer_vector_knn, openai_3_5)
    print(answer_llm)
    print("********")
    
    prompt = Evaluation_template_quest_answer.format(question=query, answer_llm=answer_llm)
    response_relevance_llm = openai_4o(prompt)
    print(response_relevance_llm)
    print("********")
    
    ## Cleanup the response
    cleaned_temp = response_relevance_llm.replace('```', '').replace('\n', '').replace('json', '').rstrip(',').strip()
    data = json.loads(cleaned_temp)
    relevance = data.get("Relevance")
    relevance_expl =data.get("Explanation")
    
    return answer_llm,relevance,relevance_expl
    
    
query = "How can i come out of depression?"
answer_llm,relevance,relevance_expl= extract_llm_response_relevance_score(query)
answer_llm,relevance,relevance_expl

To come out of depression, it's important to prioritize self-care and relaxation to regain your emotional and physical strength. Find ways to rest and relax that work for you. Upsetting incidents can be a way to push you towards new activities and people in your life. Remember, sometimes these events are meant to nudge you in a different direction. Start by taking care of yourself and finding activities that bring you peace and happiness.
********
{
  "Relevance": "PARTLY_RELEVANT",
  "Explanation": "The generated answer provides general advice on self-care and relaxation to help with depression, which is partly relevant to the question. However, it does not offer specific strategies or resources for overcoming depression, making it only partially relevant."
}
********


("To come out of depression, it's important to prioritize self-care and relaxation to regain your emotional and physical strength. Find ways to rest and relax that work for you. Upsetting incidents can be a way to push you towards new activities and people in your life. Remember, sometimes these events are meant to nudge you in a different direction. Start by taking care of yourself and finding activities that bring you peace and happiness.",
 'PARTLY_RELEVANT',
 'The generated answer provides general advice on self-care and relaxation to help with depression, which is partly relevant to the question. However, it does not offer specific strategies or resources for overcoming depression, making it only partially relevant.')

In [2]:
def extract_relevance(input_doc,template_name,answer_col):
    """
    Extracts relevance and explanation from the input document based on a provided evaluation template.

    Args:
        input_doc (list of dict): A list of dictionaries where each dictionary represents a record.
        template_name (str): Evaluation Template name 
        answer_col (str): Column name of LLM generated answer column 

    Returns:
        tuple: A tuple containing two lists:
            - relevance_type (list): A list of relevance values extracted from the response.
            - explanation_type (list): A list of explanation values extracted from the response.

    The function processes each record in `input_doc`, generates a prompt using `template_name`, 
    calls the `gemma2` function to get an answer, and then extracts the relevant fields from the 
    returned JSON data. The extracted relevance and explanation are appended to their respective lists.
    """
    
    relevance_type = []
    explanation_type = []
    ans = ""

    for rec in tqdm(input_doc):
        rec['answer_llm'] = rec[answer_col]
        prompt = template_name.format(**rec)
        ans = gemma2(prompt)
        
        cleaned_temp = ans.replace('```', '').replace('\n', '').replace('json', '').rstrip(',').strip()
        data = json.loads(cleaned_temp)

        # Extract the relevant fields
        relevance_type.append(data.get("Relevance"))
        explanation_type.append(data.get("Explanation"))
        
    return relevance_type,explanation_type


### Evaluation with gemma2 

In [88]:
# Testing for a single record
record =  quest_ans_df_doc[0]
question = record['question']
answer_llm = record['answer_llama3']
true_answer = record['true_answer']

# prompt = Evaluation_template_answer_answer.format(**record)
prompt = Evaluation_template_answer_answer.format(true_answer=true_answer, question=question, answer_llm=answer_llm)
ans = gemma2(prompt)
ans

'```json\n{\n  "Relevance": "RELEVANT",\n  "Explanation": "The generated answer demonstrates a good understanding of the original answer\'s content and provides relevant insights. It acknowledges the prevalence of multiple issues in therapy, highlights the therapist\'s role in prioritizing them, and discusses potential treatment approaches for depression, anxiety, and low self-esteem. This level of detail aligns well with the original question and its context."\n}\n``` \n'

#### Answer vs answer Relevance

In [73]:
# Call the extract_relevance function on each row, and extract the true answer to gemma2 answer relevance 
relevance,explanation = extract_relevance(quest_ans_df_doc, Evaluation_template_answer_answer,'answer_gamma2')
quest_ans_df['true_ans_gamma2_ans_relevance'] = relevance
quest_ans_df['true_ans_gamma2_ans_relevance'].value_counts()

  0%|          | 0/52 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  quest_ans_df['true_ans_gamma2_ans_relevance'] = relevance


true_ans_gamma2_ans_relevance
PARTLY_RELEVANT    30
RELEVANT           22
Name: count, dtype: int64

In [76]:
# Call the extract_relevance function on each row, and extract the true answer to llama3 answer relevance 
relevance,explanation = extract_relevance(quest_ans_df_doc, Evaluation_template_answer_answer,'answer_llama3')
quest_ans_df['true_ans_llama3_ans_relevance'] = relevance
quest_ans_df['true_ans_llama3_ans_relevance'].value_counts()

  0%|          | 0/52 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  quest_ans_df['true_ans_llama3_ans_relevance'] = relevance


true_ans_llama3_ans_relevance
RELEVANT           27
PARTLY_RELEVANT    25
Name: count, dtype: int64

#### Quest vs answer Relevance

In [68]:
# Call the extract_relevance function on each row, and extract the question to gemma2 answer relevance 
relevance2,explanation2 = extract_relevance(quest_ans_df_doc, Evaluation_template_quest_answer,'answer_gamma2')
quest_ans_df['quest_gamma2_ans_relevance'] = relevance2
quest_ans_df['quest_gamma2_ans_relevance'].value_counts()

  0%|          | 0/52 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  quest_ans_df['quest_gamma2_ans_relevance'] = relevance2


quest_gamma2_ans_relevance
RELEVANT           34
PARTLY_RELEVANT    16
NON_RELEVANT        2
Name: count, dtype: int64

In [82]:
# Call the extract_relevance function on each row, and extract the question to llama3 answer relevance 
relevance2,explanation2 = extract_relevance(quest_ans_df_doc, Evaluation_template_quest_answer,'answer_llama3')
quest_ans_df['quest_llama3_ans_relevance'] = relevance2
quest_ans_df['quest_llama3_ans_relevance'].value_counts()

  0%|          | 0/52 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  quest_ans_df['quest_llama3_ans_relevance'] = relevance2


quest_llama3_ans_relevance
RELEVANT           41
PARTLY_RELEVANT    10
NON_RELEVANT        1
Name: count, dtype: int64

In [83]:
quest_ans_df = quest_ans_df.drop(['true_answer'], axis=1)
quest_ans_df.to_csv(RAG_EVALUDATION_RESULT_FILE, index=False)


In [86]:
quest_ans_df.head(5)

Unnamed: 0,question_id,question_title,question,answer_gamma2,answer_llama3,quest_gamma2_ans_relevance,true_ans_gamma2_ans_relevance,true_ans_llama3_ans_relevance,quest_llama3_ans_relevance
0,0,Do I have too many issues for counseling,I have so many issues to address. I have a his...,It's common for people to have multiple issues...,"Based on the provided context, I would like to...",PARTLY_RELEVANT,PARTLY_RELEVANT,PARTLY_RELEVANT,PARTLY_RELEVANT
1,1,My apartment manager won't let me keep an emot...,I have been diagnosed with general anxiety and...,You mentioned having been diagnosed with gener...,I understand your frustration and concern abou...,RELEVANT,RELEVANT,RELEVANT,RELEVANT
2,2,I feel like my mother doesn't support me,My mother is combative with me when I say I do...,It sounds like you are dealing with a difficul...,I understand your frustration and concern abou...,PARTLY_RELEVANT,PARTLY_RELEVANT,PARTLY_RELEVANT,PARTLY_RELEVANT
3,3,Why do I feel like I don't belong anywhere,There are many people willing to lovingly prov...,It seems like you're struggling with a sense o...,I understand that you're feeling like you don'...,RELEVANT,PARTLY_RELEVANT,RELEVANT,RELEVANT
4,4,How can I help my girlfriend,My girlfriend just quit drinking and she becam...,Your girlfriend is experiencing depression aft...,I understand your concern about your girlfrien...,PARTLY_RELEVANT,PARTLY_RELEVANT,PARTLY_RELEVANT,RELEVANT
