# Importing dependencies

In [None]:
from google.colab import drive

# Mount Google Drive inside the Colab VM at the usual location.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os

from huggingface_hub import login

# Authenticate with Hugging Face without pasting the token into the notebook.
# Create a token at https://huggingface.co/settings/tokens and expose it as the
# HF_TOKEN environment variable. When the variable is unset or empty, `token`
# is None and `login` falls back to its interactive prompt.
hf_token = os.environ.get("HF_TOKEN") or None
login(token=hf_token)


In [None]:
pip install -r requirement.txt

In [None]:
import streamlit as st
import pickle
import os
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain_core.prompts import PromptTemplate
from langchain.chains.retrieval import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.prompts import ChatPromptTemplate
from langchain_core.prompts import MessagesPlaceholder
from langchain.chains.history_aware_retriever import create_history_aware_retriever
from langchain_core.messages import HumanMessage, AIMessage
from langchain.chains.conversation.memory import ConversationBufferMemory


# Loading Model

In [None]:


# Checkpoint to load; replace with another causal-LM repo id if needed
llama_model_name = "meta-llama/Llama-3.2-1B"

# Tokenizer and weights are both pulled from the same checkpoint name
tokenizer = AutoTokenizer.from_pretrained(llama_model_name)
model = AutoModelForCausalLM.from_pretrained(
    llama_model_name,
    device_map='auto',
)

# Text-generation pipeline with sampling enabled, wrapped so LangChain can use it as an LLM
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=500,
    temperature=0.5,
    do_sample=True,
)
llm = HuggingFacePipeline(
    pipeline=pipe,
    batch_size=8,
)

# Model Script

In [None]:
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

# Prompt for the mental health consultant chatbot, kept as separate pieces
# (persona, answer requirements, question slot, answer cue) and joined below.
prompt_sections = (
    "You are a compassionate and highly knowledgeable mental health consultant specializing in stress, anxiety, and depression management. Your goal is to provide empathetic, practical, and evidence-based advice to individuals seeking support. \n\n",
    "When responding to the following question, ensure your answer is:\n",
    "- *Complete*: Fully address the user's concerns with sufficient detail, but do not exceed 500 tokens.\n",
    "- *Clear and concise*: Use straightforward language and avoid unnecessary elaboration.\n",
    "- *Precise*: Focus directly on the user's question, providing actionable advice and avoiding irrelevant information.\n",
    "Patient's question: {question}\n\n",
    "Your response:",
)

# NOTE(review): only "question" is declared here; the "history" key that callers
# pass to qa_chain.invoke has no placeholder in this template — confirm whether
# conversation history is meant to reach the model.
prompt = PromptTemplate(
    input_variables=["question"],
    template="".join(prompt_sections),
)

# TODO: Edit the prompt template so that it provides with a standalone response only, and nothing else

# Chain that fills the prompt and sends it to the wrapped LLM
qa_chain = LLMChain(llm=llm, prompt=prompt)


In [None]:
chat_history = []  # Conversation turns stored as {"role": ..., "content": ...} dicts

# Interactive chat: read a question, run the chain, print the extracted answer.
# Typing "exit" or "quit" (any case, surrounding whitespace ignored) ends the loop.
while True:
    user_question = input("Ask me anything: ").strip()

    if not user_question:
        # Blank input: ask again instead of running a generation on nothing
        # and storing an empty turn in the history.
        continue

    if user_question.lower() in ("exit", "quit"):
        print("Exiting the chat. Goodbye!")
        break

    # Append the user's question to the chat history
    chat_history.append({"role": "user", "content": user_question})

    # Combine chat history into a single "Role: content" string, one turn per line
    conversation_context = "\n".join(
        f"{entry['role'].capitalize()}: {entry['content']}" for entry in chat_history
    )

    # NOTE(review): the prompt template only declares "question", so the
    # "history" value appears to be ignored by the chain — confirm.
    result = qa_chain.invoke({"question": user_question, "history": conversation_context})
    response = result['text']

    # Keep only what follows the last "Your response:" marker; if the marker is
    # absent the text is used unchanged.
    extract_from = "Your response:"
    extracted_response = response.split(extract_from)[-1].strip() if extract_from in response else response

    # Append the bot's response to the chat history
    chat_history.append({"role": "bot", "content": extracted_response})

    # Display the extracted response
    print(f"Answer: {extracted_response}")

# Evaluation

In [None]:
import csv
import json

# File paths
csv_file_path = "sampled_file.csv"  # Replace with your CSV file path
json_file_path = "train.json"  # Replace with your desired JSON file path

# Collects one {"question", "ideal_response"} record per CSV row
data_list = []

# Read the CSV file.
# - utf-8-sig drops a leading byte-order mark if the file has one.
# - newline="" leaves line-ending handling to the csv module, so quoted fields
#   that contain line breaks are parsed correctly.
with open(csv_file_path, "r", encoding="utf-8-sig", newline="") as csv_file:
    csv_reader = csv.DictReader(csv_file)

    # Strip whitespace from column names. `fieldnames` is None when the file is
    # empty, so fall back to an empty list instead of failing.
    csv_reader.fieldnames = [name.strip() for name in (csv_reader.fieldnames or [])]

    # Debug: Print column names
    print(f"Column names in CSV: {csv_reader.fieldnames}")

    # Convert each row into a dictionary and add to the list
    for row in csv_reader:
        # DictReader fills columns missing from a short row with None, so
        # `or ""` guards the .strip() call.
        data_list.append({
            "question": (row.get("Context") or "").strip(),
            "ideal_response": (row.get("Response") or "").strip(),
        })

# Write the data to a JSON file
with open(json_file_path, "w", encoding="utf-8") as json_file:
    json.dump(data_list, json_file, indent=4)

print(f"Dataset converted to JSON format and saved as '{json_file_path}'")


In [None]:
# Sample test dataset (questions and ideal responses)
# test_data = [
#     {
#         "question": "How can I reduce stress?",
#         "ideal_response": "To reduce stress, you can try relaxation techniques such as deep breathing, meditation, and progressive muscle relaxation. It's also important to get regular exercise, maintain a healthy diet, and ensure you're getting enough sleep. Seeking support from a mental health professional can also help you manage stress."
#     },
#     {
#         "question": "What are the symptoms of anxiety?",
#         "ideal_response": "Common symptoms of anxiety include restlessness, rapid heartbeat, excessive worry, trouble concentrating, irritability, and physical symptoms like sweating or trembling. If you experience these symptoms regularly, it may be helpful to talk to a healthcare provider."
#     },
#     {
#         "question": "How do I know if I am depressed?",
#         "ideal_response": "Depression can manifest in various ways, including persistent feelings of sadness, lack of energy, changes in appetite or sleep patterns, and loss of interest in activities you once enjoyed. If you notice these symptoms for two weeks or more, it's a good idea to speak with a mental health professional."
#     }
# ]

# Evaluate on the records in data_list; test_data refers to the same list object, not a copy
test_data = data_list

In [None]:
pip install bert-score


In [None]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
import bert_score

# Load the pre-trained Sentence Transformer model once at module level;
# get_cosine_similarity() below reads this global.
embedder = SentenceTransformer('all-MiniLM-L6-v2')

def get_cosine_similarity(text1, text2):
    """Return the cosine similarity between the embeddings of two texts.

    Each text is encoded on its own with the module-level ``embedder``; the
    two encodings are then compared with ``cosine_similarity``.

    Args:
        text1: First text.
        text2: Second text.

    Returns:
        The single similarity score for the pair.
    """
    first_encoding = embedder.encode([text1])
    second_encoding = embedder.encode([text2])

    # One text on each side, so the only score sits at position [0][0]
    return cosine_similarity(first_encoding, second_encoding)[0][0]

def get_bertscore(text1, text2):
    """Return BERTScore (precision, recall, F1) for one pair of texts.

    ``text1`` is passed to ``bert_score.score`` as the first (candidate) list
    and ``text2`` as the second (reference) list, scored with ``lang='en'``.

    Returns:
        A ``(precision, recall, f1)`` tuple of plain Python numbers.
    """
    # NOTE(review): bert_score.score presumably sets up its model on every call;
    # a reusable scorer may be faster for large test sets — confirm.
    precision, recall, f1 = bert_score.score([text1], [text2], lang='en')
    return precision.item(), recall.item(), f1.item()

def _extract_answer(chain_output, marker="Your response:"):
    """Pull the generated answer out of a chain result.

    Args:
        chain_output: Either the dict returned by ``qa_chain.invoke`` (the
            generated text is read from its ``"text"`` key) or a plain string.
        marker: Text that precedes the answer in the generated output.

    Returns:
        The stripped text after the last occurrence of ``marker``, or the
        whole text unchanged when the marker is absent.
    """
    raw_text = chain_output["text"] if isinstance(chain_output, dict) else chain_output
    raw_text = str(raw_text)
    if marker in raw_text:
        return raw_text.split(marker)[-1].strip()
    return raw_text


def evaluate_with_test_data(test_data, qa_chain):
    """Score the chatbot against reference answers and print a per-question report.

    Args:
        test_data: Sequence of dicts with ``"question"`` and ``"ideal_response"`` keys.
        qa_chain: Object whose ``invoke`` accepts ``{"question", "history"}``.

    Returns:
        ``(avg_cosine_score, avg_f1_score)`` over all entries, or
        ``(0.0, 0.0)`` when ``test_data`` is empty.
    """
    num_questions = len(test_data)
    if num_questions == 0:
        # Nothing to average; avoid dividing by zero below.
        print("No test data provided; nothing to evaluate.")
        return 0.0, 0.0

    total_cosine_score = 0
    total_f1_score = 0

    for entry in test_data:
        question = entry["question"]
        ideal_response = entry["ideal_response"]

        # Generate chatbot's response using the QA chain
        conversation_context = f"User: {question}\nBot: "
        chatbot_response = qa_chain.invoke({"question": question, "history": conversation_context})

        # invoke() returns a dict, so take the generated text and keep only the
        # part after "Your response:" (the same marker the chat loop uses).
        extracted_answer = _extract_answer(chatbot_response)

        # Calculate cosine similarity between ideal response and chatbot's response
        cosine_sim = get_cosine_similarity(ideal_response, extracted_answer)

        # The chatbot answer is the candidate and the ideal response the
        # reference, so precision and recall describe the chatbot's output.
        P, R, F1 = get_bertscore(extracted_answer, ideal_response)

        print(f"Question: {question}")
        print(f"Chatbot Response: {extracted_answer}")
        print(f"Ideal Response: {ideal_response}")
        print("====================================================")
        print(f"Cosine Similarity: {cosine_sim:.2f}")
        print(f"BERTScore Precision: {P:.2f}, Recall: {R:.2f}, F1: {F1:.2f}\n")
        print("====================================================")
        total_cosine_score += cosine_sim
        total_f1_score += F1

    # Calculate average scores for all questions
    avg_cosine_score = total_cosine_score / num_questions
    avg_f1_score = total_f1_score / num_questions

    print(f"Average Cosine Similarity Score: {avg_cosine_score:.2f}")
    print(f"Average BERTScore F1: {avg_f1_score:.2f}")

    return avg_cosine_score, avg_f1_score

# Run the evaluation; `test_data` and `qa_chain` must already be defined by earlier cells
evaluate_with_test_data(test_data, qa_chain)


In [None]:
# import numpy as np
# from sklearn.metrics.pairwise import cosine_similarity
# from sentence_transformers import SentenceTransformer

# # Load the pre-trained Sentence Transformer model
# embedder = SentenceTransformer('all-MiniLM-L6-v2')

# # Function to calculate cosine similarity
# def get_cosine_similarity(text1, text2):
#     # Encode both texts into embeddings
#     embeddings1 = embedder.encode([text1])
#     embeddings2 = embedder.encode([text2])

#     # Calculate cosine similarity
#     similarity = cosine_similarity(embeddings1, embeddings2)[0][0]
#     return similarity

# # Function to evaluate chatbot's performance on the test dataset
# def evaluate_with_test_data():
#     total_score = 0
#     num_questions = len(test_data)

#     for entry in test_data:
#         question = entry["question"]
#         ideal_response = entry["ideal_response"]

#         # Generate chatbot's response using the QA chain
#         conversation_context = f"User: {question}\nBot: "
#         chatbot_response = qa_chain.run({"question": question, "history": conversation_context})

#         # Extract the actual response from the chatbot's output
#         extract_from = "Patient's question: "
#         extracted_answer = chatbot_response.split(extract_from)[1].strip() if extract_from in chatbot_response else chatbot_response

#         # Calculate cosine similarity between ideal response and chatbot's response
#         similarity = get_cosine_similarity(ideal_response, extracted_answer)

#         print(f"Question: {question}")
#         print(f"Chatbot Response: {extracted_answer}")
#         print(f"Ideal Response: {ideal_response}")
#         print(f"Cosine Similarity: {similarity:.2f}\n")

#         total_score += similarity

#     # Calculate average score for all questions
#     average_score = total_score / num_questions
#     print(f"Average Similarity Score: {average_score:.2f}")
#     return average_score

# # Run the evaluation
# evaluate_with_test_data()
