In [None]:

from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_huggingface import HuggingFaceEmbeddings
import os
from dotenv import load_dotenv

load_dotenv()
api_key = os.getenv("GEMINI_API_KEY")

import numpy as np


In [None]:
def initialize_llm():

    llm = ChatGoogleGenerativeAI(
        model="gemini-1.5-flash",
        google_api_key=api_key,
        temperature=0.7
    )
    return llm

In [3]:
DATA_PATH=r"D:\psychiati intern\developing\prompts\mental-health-chatbot"
def load_pdf_files(data):
    loader = DirectoryLoader(data,
                             glob='*.pdf',
                             loader_cls=PyPDFLoader)
    
    documents=loader.load()
    return documents

documents=load_pdf_files(data=DATA_PATH)
print("Length of PDF pages: ", len(documents))

Length of PDF pages:  34


In [4]:
def create_chunks(extracted_data):
    text_splitter=RecursiveCharacterTextSplitter(chunk_size=1500,
                                                 chunk_overlap=100
#separators=["\n\n", "\n", ".", " "]  # Custom split strategy
                                                 
                                                 )
    text_chunks=text_splitter.split_documents(extracted_data)
    return text_chunks

text_chunks=create_chunks(extracted_data=documents)
print("Length of Text Chunks: ", len(text_chunks))

Length of Text Chunks:  70


# analyzing text_chunks

In [17]:
text_chunks
#print(text_chunks[19])

[Document(metadata={'producer': 'Microsoft® Word for Microsoft 365', 'creator': 'Microsoft® Word for Microsoft 365', 'creationdate': '2025-06-25T14:45:39+05:00', 'author': 'SYED SHAHEER SUHAIB TIRMAZI', 'moddate': '2025-06-25T14:45:39+05:00', 'source': 'D:\\psychiati intern\\developing\\prompts\\mental-health-chatbot\\A PROVIDER’S GUIDE TO BRIEF COGNITIVE BEHAVIORAL THERAPY.pdf', 'total_pages': 34, 'page': 0, 'page_label': '1'}, page_content='A PROVIDER’S GUIDE TO BRIEF COGNITIVE BEHAVIORAL THERAPY \n \n \nModule 4: Case Conceptualization and Treatment \nPlanning \nOBJECTIVES \n• To better understand the role of case conceptualization in cognitive behavioral therapy. \n• To develop specific case conceptualization skills, including: \na. Assessing patient concerns/difficulties \nb. Establishing a treatment plan (goal setting) \nc. Identifying treatment obstacles \nWhat Are Case Conceptualization and Treatment Planning, and Why Are They \nImportant in Brief CBT? \nCase conceptualization 

In [20]:
for i, chunk in enumerate(text_chunks[12:19], start=12):
    print(f"\n--- Chunk {i} ---")
    print(chunk.page_content.strip())



--- Chunk 12 ---
At the same time, evaluating the provider’s culture is just as important. All people have 
cultural backgrounds that influence their interactions, sometimes through biases—both 
implicit (unconscious) and explicit (conscious). These biases can affect how providers relate 
to patients from different backgrounds. 
 
Patient–Provider Cultural Interactions 
When two different cultures meet in therapy, the result can be positive or negative. Ideally, 
cultural differences lead to mutual understanding, learning, and respect. However, they can 
also cause misunderstandings, stereotypes, or even discrimination. 
Discrimination can be direct (like racism or sexism) or indirect through microaggressions—
subtle, often unintentional slights that communicate negative assumptions. For example, 
telling a Black student that she is “surprisingly articulate” may sound like a compliment but 
actually implies that she is an exception to a stereotype. 
Both patients and providers may fac

In [6]:
def get_embedding_model(model_name="sentence-transformers/all-MiniLM-L6-v2"):
  embedding_model = HuggingFaceEmbeddings(model_name=model_name)
  return embedding_model



In [None]:
# embedding_model=get_embedding_model()

# Testing Embeddings

In [None]:

# Models to compare
model_names = [
    "sentence-transformers/all-MiniLM-L6-v2",
    "sentence-transformers/all-mpnet-base-v2",
]

embeddings = {}

# Get embeddings for the same query using each model
for name in model_names:
    try:
        print(f"\n🔍 Testing embedding model: {name}")
        emb_model = get_embedding_model(name)
        vector = emb_model.embed_query("What is CBT?")
        embeddings[name] = vector
        print("✅ Model loaded. Vector length:", len(vector))
    except Exception as e:
        print(f"❌ Failed to load {name}: {e}")





🔍 Testing embedding model: sentence-transformers/all-MiniLM-L6-v2
✅ Model loaded. Vector length: 384

🔍 Testing embedding model: sentence-transformers/all-mpnet-base-v2
✅ Model loaded. Vector length: 768


ValueError: shapes (384,) and (768,) not aligned: 384 (dim 0) != 768 (dim 0)

In [22]:
embeddings.keys()

dict_keys(['sentence-transformers/all-MiniLM-L6-v2', 'sentence-transformers/all-mpnet-base-v2'])

In [23]:
# Create first Chroma vectorstore
CHROMA_PATH_1 = "./chroma_db_miniLM"
vectordb_1 = Chroma.from_documents(
    documents=text_chunks,
    embedding=get_embedding_model(),
    persist_directory=CHROMA_PATH_1
)
vectordb_1.persist()
print("✅ Embedding 1 stored at:", CHROMA_PATH_1)


✅ Embedding 1 stored at: ./chroma_db_miniLM


  vectordb_1.persist()


In [7]:

# Create second Chroma vectorstore
CHROMA_PATH_2 = "./chroma_db_mpnet"
vectordb_2 = Chroma.from_documents(
    documents=text_chunks,
    embedding=get_embedding_model('sentence-transformers/all-mpnet-base-v2'),
    persist_directory=CHROMA_PATH_2
)
vectordb_2.persist()
print("✅ Embedding 2 stored at:", CHROMA_PATH_2)

  from .autonotebook import tqdm as notebook_tqdm


✅ Embedding 2 stored at: ./chroma_db_mpnet


  vectordb_2.persist()


In [9]:
def load_chroma_db():
    embedding_model = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-mpnet-base-v2"
    )
    
    chroma_db = Chroma(
        persist_directory="./chroma_db_mpnet",
        embedding_function=embedding_model
    )
    return chroma_db

In [11]:
vectordb_2=load_chroma_db()
retriever = vectordb_2.as_retriever(search_kwargs={"k": 3})
docs = retriever.get_relevant_documents("What is CBT?")
for i, doc in enumerate(docs, 1):
    print(f"\n📄 Document {i}:\n{doc.page_content}")

  docs = retriever.get_relevant_documents("What is CBT?")



📄 Document 1:
debunk previously misinformed perspectives. 
Providing patients with an understanding of the therapeutic process allows them to be more 
active and aware of their role in the progression of therapy. Knowledge of the process of brief 
CBT enhances the collaborative nature of therapy. 
When? (Indications/Contraindications) 
Discussing the rationale for CBT and describing the process of therapy should occur in the 
first session. However, it is useful to revisit the model throughout treatment to expand upon 
the rationale for CBT skills. 
The explanation of the model can be tailored to the patient's presenting problem, and 
examples to explain each component can be drawn from those generated in discussing 
problems specific to your patients (e.g., "I can't seem to get out of bed, and then I feel 
worthless"). For patients who think in concrete terms, it might be necessary to provide many 
examples and initially focus on behaviors rather than cognitions. 
How? (Instructions/

In [9]:
def setup_qa_chain(vector_db, llm):
  retriever = vector_db.as_retriever()
  prompt_templates = """ You are a calm, supportive CBT therapist. You are trained using official therapy manuals and follow CBT principles. You receive advice from documents but always reply in friendly, simple language for users. Never sound clinical or robotic. The context from Some Helping Document will be provided to you, You can take help from there to craft message like that.
  If you dont know the answer, just say that you dont know, dont try to make up an answer. :
    {context}
    User: {question}

    Start the answer directly. No small talk please.
    Chatbot: 
    
    
    """
  PROMPT = PromptTemplate(template = prompt_templates, input_variables = ['context', 'question'])

  qa_chain = RetrievalQA.from_chain_type(
      llm = llm,
      chain_type = "stuff",
      retriever = retriever,
      chain_type_kwargs = {"prompt": PROMPT},
      return_source_documents=True
  )
  return qa_chain

# for memory

In [20]:
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory

def setup_conversational_qa_chain(vector_db, llm):
    prompt_template = """You are a calm, supportive CBT therapist. You are trained using official therapy manuals and follow CBT principles. You receive advice from documents but always reply in friendly, simple language for users. Never sound clinical or robotic. The context from Some Helping Document will be provided to you. You can take help from there to craft message like that.
If you don't know the answer, just say that you don't know. Don't try to make up an answer.

{context}
User: {question}

Start the answer directly. No small talk please.
Chatbot:"""

    PROMPT = PromptTemplate(input_variables=["context", "question"], template=prompt_template)

    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True, output_key="answer")


    qa_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vector_db.as_retriever(search_kwargs={"k": 3}),
        memory=memory,
        return_source_documents=True,
        chain_type="stuff",  # Still allowed
        combine_docs_chain_kwargs={"prompt": PROMPT}  # 👈 Corrected here
    )

    return qa_chain


In [None]:
def main():
  print("Intializing Chatbot.........")

 # vector_db = load_chroma_db()
  
  llm = initialize_llm()

 
  qa_chain = setup_conversational_qa_chain(vectordb_2, llm)

  while True:
    query = input("\nHuman: ")
    if query.lower()  == "exit":
      print("Chatbot: Take Care of yourself, Goodbye!")
      break
    response = qa_chain({"question": query})
    print(f"Chatbot: {response['answer']}")
    # print("Source documents:", response['source_documents'])

    
    print("\n" + "="*50 + " RETRIEVED DOCUMENTS " + "="*50)
    for i, doc in enumerate(response['source_documents'], 1):
        print(f"\nDOCUMENT {i}:")
        print(f"Source: {doc.metadata['source']}")
        print(f"Page: {doc.metadata.get('page', 'N/A')}")
        print("\nContent:")
        print(doc.page_content)
        print("\n" + "-"*100)



if __name__ == "__main__":
  main()


Intializing Chatbot.........
Chatbot: Hi Shaheer, thanks for sharing.  It sounds like you had a really frustrating day.  Let's talk about what happened. You mentioned your classmates put extra work on you, and you felt like punishing them. Can you tell me more about what happened and how that made you feel?  We can then explore some ways to handle similar situations in the future.


DOCUMENT 1:
Source: D:\psychiati intern\developing\prompts\mental-health-chatbot\A PROVIDER’S GUIDE TO BRIEF COGNITIVE BEHAVIORAL THERAPY.pdf
Page: 29

Content:
note of your observation to the patient (“You are speaking more loudly; what is going 
through your mind right now?”). The patient then provides an explanation of the behavior. 
In-Session Example 11.3 
Patient: “My boss reprimanded me again yesterday” (sighs heavily.) 
Provider: “Tell me more.”

----------------------------------------------------------------------------------------------------

DOCUMENT 2:
Source: D:\psychiati intern\developing\pr