<a href="https://www.kaggle.com/code/lakshmikeerthi/thera-the-mental-health-therapist?scriptVersionId=235142755" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

# ***Thera***

## The Mental Health Therapist!

*Part of the Kaggle x Google GenAI Intensive 2025Q1*

Thera is a compassionate, AI-powered therapist designed to gently respond to mental health queries using Retrieval-Augmented Generation (RAG), semantic embeddings, and Google's Gemini model. This project demonstrates how GenAI can be used ethically and effectively to support emotional well-being through context-aware, structured conversations.

### How Thera Works

* Data Source: `intents.json`, a dataset of real conversational patterns and matching therapist responses.
* Vectorization: Combined user-therapist pairs embedded using text-embedding-004.
* Semantic Retrieval: The three stored examples most semantically similar to the query are passed to Gemini as context.
* Structured Prompting: Responses are controlled using JSON format with strict output constraints.
* Safe Boundaries: Thera avoids medical advice and politely declines out-of-scope queries.

In [1]:
!pip install -Uq "google-genai==1.7.0" kagglehub

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m144.7/144.7 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m100.9/100.9 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
from google import genai
from google.genai import types

from IPython.display import Markdown, display

genai.__version__

'1.7.0'

In [3]:
from kaggle_secrets import UserSecretsClient

# Build the Gemini client; the API key is read from Kaggle's secret store
# instead of being written into the notebook.
client = genai.Client(api_key=UserSecretsClient().get_secret("GOOGLE_API_KEY"))

In [4]:
client

<google.genai.client.Client at 0x7b8f03f13b90>

In [5]:
import kagglehub
from kagglehub import KaggleDatasetAdapter
# File inside the Kaggle dataset that holds the intent definitions.
file_path = "intents.json"

# `kagglehub.load_dataset` is deprecated and warns on every call;
# `kagglehub.dataset_load` is its replacement and takes the same arguments.
df = kagglehub.dataset_load(
    KaggleDatasetAdapter.PANDAS,
    "elvis23/mental-health-conversational-data",
    file_path,
)

  df = kagglehub.load_dataset(


In [6]:
import pandas as pd
import numpy as np

# Each element of the "intents" column is treated as a dict-like intent record.
intent_rows = df["intents"].tolist()

# Expand every intent into one record per (pattern, response) combination.
formatted_data = []
for entry in intent_rows:
    label = entry.get("tag", "general")
    user_lines = entry.get("patterns", [])
    replies = entry.get("responses", [])
    # Patterns vary slowest, responses fastest, so records stay grouped by pattern.
    formatted_data.extend(
        {"intent": label, "user": user_line, "therapist": reply}
        for user_line in user_lines
        for reply in replies
    )

# Tabular form of the pairs; the trailing expression previews the frame.
formatted_df = pd.DataFrame(formatted_data)
formatted_df.head()


Unnamed: 0,intent,user,therapist
0,greeting,Hi,Hello there. Tell me how are you feeling today?
1,greeting,Hi,Hi there. What brings you here today?
2,greeting,Hi,Hi there. How are you feeling today?
3,greeting,Hi,Great to see you. How do you feel currently?
4,greeting,Hi,Hello there. Glad to see you're back. What's g...


In [7]:
# Text that gets embedded: the user pattern and therapist reply as one exchange.
# Built straight from the two columns; a row-wise ``apply(axis=1)`` would
# construct a Series for every row just to format two strings.
formatted_df["Text"] = [
    f"User: {user}\nTherapist: {therapist}"
    for user, therapist in zip(formatted_df["user"], formatted_df["therapist"])
]


In [8]:
from google.api_core import retry
import tqdm
from tqdm.rich import tqdm as tqdmr
import warnings
from google.genai import types

tqdmr.pandas()
warnings.filterwarnings("ignore", category=tqdm.TqdmExperimentalWarning)

def is_retriable(e) -> bool:
    """Return True for API errors with status code 429 or 503."""
    return isinstance(e, genai.errors.APIError) and e.code in {429, 503}


@retry.Retry(predicate=is_retriable, timeout=300.0)
def embed_fn(text: str, task_type: str = "classification") -> list[float]:
    """Embed *text* with ``text-embedding-004`` and return its vector.

    Calls that fail with a retriable API error (see ``is_retriable``) are
    retried by the decorator, which is configured with a 300-second timeout.

    Args:
        text: The string to embed.
        task_type: Embedding task type forwarded to the API. Defaults to
            ``"classification"``, the value this notebook has always used.

    Returns:
        The values of the first (only) embedding in the response.
    """
    # NOTE(review): both the stored texts and the queries are embedded with the
    # default task type, so their vectors are comparable. A similarity- or
    # retrieval-oriented task type may suit this lookup better, but stored and
    # query vectors would have to be regenerated together - confirm before changing.
    response = client.models.embed_content(
        model="models/text-embedding-004",
        contents=text,
        config=types.EmbedContentConfig(task_type=task_type),
    )
    return response.embeddings[0].values


In [9]:
# Embed every "Text" entry with embed_fn (one embedding call per row),
# reporting progress through the tqdm pandas integration.
formatted_df["embedding"] = formatted_df["Text"].progress_apply(embed_fn)


Output()

In [10]:
import numpy as np

def retrieve_similar_responses(query, top_k=3):
    """Return the ``top_k`` stored exchanges that score highest against *query*.

    The query is embedded with ``embed_fn`` and compared with the ``embedding``
    of every row in ``formatted_df`` using a dot product.

    Args:
        query: User text to look up.
        top_k: Maximum number of matches to return.

    Returns:
        A list of dicts with keys ``"user"``, ``"therapist"`` and ``"score"``,
        ordered from highest to lowest score. Empty when the frame has no rows.
    """
    if formatted_df.empty:
        # Nothing to compare against, so skip the embedding request entirely.
        return []

    query_vec = np.array(embed_fn(query))  # Use the same retry-safe embedding

    # Score all rows with one matrix-vector product instead of building a
    # Series per row via iterrows() and calling np.dot once per row.
    corpus = np.array(formatted_df["embedding"].tolist())
    scores = corpus @ query_vec

    # Stable sort on negated scores: descending order, and equal scores keep
    # their original row order (same tie behavior as sorted(..., reverse=True)).
    best = np.argsort(-scores, kind="stable")[:top_k]

    users = formatted_df["user"].to_numpy()
    therapists = formatted_df["therapist"].to_numpy()
    return [
        {"user": users[i], "therapist": therapists[i], "score": scores[i]}
        for i in best
    ]

In [11]:
def mental_health_rag_response(query):
    """Generate Thera's reply to *query*, grounded in retrieved examples.

    Looks up the three best-scoring stored exchanges, places them in the
    prompt as context, and asks ``gemini-2.0-flash`` to answer in JSON.

    Args:
        query: The user's message.

    Returns:
        The text of the first part of the first candidate, unparsed.
    """
    examples = retrieve_similar_responses(query, top_k=3)

    # Render the examples in the same "User/Therapist" layout as the embedded text.
    context = "\n".join(
        f"User: {ex['user']}\nTherapist: {ex['therapist']}" for ex in examples
    )

    # NOTE(review): `query` is interpolated into the prompt verbatim, so user
    # text can carry its own instructions - confirm this is acceptable.
    prompt = f"""
You are Thera, a licensed and kind mental health therapist. Always respond gently and constructively. Respond with Empathy. 
- Use natural language with contractions (I'm, you’re, let’s)
- Occasionally includes thoughtful pauses: "Hmm, let’s think about that..."
- Talk less, listen more.
- Do not suggest or mention any medications.
- If the topic is out of scope, politely state that you cannot answer it.

Use the following past examples for context:

{context}

User: {query}

Respond strictly in the following JSON format:

{{
  "response": "Therapist's message to the user",
  "suggestions": ["A one or two of wellness suggestions like 'meditate', 'sleep well', etc."]
}}

The suggestions can include: "get more sleep", "try mindfulness meditation", "eat healthy meals", "exercise regularly", "talk to a trusted friend", "seek professional counseling", "spend time outdoors", "journal your thoughts".
If the query is out of scope, then suggestions can include something like "Let's talk about how you are feeling today."
Respond only in JSON.
"""

    result = client.models.generate_content(
        model="models/gemini-2.0-flash",
        contents=prompt,
    )

    # Indexing is unguarded: a reply without candidates or parts raises here.
    first_candidate = result.candidates[0]
    return first_candidate.content.parts[0].text

In [12]:
import json
from IPython.display import JSON, display

def display_therapist_response(query):
    """Ask Thera about *query* and render the reply as a JSON display object.

    The model output is cut down to the span from its first "{" to its last
    "}" before parsing, so any text around the JSON object is ignored. If
    extraction, parsing or display raises, the error and the raw output are
    printed instead.

    Args:
        query: The user's message.
    """
    reply_text = mental_health_rag_response(query)

    try:
        # Keep only the outermost {...} span of the reply.
        opening = reply_text.find("{")
        closing = reply_text.rfind("}")
        payload = json.loads(reply_text[opening:closing + 1])

        display(JSON(payload))
    except Exception as e:
        # Best-effort fallback: show what went wrong along with the raw text.
        print("⚠️ Could not parse response as JSON.")
        print("Error:", e)
        print("\n🪵 Raw Response:\n", reply_text)


In [13]:
# Demo 1: an emotional-support query.
query = "I feel like crying every night and I don't know why."
display_therapist_response(query)


<IPython.core.display.JSON object>

In [14]:
# Demo 2: the same query plus a request for medicine; the prompt tells the
# model not to suggest or mention medications.
query = "I feel like crying every night and I don't know why.Can you suggest some medicine ?"
display_therapist_response(query)


<IPython.core.display.JSON object>

In [15]:
# Demo 3: an unrelated question; the prompt tells the model to politely
# decline out-of-scope topics.
query = "Do you know about 5 Gen-AI course hosted by Google and Kaggle? is this in your scope?"
display_therapist_response(query)

<IPython.core.display.JSON object>

In [16]:
# Save the frame, including the embedding column, to disk as a pickle.
# NOTE: only load this file back from a trusted source - unpickling can run code.
formatted_df.to_pickle("vector_store.pkl")