In [1]:
# Install necessary packages
!pip install torch==2.0.1+cu118 torchvision==0.15.2+cu118 torchaudio==2.0.2+cu118 --index-url https://download.pytorch.org/whl/cu118
!pip install transformers accelerate sentencepiece protobuf scikit-learn ipywidgets tqdm unsloth trl datasets

Looking in indexes: https://download.pytorch.org/whl/cu118
Collecting torch==2.0.1+cu118
  Using cached https://download.pytorch.org/whl/cu118/torch-2.0.1%2Bcu118-cp310-cp310-linux_x86_64.whl (2267.3 MB)
Collecting triton==2.0.0 (from torch==2.0.1+cu118)
  Using cached https://download.pytorch.org/whl/triton-2.0.0-1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (63.3 MB)
Installing collected packages: triton, torch
  Attempting uninstall: triton
    Found existing installation: triton 3.1.0
    Uninstalling triton-3.1.0:
      Successfully uninstalled triton-3.1.0
  Attempting uninstall: torch
    Found existing installation: torch 2.5.1
    Uninstalling torch-2.5.1:
      Successfully uninstalled torch-2.5.1
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
unsloth 2024.12.12 requires torch>=2.4.0, but you have torch 2.0.1+cu118 which is incompatible.

In [None]:
import torch
import os
import warnings
from unsloth import FastLanguageModel

# ----- Transformers for T5 -----
from transformers import (
    T5Tokenizer,
    T5ForConditionalGeneration
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [None]:
#########################################
# 1) LOAD THE T5 DIAGNOSIS MODEL
#########################################
T5_MODEL_PATH = "./flan_t5_xl_gpu_mental_health"  # Adjust as needed

# Pick the compute device once: CUDA when it is available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print("Loading T5 model for diagnosis classification...")
t5_tokenizer = T5Tokenizer.from_pretrained(T5_MODEL_PATH, legacy=False)

# Load the checkpoint, place it on the chosen device and switch it to
# evaluation mode; `.to()` and `.eval()` both hand back the module itself.
t5_model = T5ForConditionalGeneration.from_pretrained(T5_MODEL_PATH).to(device).eval()

def classify_mental_health(text: str, max_target_length=10) -> str:
    """
    Classify a free-text message with the fine-tuned T5 model.

    The message is prefixed with "classify: ", tokenized (truncated to 512
    tokens), and decoded with 4-beam search on the module-level `device`.

    Args:
        text: The user's message.
        max_target_length: Passed to `generate` as `max_length`, i.e. the cap
            on the generated label's length in tokens.

    Returns:
        The decoded label with special tokens removed and surrounding
        whitespace stripped (e.g. "Anxiety", "Work-related stress").
    """
    # Tokenize the task-prefixed prompt and move the tensors to the model's device.
    encoded = t5_tokenizer(
        "classify: " + text,
        truncation=True,
        max_length=512,
        return_tensors="pt",
    )
    encoded = encoded.to(device)

    generation_kwargs = {
        "max_length": max_target_length,
        "num_beams": 4,
        "early_stopping": True,
    }

    # Inference only: no gradients are needed.
    with torch.no_grad():
        sequences = t5_model.generate(**encoded, **generation_kwargs)

    # Only the first returned sequence is used as the label.
    return t5_tokenizer.decode(sequences[0], skip_special_tokens=True).strip()


In [None]:
#########################################
# 2) LOAD THE LLaMA-BASED RESPONSE MODEL
#########################################
from unsloth import FastLanguageModel
LLAMA_MODEL_PATH = "./LLAMA_3_3_70B_A100"  # Adjust as needed

print("Loading LLaMA-based model for response generation...")

# -------------
# EXAMPLE USING Unsloth "FastLanguageModel"
# -------------
try:
    # 4-bit quantization is used for memory savings. Adjust as needed and
    # replace LLAMA_MODEL_PATH with your actual model name or path.
    load_in_4bit = True
    dtype = torch.float16
    max_seq_length = 2048

    # Load the model and its tokenizer from LLAMA_MODEL_PATH.
    llama_model, llama_tokenizer = FastLanguageModel.from_pretrained(
        model_name     = LLAMA_MODEL_PATH,
        max_seq_length = max_seq_length,
        dtype          = dtype,
        load_in_4bit   = load_in_4bit,
    )

    # Make sure the model is in inference mode for speed.
    # NOTE: there is deliberately no `llama_model.to(device)` here. Quantized
    # weights are placed on their device while loading, and Transformers can
    # raise on `.to()` for bitsandbytes-quantized models; that exception would
    # land in the handler below and silently disable response generation.
    llama_model.eval()

    # Unsloth's recommended optimization for inference
    FastLanguageModel.for_inference(llama_model)

except Exception as e:
    # Best-effort load: report the problem and leave both names set to None so
    # that callers can detect that response generation is unavailable.
    print("Error loading LLaMA-based model with Unsloth. Details:", str(e))
    # You could fallback to Hugging Face Transformers or another method here.
    llama_model = None
    llama_tokenizer = None


def generate_empathetic_response(user_message: str, diagnosis_label: str) -> str:
    """
    Use the LLaMA-based model to generate a step-by-step plan that acknowledges
    the user's situation, provides actionable steps, and includes safety info if needed.

    Args:
        user_message: The user's original message.
        diagnosis_label: The label to put on the prompt's "Diagnosis:" line.

    Returns:
        Only the newly generated text, stripped of surrounding whitespace.
        The prompt is not echoed back.

    Raises:
        RuntimeError: If the model or tokenizer failed to load.
    """
    if llama_model is None or llama_tokenizer is None:
        raise RuntimeError("LLaMA-based model is not loaded; cannot generate a response.")

    prompt = (
        f"Diagnosis: {diagnosis_label}\n"
        "\n"
        "User's Input:\n"
        f"{user_message}\n"
        "\n"
        "Your Response:\n"
    )

    # Put the inputs on whatever device the model actually lives on.
    inputs = llama_tokenizer([prompt], return_tensors="pt").to(llama_model.device)
    prompt_length = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = llama_model.generate(
            **inputs,
            max_new_tokens=512,
            do_sample=True,
            top_p=0.9,
            temperature=0.8
        )

    # A decoder-only model returns prompt + continuation in one sequence; keep
    # only the continuation so the prompt is not repeated in the answer.
    completion_ids = outputs[0][prompt_length:]
    return llama_tokenizer.decode(completion_ids, skip_special_tokens=True).strip()


In [None]:
#########################################
# 3) COMBINE THE PIPELINE
#########################################
def mental_health_assistant_pipeline(user_text: str) -> str:
    """
    Run the full assistant pipeline on one user message.

    Stages:
      1) Reject empty or whitespace-only input with a fixed message.
      2) Obtain a diagnosis label from `classify_mental_health`.
      3) If the LLaMA-based model is loaded, obtain a response from
         `generate_empathetic_response`; otherwise report that response
         generation is unavailable.

    Returns:
        A single string containing the diagnosis and, when available, the
        generated response.
    """
    has_content = bool(user_text) and bool(user_text.strip())
    if not has_content:
        return "Invalid input. Please provide some text describing your concern."

    # The label is computed first; it is reported even when no response model exists.
    label = classify_mental_health(user_text)

    if llama_model is None:
        # The LLaMA model failed to load: return the label plus a fallback notice.
        return f"Diagnosis: {label}\n\nResponse generation is currently unavailable."

    reply = generate_empathetic_response(user_text, label)
    return f"Diagnosis: {label}\n\nResponse:\n{reply}"


In [None]:
#########################################
# 4) EXAMPLE USAGE
#########################################
if __name__ == "__main__":
    # Sample user message used to exercise the whole pipeline end to end.
    example_input = "Lately, I've been experiencing heightened levels of anxiety, particularly in social situations. Meeting new people or speaking in public causes intense feelings of nervousness, sweating, and racing thoughts. I would like to explore ways to overcome this social anxiety and improve my ability to connect with others on a deeper level."
    result = mental_health_assistant_pipeline(example_input)

    # Emit the framed report in a single call, one item per line.
    print(
        "=================================================",
        "FINAL OUTPUT FROM PIPELINE:",
        result,
        "=================================================",
        sep="\n",
    )