In [0]:
%restart_python

In [0]:
%pip install transformers torch accelerate

[43mNote: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.[0m


In [0]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# --- Settings ---------------------------------------------------------------
REPO = "SajjadIslam/multiMentalRoBERTA-6-class"  # checkpoint id passed to from_pretrained
MAX_LENGTH = 512  # max_length handed to the tokenizer in classify_6
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# --- Tokenizer and classifier -----------------------------------------------
tok = AutoTokenizer.from_pretrained(REPO, use_fast=True)
mdl = AutoModelForSequenceClassification.from_pretrained(REPO)
mdl = mdl.to(DEVICE)  # move weights to the selected device
mdl = mdl.eval()      # switch to evaluation mode for inference

# --- Label lookup -------------------------------------------------------------
# Config keys are coerced to int so the argmax index can be used directly.
id2label = dict((int(idx), name) for idx, name in mdl.config.id2label.items())

def classify_6(text: str):
    """
    Classify one piece of text with the 6-class model loaded in `mdl`.

    Inputs: Sphinx-transcribed text (a single string)
    Outputs: Dictionary with
        "predicted_class": label of the highest-scoring class,
        "confidence": softmax probability of that class,
        "probabilities": mapping of every label to its softmax probability
    """
    # A single sequence needs no padding. Padding to MAX_LENGTH would push
    # MAX_LENGTH tokens through the model on every call regardless of how
    # short the text is. Truncation still caps long inputs at MAX_LENGTH.
    enc = tok(text, truncation=True, max_length=MAX_LENGTH,
              return_tensors="pt").to(DEVICE)

    with torch.no_grad():
        logits = mdl(**enc).logits
        # [0] selects the only row of the batch
        probs = torch.softmax(logits, dim=-1)[0].cpu().numpy()
        pid = int(torch.argmax(logits, dim=-1).item())

    return {
        "predicted_class": id2label[pid],
        "confidence": float(probs[pid]),
        "probabilities": {id2label[i]: float(probs[i]) for i in range(len(probs))}
    }

Tried to attach usage logger `pyspark.databricks.pandas.usage_logger`, but an exception was raised: JVM wasn't initialised. Did you call it on executor side?
  from torch.utils._pytree import _broadcast_to_and_flatten, tree_flatten, tree_unflatten


config.json:   0%|          | 0.00/949 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Loading weights:   0%|          | 0/393 [00:00<?, ?it/s]

In [0]:
def classify_and_club(text: str):
    """Classify ``text`` and attach a coarse risk tier plus the input itself.

    Tier assignment by predicted label (compared case-insensitively):
      * "none"    -> 0  Low Risk (Stable/Passive Monitoring)
      * "suicide" -> 2  Critical Risk (Immediate Human Bypass)
      * any other -> 1  Moderate Risk (Trigger ElevenLabs AI Agent),
                        e.g. stress, anxiety, depression, ptsd

    Returns the dict produced by classify_6, extended with the keys
    "risk_tier" and "text".
    """
    outcome = classify_6(text)

    # Only the two extremes are listed; every other label falls back to tier 1.
    tier_by_label = {"none": 0, "suicide": 2}
    outcome["risk_tier"] = tier_by_label.get(outcome["predicted_class"].lower(), 1)
    outcome["text"] = text
    return outcome

# 4. Example Run with Different Scenarios
# Each sample is preceded by the class it is expected to trigger.
examples = [
    # Expected: Anxiety/Stress
    "I feel like my chest is tight and I can't catch my breath, everything is overwhelming.",
    # Expected: Suicidal
    "I've lost all hope and I don't see any reason to keep going tomorrow.",
    # Expected: PTSD
    "I keep having these vivid flashbacks of the accident and I can't sleep.",
]

print("-" * 30)
for ex in examples:
    result = classify_and_club(ex)
    # One print call, two lines: the summary, then the input followed by a blank line.
    print(
        f"Result: Tier {result['risk_tier']} | Class: {result['predicted_class']} | Conf: {result['confidence']}",
        f"Input Sample: {result['text']}\n",
        sep="\n",
    )

------------------------------
Result: Tier 1 | Class: stress | Conf: 0.9989941716194153
Input Sample: I feel like my chest is tight and I can't catch my breath, everything is overwhelming.

Result: Tier 2 | Class: suicide | Conf: 0.9342429041862488
Input Sample: I've lost all hope and I don't see any reason to keep going tomorrow.

Result: Tier 1 | Class: ptsd | Conf: 0.8282128572463989
Input Sample: I keep having these vivid flashbacks of the accident and I can't sleep.



In [0]:
def get_phq_risk(phq_total_score, item_9_positive=False):
    """
    Map a PHQ-9 result to a (tier, reason) pair.

    phq_total_score: 0-27
    item_9_positive: Boolean (True if user marked any 'better off dead' thoughts)

    Returns: (tier, reason) where tier is 0 (stable), 1 (moderate) or 2 (critical)
    Raises: ValueError if the score is NaN or outside 0-27 and the Item 9
            override does not apply.
    """
    # Safety Override: Item 9 is the ultimate redline
    if item_9_positive:
        return 2, "Critical: Item 9 Safety Override"

    # Refuse to tier a missing/corrupt score. NaN fails every comparison, so
    # without this guard it would fall through to "Stable: Low Score".
    # Written as a positive range test so that NaN is rejected as well.
    if not (0 <= phq_total_score <= 27):
        raise ValueError(
            f"PHQ-9 total score must be between 0 and 27, got {phq_total_score!r}"
        )

    if phq_total_score >= 20:
        return 2, "Critical: Severe Score"
    elif phq_total_score >= 10:
        return 1, "Moderate: Elevating Score"
    else:
        return 0, "Stable: Low Score"
    
    
def final_ensemble_decision(phq_score, nlp_result, item_9_positive=False):
    """
    Combine the PHQ-9 tier with the NLP classifier result into one (tier, reason).

    phq_score: PHQ-9 total score, forwarded to get_phq_risk
    nlp_result: dict with "predicted_class" and "risk_tier"
                (the shape returned by classify_and_club)
    item_9_positive: forwarded to get_phq_risk so its Item 9 safety override
                     is reachable from the ensemble; defaults to False

    Returns: (tier, reason)
    """
    # 1. Start with the Clinical Anchor (PHQ-9)
    base_tier, reason = get_phq_risk(phq_score, item_9_positive=item_9_positive)

    # 2. Integrate multiMentalRoBERTa (Score 1)
    # If NLP detects 'suicide', it's an immediate Tier 2 regardless of others.
    # The label is compared case-insensitively (as classify_and_club does) and
    # an NLP tier of 2 is honoured too, so the two code paths cannot disagree.
    nlp_label = str(nlp_result["predicted_class"]).lower()
    if nlp_label == "suicide" or nlp_result.get("risk_tier") == 2:
        return 2, "Critical: NLP Suicide Detection"

    # 3. Conflict Resolution Logic
    # If the PHQ tier is low (Tier 0) but the NLP tier is Moderate (Tier 1),
    # escalate. This detects "Masked Depression", where self-report is low but
    # passive signals are high.
    if base_tier == 0 and nlp_result["risk_tier"] == 1:
        return 1, "Moderate: Passive Signals detect stress mismatch"

    return base_tier, reason

[0;36m  File [0;32m<command-5314961277940441>, line 15[0;36m[0m
[0;31m    return 0, "Stable: Low Score"def final_ensemble_decision(phq_score, nlp_result):[0m
[0m                                 ^[0m
[0;31mSyntaxError[0m[0;31m:[0m invalid syntax


In [0]:
import mlflow.pyfunc
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

class MentalHealthRiskModel(mlflow.pyfunc.PythonModel):
    """MLflow pyfunc wrapper around the 6-class mental-health text classifier.

    `predict` takes a pandas DataFrame with a 'text' column and returns one
    result dict per row: predicted class, confidence, per-class probabilities,
    a coarse risk tier (0/1/2) and the input text.
    """

    def load_context(self, context):
        """Load tokenizer and model and cache them on the instance."""
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.max_length = 512
        self.repo = "SajjadIslam/multiMentalRoBERTA-6-class"
        # NOTE(review): the checkpoint is fetched by repo id, so the loading
        # environment presumably needs Hub access or a warm cache - confirm
        # this holds for the serving endpoint.
        self.tokenizer = AutoTokenizer.from_pretrained(self.repo, use_fast=True)
        self.model = AutoModelForSequenceClassification.from_pretrained(self.repo).to(self.device).eval()
        # Config keys are coerced to int so the argmax index can be used directly.
        self.id2label = {int(k): v for k, v in self.model.config.id2label.items()}

    def classify_6(self, text):
        """Return predicted class, confidence and full distribution for one text."""
        # A single sequence needs no padding. Padding to max_length would push
        # max_length tokens through the model for every row regardless of how
        # short the text is. Truncation still caps long inputs.
        enc = self.tokenizer(text, truncation=True, max_length=self.max_length,
                             return_tensors="pt").to(self.device)
        with torch.no_grad():
            logits = self.model(**enc).logits
            # [0] selects the only row of the batch
            probs = torch.softmax(logits, dim=-1)[0].cpu().numpy()
            pid = int(torch.argmax(logits, dim=-1).item())
        return {
            "predicted_class": self.id2label[pid],
            "confidence": float(probs[pid]),
            "probabilities": {self.id2label[i]: float(probs[i]) for i in range(len(probs))}
        }

    def classify_and_club(self, text):
        """Classify `text` and add 'risk_tier' (0 none, 2 suicide, 1 otherwise) and 'text'."""
        result = self.classify_6(text)
        label = result["predicted_class"].lower()
        # Only the two extremes are listed; every other label maps to tier 1.
        result["risk_tier"] = {"none": 0, "suicide": 2}.get(label, 1)
        result['text'] = text
        return result

    def predict(self, context, model_input):
        """Score every row of `model_input['text']`; returns a list of result dicts."""
        # model_input: pandas DataFrame with a column 'text'
        return [self.classify_and_club(text) for text in model_input['text']]

  from torch.utils._pytree import _broadcast_to_and_flatten, tree_flatten, tree_unflatten


In [0]:
import mlflow
import pandas as pd

# Example request payload; MLflow infers the model signature from it
example_input = pd.DataFrame(["I feel anxious and overwhelmed."], columns=["text"])

# Custom pyfunc wrapper to be logged
mental_health_model = MentalHealthRiskModel()

# Log the model inside a fresh run.
# No extra code files are attached (the explicit `code_path=None` was the default).
with mlflow.start_run():
    mlflow.pyfunc.log_model(
        python_model=mental_health_model,
        artifact_path="mental_health_risk_model",
        input_example=example_input,
    )

print("Model logged. Go to MLflow Runs to deploy as a REST endpoint.")



config.json:   0%|          | 0.00/949 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Loading weights:   0%|          | 0/393 [00:00<?, ?it/s]

2026/02/21 19:36:11 INFO mlflow.pyfunc: Inferring model signature from input example


Loading weights:   0%|          | 0/393 [00:00<?, ?it/s]

Uploading artifacts:   0%|          | 0/11 [00:00<?, ?it/s]

Uploading /local_disk0/user_tmp_data/spark-9bee46ed-49ab-4608-be50-38/tmp5w2z4xkp/model/python_model.pkl:   0%…

Model logged. Go to MLflow Runs to deploy as a REST endpoint.


In [0]:
%pip install databricks-feature-store

Collecting databricks-feature-store
  Downloading databricks_feature_store-0.17.0-py3-none-any.whl.metadata (3.4 kB)
Collecting databricks-feature-engineering>=0.2.0 (from databricks-feature-store)
  Downloading databricks_feature_engineering-0.14.0-py3-none-any.whl.metadata (4.3 kB)
Collecting dbl-tempo<1,>=0.1.26 (from databricks-feature-engineering>=0.2.0->databricks-feature-store)
  Downloading dbl_tempo-0.1.30-py3-none-any.whl.metadata (12 kB)
Collecting azure-cosmos==4.3.1 (from databricks-feature-engineering>=0.2.0->databricks-feature-store)
  Downloading azure_cosmos-4.3.1-py3-none-any.whl.metadata (52 kB)
Collecting flask<3,>=1.1.2 (from databricks-feature-engineering>=0.2.0->databricks-feature-store)
  Downloading flask-2.3.3-py3-none-any.whl.metadata (3.6 kB)
Collecting databricks-sdk>=0.76.0 (from databricks-feature-engineering>=0.2.0->databricks-feature-store)
  Downloading databricks_sdk-0.91.0-py3-none-any.whl.metadata (40 kB)
Collecting protobuf<7,>=5 (from databricks-f

In [0]:
from databricks.feature_store import FeatureStoreClient
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, FloatType

# Initialize Feature Store client
fs = FeatureStoreClient()

# Create SparkSession (already available in Databricks)
spark = SparkSession.builder.getOrCreate()

# Define schema
schema = StructType([
    StructField("id", IntegerType(), False),
    StructField("user_id", StringType(), False),
    StructField("text", StringType(), False),
    StructField("fitbit_score", FloatType(), False)
])

# Example data
feature_data = [
    (1, "userA", "I feel anxious and overwhelmed.", 0.75),
    (2, "userB", "I'm doing well today.", 0.20),
    (3, "userC", "Sometimes I feel sad.", 0.55)
]

# Create PySpark DataFrame
feature_df = spark.createDataFrame(feature_data, schema)

# Define feature table name (use default schema)
feature_table_name = "text_features"  # Or "default.text_features" if you want to specify schema

# Keep the cell re-runnable: create the table on the first run only and
# upsert on the primary key afterwards, instead of calling create_table
# unconditionally (which is expected to fail once the table exists).
# NOTE(review): assumes an existing table of this name is already a feature
# table with the same schema - confirm before relying on the merge path.
if spark.catalog.tableExists(feature_table_name):
    fs.write_table(
        name=feature_table_name,
        df=feature_df,
        mode="merge"
    )
    print(f"Feature table '{feature_table_name}' already exists; rows merged.")
else:
    # Create feature table
    fs.create_table(
        name=feature_table_name,
        primary_keys=["id"],
        df=feature_df,
        description="Input text and wearable features for mental health risk model"
    )
    print(f"Feature table '{feature_table_name}' created and registered.")

2026/02/21 19:59:04 INFO databricks.ml_features._compute_client._compute_client: Setting columns ['id'] of table 'workspace.default.text_features' to NOT NULL.
2026/02/21 19:59:06 INFO databricks.ml_features._compute_client._compute_client: Setting Primary Keys constraint ['id'] on table 'workspace.default.text_features'.
2026/02/21 19:59:20 INFO databricks.ml_features._compute_client._compute_client: Created feature table 'workspace.default.text_features'.


Feature table 'text_features' created and registered.
