In [1]:
import pandas as pd
import numpy as np
import re
import string
import joblib

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
import nltk
nltk.download('stopwords')

# ----------------------------
# 1. Load Dataset
# ----------------------------
# Placeholder corpus -- replace this with your own labeled dataset.
# Each record is (text, label), so a sample and its class stay side by side.
data = pd.DataFrame(
    [
        ("I feel hopeless and sad all the time.", 'Depression'),
        ("I'm constantly nervous and worried about everything.", 'Anxiety'),
        ("I can't concentrate, I’m under too much pressure.", 'Stress'),
        ("Life is good, I'm happy.", 'Normal'),
        ("Enjoyed the weekend with friends.", 'Normal'),
        ("My heart races and I feel panic all the time.", 'Anxiety'),
        ("Crying for no reason, just empty.", 'Depression'),
        ("Stressed about work, deadlines piling up.", 'Stress'),
        ("Feeling relaxed and at peace.", 'Normal'),
    ],
    columns=['text', 'label'],
)

# ----------------------------
# 2. Text Preprocessing
# ----------------------------
def preprocess(text):
    """Normalise raw text into a space-joined string of stemmed tokens.

    Steps, in order: lowercase; strip URLs; strip @mentions and '#' signs;
    drop every character that is not an ASCII letter or whitespace; split on
    whitespace; remove NLTK English stop words; Porter-stem what remains.

    Parameters
    ----------
    text : str
        Raw input sentence.

    Returns
    -------
    str
        The surviving stemmed tokens joined by single spaces (an empty
        string when no token survives).
    """
    text = text.lower()
    text = re.sub(r"http\S+|www\S+|https\S+", '', text)
    # Fix: this pattern used to be r'\@w+|\#', which only matches '@' followed
    # by literal 'w' characters. Handles such as "@user" were therefore never
    # removed and leaked into the features once the '@' itself was stripped
    # by the next substitution.
    text = re.sub(r'@\w+|#', '', text)
    # NOTE(review): apostrophes are removed here, before stop-word filtering,
    # so contractions like "don't" reach the filter as "dont" -- confirm that
    # this is the intended behaviour.
    text = re.sub(r'[^A-Za-z\s]', '', text)
    stop_words = set(stopwords.words('english'))
    stemmer = PorterStemmer()
    return " ".join(
        stemmer.stem(word) for word in text.split() if word not in stop_words
    )

data['clean_text'] = data['text'].apply(preprocess)

# ----------------------------
# 3. TF-IDF and Train Model
# ----------------------------
X = data['clean_text']
y = data['label']

# NOTE(review): with the 9-row placeholder dataset a 20% hold-out is only
# 2 rows, so the metrics printed below are not meaningful until a real
# dataset is plugged in.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the TF-IDF vocabulary on the training split only, then reuse it to
# transform the hold-out split.
vectorizer = TfidfVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# NOTE(review): `multi_class` is deprecated in recent scikit-learn releases;
# confirm against the installed version before upgrading.
model = LogisticRegression(multi_class='ovr', max_iter=1000)
model.fit(X_train_vec, y_train)

# ----------------------------
# 4. Evaluation
# ----------------------------
y_pred = model.predict(X_test_vec)
print("✅ Accuracy:", accuracy_score(y_test, y_pred))
# zero_division=0 reports 0.0 for labels that have no true or no predicted
# samples (the same value the default shows) without emitting an
# UndefinedMetricWarning for each affected metric.
print("\n🧾 Classification Report:\n", classification_report(y_test, y_pred, zero_division=0))

# ----------------------------
# 5. Save Model
# ----------------------------
# The classifier and the fitted vectorizer are written as two separate files;
# both are needed together at inference time.
joblib.dump(model, "mental_health_model.joblib")
joblib.dump(vectorizer, "vectorizer.joblib")


✅ Accuracy: 0.0

🧾 Classification Report:
               precision    recall  f1-score   support

     Anxiety       0.00      0.00      0.00       1.0
      Normal       0.00      0.00      0.00       0.0
      Stress       0.00      0.00      0.00       1.0

    accuracy                           0.00       2.0
   macro avg       0.00      0.00      0.00       2.0
weighted avg       0.00      0.00      0.00       2.0



[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\sagni\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


['vectorizer.joblib']

In [2]:
import joblib
import numpy as np

# Restore the fitted TF-IDF vectorizer and the classifier from disk.
# NOTE(review): joblib files are pickle-based -- only load artifacts that come
# from a trusted source.
vectorizer = joblib.load("vectorizer.joblib")
model = joblib.load("mental_health_model.joblib")

def preprocess(text):
    """Normalise raw text into a space-joined string of stemmed tokens.

    The loaded vectorizer expects text cleaned the same way as at fit time,
    so keep this function in sync with the training-side preprocessing.

    Steps, in order: lowercase; strip URLs; strip @mentions and '#' signs;
    drop every character that is not an ASCII letter or whitespace; split on
    whitespace; remove NLTK English stop words; Porter-stem what remains.

    Parameters
    ----------
    text : str
        Raw input sentence.

    Returns
    -------
    str
        The surviving stemmed tokens joined by single spaces (an empty
        string when no token survives).
    """
    text = text.lower()
    text = re.sub(r"http\S+|www\S+|https\S+", '', text)
    # Fix: this pattern used to be r'\@w+|\#', which only matches '@' followed
    # by literal 'w' characters. Handles such as "@user" were therefore never
    # removed and leaked into the features once the '@' itself was stripped
    # by the next substitution.
    text = re.sub(r'@\w+|#', '', text)
    text = re.sub(r'[^A-Za-z\s]', '', text)
    stop_words = set(stopwords.words('english'))
    stemmer = PorterStemmer()
    return " ".join(
        stemmer.stem(word) for word in text.split() if word not in stop_words
    )

def predict_state(text):
    """Classify one piece of text with the loaded model.

    The text is cleaned with ``preprocess``, vectorised with the module-level
    ``vectorizer`` and scored by the module-level ``model``.

    Parameters
    ----------
    text : str
        Raw input sentence.

    Returns
    -------
    tuple
        ``(prediction, confidence)`` where ``prediction`` is the label
        returned by ``model.predict`` and ``confidence`` is the largest
        value in ``model.predict_proba`` for this input.
    """
    # The vectorizer works on a batch, so wrap the single document in a list.
    features = vectorizer.transform([preprocess(text)])
    class_probabilities = model.predict_proba(features)
    predicted_label = model.predict(features)[0]
    return predicted_label, np.max(class_probabilities)

# 🔍 Smoke-test sentences: one per state the classifier is meant to recognise
examples = [
    "I feel so anxious and nervous before the exam.",
    "Everything feels hopeless and I don't see a point in trying anymore.",
    "I’m completely stressed out with all these deadlines!",
    "I had a great day with my family and friends!",
]

# Print every sentence (numbered from 1) followed by its predicted label and
# the model's confidence rounded to two decimals.
for i, sentence in enumerate(examples, start=1):
    label, confidence = predict_state(sentence)
    print(f"Example {i}: {sentence}")
    print(f"{label} ({confidence:.2f} confidence)\n")


Example 1: I feel so anxious and nervous before the exam.
Normal (0.43 confidence)

Example 2: Everything feels hopeless and I don't see a point in trying anymore.
Normal (0.39 confidence)

Example 3: I’m completely stressed out with all these deadlines!
Normal (0.45 confidence)

Example 4: I had a great day with my family and friends!
Normal (0.49 confidence)

