# Labeling Different Mental Conditions from Text

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.multiclass import OneVsRestClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report

In [2]:
# Read the labelled statements from disk, then preview the first five rows.
multi_label_data = pd.read_csv(filepath_or_buffer='mental_health_conditions.csv')
multi_label_data.head(n=5)

Unnamed: 0.1,Unnamed: 0,statement,status
0,0,oh my gosh,Anxiety
1,1,"trouble sleeping, confused mind, restless hear...",Anxiety
2,2,"All wrong, back off dear, forward doubt. Stay ...",Anxiety
3,3,I've shifted my focus to something else but I'...,Anxiety
4,4,"I'm restless and restless, it's been a month n...",Anxiety


In [3]:
# Rows with a missing statement are dropped before any text processing.
multi_label_data = multi_label_data.dropna(subset=['statement'])

# Map every status string to an integer code. The column is attached with
# .assign(), which builds a new frame, instead of item assignment on the
# dropna() result, which can raise SettingWithCopyWarning in pandas.
label_encoder = LabelEncoder()
multi_label_data = multi_label_data.assign(
    encoded_status=label_encoder.fit_transform(multi_label_data['status'])
)

In [4]:
# Features are the raw statements; targets are the integer-encoded statuses.
X, y = multi_label_data['statement'], multi_label_data['encoded_status']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [5]:
# TF-IDF features limited to 5000 terms, with English stop words removed.
tfidf_vectorizer = TfidfVectorizer(
    stop_words='english',
    max_features=5000,
)

# The vectorizer is fitted on the training statements only; the test
# statements are transformed with that already-fitted vectorizer.
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)

In [6]:
# One-vs-rest wrapper around a multinomial naive Bayes base estimator,
# fitted on the TF-IDF training matrix and the encoded training labels.
base_estimator = MultinomialNB()
model = OneVsRestClassifier(base_estimator)
model.fit(X_train_tfidf, y_train)

In [7]:
def predict_condition(sentence, vectorizer, model, label_encoder):
    """Return the predicted probability of each condition for one sentence.

    Args:
        sentence: The text to classify.
        vectorizer: Fitted object whose ``transform`` accepts a list of
            documents and returns the feature matrix the model expects.
        model: Fitted classifier exposing ``predict_proba`` and, normally,
            ``classes_`` holding the encoded labels it was trained on.
        label_encoder: Fitted encoder exposing ``classes_`` and
            ``inverse_transform`` to map encoded labels back to names.

    Returns:
        dict mapping each condition name to its predicted probability.
    """
    sentence_tfidf = vectorizer.transform([sentence])
    probabilities = model.predict_proba(sentence_tfidf)[0]

    # predict_proba columns are ordered like model.classes_, not like
    # label_encoder.classes_. The two only line up when every encoded class
    # was present in the training data, so translate the model's own classes
    # back to names instead of assuming the orders match.
    fitted_classes = getattr(model, 'classes_', None)
    if fitted_classes is None:
        # No class list on the model: keep the encoder order as a fallback.
        condition_names = label_encoder.classes_
    else:
        condition_names = label_encoder.inverse_transform(fitted_classes)

    return dict(zip(condition_names, probabilities))

# Score a single example sentence with the fitted vectorizer and model.
example_sentence = "I'm feeling very stressed and overwhelmed."
condition_probabilities = predict_condition(
    example_sentence,
    tfidf_vectorizer,
    model,
    label_encoder,
)

# List the conditions from most to least probable.
sorted_conditions = sorted(
    condition_probabilities.items(),
    key=lambda name_and_prob: name_and_prob[1],
    reverse=True,
)
for condition, probability in sorted_conditions:
    print(f"{condition}: {probability}")

Depression: 0.34419992252841797
Stress: 0.32326540283717964
Suicidal: 0.10406344965242126
Anxiety: 0.08786336329328841
Normal: 0.08556670159711788
Bipolar: 0.042247117901298585
Personality disorder: 0.012794042190276272
