In [None]:
import pandas as pd
from textblob import TextBlob
import nltk
from nltk.corpus import stopwords
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
nltk.download('stopwords')
nltk.download('punkt')
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
import re
import string
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, GlobalMaxPooling1D, Embedding, Dropout
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [None]:
df = pd.read_csv('Sentiment_analysis_dataset.csv')
df = df.dropna(subset=['Statement', 'Status'])
df = df.drop(df[df.duplicated()][df[df.duplicated()]['Status'].isin(['Normal', 'Depression'])].index)
def augment_text(text):
    try:
        blob = TextBlob(text)
        translated = blob.translate(to='fr').translate(to='en')
        return str(translated)
    except Exception as e:
        return text

df['augmented_statement'] = df.apply(lambda row: augment_text(row['Statement'])
                                     if row['Status'] in ['Anxiety', 'Stress','Bipolar','Personality disorder'] else None, axis=1)
translated_rows = df[df['augmented_statement'].notna()]
translated_rows['Statement'] = translated_rows['augmented_statement']

df = pd.concat([df, translated_rows[['Statement', 'Status']]])

df = df.drop(columns=['augmented_statement'])

stemmer = PorterStemmer()

def preprocess_text(text):
    text = text.lower()  # Lowercase text
    text = re.sub(r'\[.*?\]', '', text)  # Remove text in square brackets
    text = re.sub(r'https?://\S+|www\.\S+', '', text)  # Remove links
    text = re.sub(r'<.*?>+', '', text)  # Remove HTML tags
    text = re.sub(r'[%s]' % re.escape(string.punctuation), '', text)  # Remove punctuation
    text = re.sub(r'\n', '', text)  # Remove newlines
    text = re.sub(r'\w*\d\w*', '', text)  # Remove words containing numbers
    return text

stop_words = set(stopwords.words('english'))

def remove_stopwords_and_stem(text):
    tokens = word_tokenize(text)
    tokens = [stemmer.stem(word) for word in tokens if word not in stop_words]
    return ' '.join(tokens)

df['Statement'] = df['Statement'].apply(lambda x: preprocess_text(x))
df['Statement'] = df['Statement'].apply(lambda x: remove_stopwords_and_stem(x))

label_encoder = LabelEncoder()
df['Status'] = label_encoder.fit_transform(df['Status'])

X = df['Statement']
y = df['Status']

tokenizer = Tokenizer(num_words=10000)
tokenizer.fit_on_texts(X)
X_seq = tokenizer.texts_to_sequences(X)
X_padded = pad_sequences(X_seq, maxlen=100)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  translated_rows['Statement'] = translated_rows['augmented_statement']


In [8]:
X_train, X_test, y_train, y_test = train_test_split(X_padded, y, test_size=0.2, random_state=42)

In [9]:
# Build the CNN model
cnn_model = Sequential()
cnn_model.add(Embedding(input_dim=10000, output_dim=128))  # Removed input_length
cnn_model.add(Conv1D(filters=128, kernel_size=5, activation='relu'))
cnn_model.add(GlobalMaxPooling1D())
cnn_model.add(Dense(128, activation='relu'))
cnn_model.add(Dropout(0.5))
cnn_model.add(Dense(7, activation='softmax'))

# Compile the model
cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
history = cnn_model.fit(X_train, y_train, epochs=5, validation_split=0.2, batch_size=32)

Epoch 1/5
[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 14ms/step - accuracy: 0.5279 - loss: 1.2617 - val_accuracy: 0.7243 - val_loss: 0.7025
Epoch 2/5
[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 13ms/step - accuracy: 0.7677 - loss: 0.6280 - val_accuracy: 0.7850 - val_loss: 0.5744
Epoch 3/5
[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 12ms/step - accuracy: 0.8490 - loss: 0.4054 - val_accuracy: 0.8019 - val_loss: 0.5390
Epoch 4/5
[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 12ms/step - accuracy: 0.9096 - loss: 0.2527 - val_accuracy: 0.7983 - val_loss: 0.5980
Epoch 5/5
[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 12ms/step - accuracy: 0.9435 - loss: 0.1627 - val_accuracy: 0.8010 - val_loss: 0.6492


In [11]:
from sklearn.metrics import classification_report, accuracy_score
import numpy as np

# Evaluate the model
loss, accuracy = cnn_model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy}")

# Predictions
y_pred = cnn_model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)

# Evaluation
print("Accuracy Score:")
print(accuracy_score(y_test, y_pred_classes))

print("Classification Report:")
print(classification_report(y_test, y_pred_classes))

[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.7971 - loss: 0.6407
Test Accuracy: 0.7985143065452576
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
Accuracy Score:
0.7985142857142857
Classification Report:
              precision    recall  f1-score   support

           0       0.91      0.92      0.91      1089
           1       0.93      0.90      0.91       769
           2       0.71      0.65      0.68      2150
           3       0.90      0.91      0.91      2193
           4       0.75      0.86      0.80       331
           5       0.82      0.87      0.84       725
           6       0.62      0.66      0.64      1493

    accuracy                           0.80      8750
   macro avg       0.81      0.82      0.81      8750
weighted avg       0.80      0.80      0.80      8750



In [None]:
def predict_mental_health_status(text, max_len=100):
    # Tokenize the input text (convert text to sequences of integers)
    text_seq = tokenizer.texts_to_sequences([text])
    
    # Pad the sequences to ensure the input matches the expected input length
    text_padded = pad_sequences(text_seq, maxlen=max_len)
    
    prediction = cnn_model.predict(text_padded)  
    
    # Get the class with the highest probability (index of the predicted class)
    predicted_class_index = np.argmax(prediction, axis=1)[0] 
    
    status = label_encoder.inverse_transform([predicted_class_index])
    
    return status[0]

# Example usage
text_1 = "This is really getting out of control I feel exhausted from the constant voices in my head and my heart racing during social situations. I feel like everything I do, could have been done better, and that I am inadequate. My avoidance is getting worse to the point where I am unable to do anything at all. As a medical student, I chose to study this major without truly understanding the challenges that would come with it. Now, during my clinical rotations, every encounter with each patient feels like a nightmare. I constantly feel judged, and I fear harsh criticism is always one step away from being directed at me. This constant mental battle leaves me with no room to use the knowledge that I have worked so hard to achieve."
result = predict_mental_health_status(text_1)
print(f"The predicted mental health status for the given text is: {result}")

text_2 = "I am feeling great and excited about the future."
result = predict_mental_health_status(text_2)
print(f"The predicted mental health status for the given text is: {result}")

text_3 = "I am feeling very anxious and stressed about work."
result = predict_mental_health_status(text_3)
print(f"The predicted mental health status for the given text is: {result}")

text_4 = "I don't see a reason to continue living, I just wanna die."
result = predict_mental_health_status(text_4)
print(f"The predicted mental health status for the given text is: {result}")

text_5 = "I act impulsively and regret it later."
result = predict_mental_health_status(text_5)
print(f"The predicted mental health status for the given text is: {result}")

text_6 = "The problem is, the mistakes I’ve made are the kind I can’t fix because I won’t be on shift to address them. I’m not sure what to say to be helpful or to ease the tension. I honestly feel like all my coworkers hate me, just like everyone else has. Ive already had three panic attacks at work."
result = predict_mental_health_status(text_6)
print(f"The predicted mental health status for the given text is: {result}")

text_7 = "I’m feeling completely overwhelmed right now. The constant voices in my head and my racing heart in social situations are exhausting. It feels like everything I do could have been done better, and that I’m just not good enough. My avoidance is worsening to the point where I can hardly do anything at all. As a medical student, I chose this path without truly grasping the challenges it would bring. Now, during my clinical rotations, every interaction with a patient feels like a nightmare. I constantly feel judged, and I’m always afraid that harsh criticism is just around the corner. This ongoing mental struggle leaves me no space to fully apply the knowledge I’ve worked so hard to gain."
result = predict_mental_health_status(text_7)
print(f"The predicted mental health status for the given text is: {result}")

text_8 = "i dont want to live this life, i just want to lay down without thinking about this life"
result = predict_mental_health_status(text_8)
print(f"The predicted mental health status for the given text is: {result}")

text_9 = "I really hate thinking about positive things and cant implement my ideas in real life"
result = predict_mental_health_status(text_9)
print(f"The predicted mental health status for the given text is: {result}")

text_10 = "One moment, I feel on top of the world, full of energy and ideas, and the next, I'm completely drained and struggling to get out of bed."
result = predict_mental_health_status(text_10)
print(f"The predicted mental health status for the given text is: {result}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
The predicted mental health status for the given text is: Depression
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
The predicted mental health status for the given text is: Normal
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
The predicted mental health status for the given text is: Anxiety
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
The predicted mental health status for the given text is: Suicidal
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
The predicted mental health status for the given text is: Normal
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
The predicted mental health status for the given text is: Bipolar
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
The predicted mental health status for the given text is: Normal
[1m1/1[0m [32m━━━━━━━━━━━━━━━━