In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import re

# Load and preprocess data
df = pd.read_csv('merged_output.csv')

# Load positive and depressed words
with open('positive-words.txt', 'r') as file:
    positive_words = set(file.read().splitlines())

with open('depressedword.txt', 'r') as file:
    depressed_words = set(file.read().splitlines())

# Function to label the sentiment
def label_sentiment(text):
    words = set(text.lower().split())
    positive_count = len(words.intersection(positive_words))
    depressed_count = len(words.intersection(depressed_words))
    
    if depressed_count > positive_count:
        return 2  # Depressed
    elif positive_count > 0:
        return 1  # Positive
    else:
        return 0  # Neutral/Negative

# Create labels based on word counts
df['label'] = df['content'].apply(label_sentiment)

# Split the data
X_train, X_test, y_train, y_test = train_test_split(df['content'], df['label'], test_size=0.2, random_state=42)

# Tokenize the text
max_words = 670000
max_len = 200

tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X_train)

X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

X_train_pad = pad_sequences(X_train_seq, maxlen=max_len)
X_test_pad = pad_sequences(X_test_seq, maxlen=max_len)

# Encode labels
le = LabelEncoder()
y_train_encoded = le.fit_transform(y_train)
y_test_encoded = le.transform(y_test)

# Convert to categorical
y_train_cat = tf.keras.utils.to_categorical(y_train_encoded, num_classes=3)
y_test_cat = tf.keras.utils.to_categorical(y_test_encoded, num_classes=3)

# Define the RNN (LSTM) model
embedding_dim = 200

model = Sequential([
    Embedding(max_words, embedding_dim, input_length=max_len),
    LSTM(128, return_sequences=True),
    Dropout(0.5),
    LSTM(64),
    Dropout(0.5),
    Dense(3, activation='softmax')
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Print model summary
print(model.summary())



None


In [6]:
history = model.fit(X_train_pad, y_train_cat, 
                    epochs=3, 
                    batch_size=32, 
                    validation_split=0.1, 
                    verbose=1)


Epoch 1/3
[1m385/385[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m164s[0m 426ms/step - accuracy: 0.9927 - loss: 0.0233 - val_accuracy: 0.9496 - val_loss: 0.2678
Epoch 2/3
[1m385/385[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m168s[0m 436ms/step - accuracy: 0.9947 - loss: 0.0165 - val_accuracy: 0.9496 - val_loss: 0.3301
Epoch 3/3
[1m385/385[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m168s[0m 435ms/step - accuracy: 0.9967 - loss: 0.0134 - val_accuracy: 0.9481 - val_loss: 0.2867


In [8]:
loss, accuracy = model.evaluate(X_test_pad, y_test_cat, verbose=0)
print(f"Test accuracy: {accuracy:.4f}")

Test accuracy: 0.9456


In [12]:
import re
import numpy as np
from keras.preprocessing.sequence import pad_sequences

def predict_sentiment(text, confidence_threshold=0.6):
    # Preprocess the input text
    text = text.strip()  # Remove leading/trailing whitespace
    if not text:
        return "Input text is empty."

    # Tokenize and pad the input text
    sequence = tokenizer.texts_to_sequences([text])
    padded = pad_sequences(sequence, maxlen=max_len)

    # Make the prediction
    prediction = model.predict(padded)
    predicted_class = np.argmax(prediction)
    confidence = prediction[0][predicted_class]

    # Rule-based corrections
    if re.search(r'\bnot\s+happy\b|\bunhappy\b', text.lower()):
        predicted_class = 0  # Correct "not happy" to negative
    
    if re.search(r"\bwish\s+.*?\bnever\s+born\b|\bdon't\s+want\s+to\s+live\b", text.lower()):
        predicted_class = 2  # Correct suicidal thoughts to depressive

    # Handle low confidence predictions
    if confidence < confidence_threshold:
        return f"Uncertain prediction. Confidence: {confidence:.2f}"
    
    # Class interpretations based on confidence
    if predicted_class == 2:
        if confidence >= 0.8:
            return f"The user might be severely depressed. Confidence: {confidence:.2f}"
        else:
            return f"The user might be mildly depressed. Confidence: {confidence:.2f}"
    elif predicted_class == 1:
        if confidence >= 0.8:
            return f"The user is in a very positive mood. Confidence: {confidence:.2f}"
        else:
            return f"The user is in a somewhat positive mood. Confidence: {confidence:.2f}"
    else:
        if confidence >= 0.8:
            return f"The user is in a very negative mood. Confidence: {confidence:.2f}"
        else:
            return f"The user is in a somewhat negative mood. Confidence: {confidence:.2f}"

# Example usage of the predict_sentiment function
examples = [
    "I feel so hopeless and sad all the time.",
    "I'm excited about my new job and looking forward to the future.",
    "I don't know if life is worth living anymore.",
    "I had a great day with my friends and family.",
    "This weather is terrible, and I'm having a bad day.",
    "I want to kill myself.",
    "I'm feeling a bit down today, but I'm sure things will get better.",
    "I feel like nothing is going right for me today.",
    "I'm really struggling to keep up, and it's frustrating.",
    "Today feels like a complete waste; I can’t get anything done.",
    "I'm overwhelmed, and it's dragging my mood down.",
    "I just don't have the energy for anything today.",
    "It's one of those days where nothing feels worth the effort."
]

for example in examples:
    print(f"Text: {example}")
    print(f"Prediction: {predict_sentiment(example)}\n")

Text: I feel so hopeless and sad all the time.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 104ms/step
Prediction: The user might be severely depressed. Confidence: 1.00

Text: I'm excited about my new job and looking forward to the future.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Prediction: The user is in a very positive mood. Confidence: 1.00

Text: I don't know if life is worth living anymore.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Prediction: The user is in a very positive mood. Confidence: 1.00

Text: I had a great day with my friends and family.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Prediction: The user is in a very positive mood. Confidence: 1.00

Text: This weather is terrible, and I'm having a bad day.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Prediction: The user might be severely depressed. Confidence: 1.00

Text: I want to kill my

In [14]:
import numpy as np
import re

def contains_keywords(text, keywords):
    return any(keyword in text.lower() for keyword in keywords)

def predict_sentiment(text, confidence_threshold=0.6):
    sequence = tokenizer.texts_to_sequences([text])
    padded = pad_sequences(sequence, maxlen=max_len)
    prediction = model.predict(padded)
    predicted_class = np.argmax(prediction)
    confidence = prediction[0][predicted_class]
    
    # Keyword analysis
    positive_keywords = ['excited', 'happy', 'great', 'wonderful', 'fantastic', 'proud', 'thrilled']
    negative_keywords = ['sad', 'hopeless', 'terrible', 'overwhelmed', 'dread']
    depressive_keywords = ['depressed', 'suicidal', 'worthless', 'empty', 'useless']

    if contains_keywords(text, positive_keywords):
        predicted_class = 1  # Force positive classification
    elif contains_keywords(text, depressive_keywords):
        predicted_class = 2  # Force depressive classification
    elif contains_keywords(text, negative_keywords):
        predicted_class = 0  # Force negative classification

    # Rule-based corrections
    if re.search(r'\bnot\s+happy\b|\bunhappy\b', text.lower()):
        predicted_class = 0  # Correct "not happy" to negative
    
    if re.search(r"\bwish\s+.*?\bnever\s+born\b|\bdon't\s+want\s+to\s+live\b", text.lower()):
        predicted_class = 2  # Correct suicidal thoughts to depressive

    if confidence < confidence_threshold:
        return f"Uncertain prediction. Confidence: {confidence:.2f}"
    
    if predicted_class == 2:
        if confidence >= 0.8:
            return f"The user might be severely depressed. Confidence: {confidence:.2f}"
        else:
            return f"The user might be mildly depressed. Confidence: {confidence:.2f}"
    elif predicted_class == 1:
        if confidence >= 0.8:
            return f"The user is in a very positive mood. Confidence: {confidence:.2f}"
        else:
            return f"The user is in a somewhat positive mood. Confidence: {confidence:.2f}"
    else:
        if confidence >= 0.8:
            return f"The user is in a very negative mood. Confidence: {confidence:.2f}"
        else:
            return f"The user is in a somewhat negative mood. Confidence: {confidence:.2f}"

# Test the model with some example conversations
examples = [
    "I feel so hopeless and sad all the time.",
    "I'm excited about my new job and looking forward to the future.",
    "I don't know if life is worth living anymore.",
    "I had a great day with my friends and family.",
    "This weather is terrible, and I'm having a bad day.",
    "I'm feeling a bit down today, but I'm sure things will get better.",
    "I'm really proud of what I accomplished today.",
    "Sometimes I wonder if I'll ever find my way.",
    "I had a productive meeting, but I still have a lot on my plate.",
    "Today was just another ordinary day.",
    "I'm thrilled about my upcoming vacation!",
    "I'm feeling overwhelmed by everything that's happening.",
    "I'm content with where I am in life right now.",
    "I can't shake off this feeling of dread.",
    "I had a fun time at the park with my friends last weekend.",
    "I'm useless",
    "I wish I was never born",
    "I don't want to live anymore",
    "I'm not happy"
]

for example in examples:
    print(f"Text: {example}")
    print(f"Prediction: {predict_sentiment(example)}\n")


Text: I feel so hopeless and sad all the time.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
Prediction: The user is in a very negative mood. Confidence: 1.00

Text: I'm excited about my new job and looking forward to the future.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Prediction: The user is in a very positive mood. Confidence: 1.00

Text: I don't know if life is worth living anymore.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Prediction: The user is in a very positive mood. Confidence: 1.00

Text: I had a great day with my friends and family.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
Prediction: The user is in a very positive mood. Confidence: 1.00

Text: This weather is terrible, and I'm having a bad day.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Prediction: The user is in a very negative mood. Confidence: 1.00

Text: I'm feeling a bit do

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import re


In [None]:
# Load and preprocess data
df = pd.read_csv('merged_output.csv')

# Load positive and depressed words
with open('positive-words.txt', 'r') as file:
    positive_words = set(file.read().splitlines())

with open('depressedword.txt', 'r') as file:
    depressed_words = set(file.read().splitlines())