<a href="https://colab.research.google.com/github/suyashmarathe512/suyash/blob/main/Sentimental_Analysis_for_mental_health_using_NLP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import csv
warnings.filterwarnings('ignore')

In [61]:
df = pd.read_csv('/content/drive/MyDrive/Combined Data.csv', on_bad_lines='skip')
df= df.sample(n=8627, random_state=42)

df= df.drop(columns=['Unnamed: 0'])
df.reset_index(drop=True, inplace=True)

df.head()

Unnamed: 0,statement,status
0,Just as the the title says. I feel like one is...,Depression
1,a blackened sky encroached tugging behind it m...,Depression
2,"It gives you insomnia, which in turn makes you...",Depression
3,"Hello all, I'm a new submitter to this channel...",Normal
4,Thank God the CB is over for Eid,Normal


In [62]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8627 entries, 0 to 8626
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   statement  8554 non-null   object
 1   status     8627 non-null   object
dtypes: object(2)
memory usage: 134.9+ KB


In [63]:
df.isnull().sum()

Unnamed: 0,0
statement,73
status,0


In [64]:
df.status.unique()

array(['Depression', 'Normal', 'Suicidal', 'Anxiety', 'Bipolar', 'Stress',
       'Personality disorder'], dtype=object)

In [65]:
mode_status = df['statement'].mode()[0]
df['statement'].fillna(mode_status, inplace=True)
print(df.isnull().sum())
print(mode_status[0])

statement    0
status       0
dtype: int64
w


In [66]:
# NLP Processing
import nltk
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import string

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [67]:
# Text preprocessing function
def clean_text(text):
    text = text.lower() # lowercase
    text = "".join([char for char in text if char not in string.punctuation]) # remove punctuation
    text = " ".join([word for word in text.split() if word not in stopwords.words('english')]) # remove stopwords
    lemmatizer = WordNetLemmatizer() # lemmatize the words
    text = " ".join([lemmatizer.lemmatize(word) for word in text.split()])
    return text

In [68]:
df['clean_statement'] = df['statement'].apply(clean_text)

In [69]:
# Example: TF-IDF Vectorization
from sklearn.feature_extraction.text import TfidfVectorizer

vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df['clean_statement'])
y = df['status']

In [94]:
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import  accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_curve, auc
from sklearn.feature_extraction.text import TfidfVectorizer

In [71]:
# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [72]:
# Models to evaluate
model = {'SVM': SVC()}

In [73]:
# Hyperparameter tuning and cross-validation

best_accuracy = 0
param_grid = {'C': [0.1, 1, 10], 'kernel': ['linear', 'rbf']}
grid_search = GridSearchCV(SVC(), param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

y_pred = grid_search.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"Accuracy: {accuracy}")
print(classification_report(y_test, y_pred))


if accuracy > best_accuracy:
    best_accuracy = accuracy
    best_model = grid_search.best_estimator_

# The line containing 'z' has been removed as it serves no purpose
print(f"\nBest performing model: {type(best_model).__name__} with accuracy: {best_accuracy}")

Accuracy: 0.7271147161066048
                      precision    recall  f1-score   support

             Anxiety       0.78      0.83      0.80       121
             Bipolar       0.82      0.71      0.76        89
          Depression       0.64      0.74      0.69       514
              Normal       0.83      0.94      0.88       540
Personality disorder       0.92      0.26      0.40        43
              Stress       0.62      0.29      0.39        83
            Suicidal       0.65      0.51      0.57       336

            accuracy                           0.73      1726
           macro avg       0.75      0.61      0.64      1726
        weighted avg       0.72      0.73      0.71      1726


Best performing model: SVC with accuracy: 0.7271147161066048


['best_model.joblib']

In [75]:
loaded_model = joblib.load(model_filename)

In [102]:
def interactive_prediction_loop():
    print("Welcome to the prediction model! Type 'exit' to quit.")

    while True:
        user_input = input("Enter a statement for prediction: ")

        # Check for exit command
        if user_input.lower() in ['exit', 'quit']:
            print("Exiting the prediction loop. Goodbye!")
            break

        # Clean the user input
        cleaned_input = clean_text(user_input)

        # Vectorize the new data using the same vectorizer
        X_new = vectorizer.transform([cleaned_input])

        # Make predictions
        new_prediction = loaded_model.predict(X_new)

        # Print the prediction
        print(f"Statement: {user_input}")
        print(f"Prediction: {new_prediction[0]}")  # Assuming the prediction is a single value

# Start the interactive prediction loop
interactive_prediction_loop()

Welcome to the prediction model! Type 'exit' to quit.
Enter a statement for prediction: Well , I am been depressed.
Statement: Well , I am been depressed.
Prediction: Depression
Enter a statement for prediction: I think suicide be better option.
Statement: I think suicide be better option.
Prediction: Suicidal
Enter a statement for prediction: All wrong, back off dear, forward doubt. Stay in a restless and restless place
Statement: All wrong, back off dear, forward doubt. Stay in a restless and restless place
Prediction: Anxiety
Enter a statement for prediction: Work, despair, dread here is to another day of shit! least it is payday though there is not anything I can buy that will make me happy for more than 5minutes... another night of dreading tomorrow
Statement: Work, despair, dread here is to another day of shit! least it is payday though there is not anything I can buy that will make me happy for more than 5minutes... another night of dreading tomorrow
Prediction: Depression
Enter