<a href="https://colab.research.google.com/github/savindumahasen/Depression-Detection-Using-Sentiment-Analysis/blob/main/DepressionDetectionUsingNLP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
## import the main libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
## Import dataset and read that dataset

In [3]:
dataset=pd.read_csv("dataset.csv", encoding="latin-1")
dataset.head()

Unnamed: 0.1,Unnamed: 0,statement,status
0,0,oh my gosh,Anxiety
1,1,"trouble sleeping, confused mind, restless hear...",Anxiety
2,2,"All wrong, back off dear, forward doubt. Stay ...",Anxiety
3,3,I've shifted my focus to something else but I'...,Anxiety
4,4,"I'm restless and restless, it's been a month n...",Anxiety


In [4]:
## get the information of dataset

In [5]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 53043 entries, 0 to 53042
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Unnamed: 0  53043 non-null  int64 
 1   statement   52681 non-null  object
 2   status      53043 non-null  object
dtypes: int64(1), object(2)
memory usage: 1.2+ MB


In [6]:
## check the null values of the dataset

In [7]:
dataset.isnull().sum()

Unnamed: 0,0
Unnamed: 0,0
statement,362
status,0


In [8]:
## split the dataset into train and test

In [9]:
from sklearn.model_selection import train_test_split
train_set,test_set=train_test_split(dataset, test_size=0.2, random_state=42)

In [10]:
train_set.shape, test_set.shape

((42434, 3), (10609, 3))

In [11]:
train_set.head()

Unnamed: 0.1,Unnamed: 0,statement,status
52275,52275,How do you make eye contact with people? I rea...,Anxiety
49099,49099,"Balancing school, social life and work How to ...",Stress
10636,10636,weed made me motivated and happy again. motivated,Depression
23796,23796,24/7 i cringe at my past thoughts omg its so p...,Suicidal
48863,48863,,Stress


In [12]:
test_set.head()

Unnamed: 0.1,Unnamed: 0,statement,status
22261,22261,Just as the the title says. I feel like one is...,Depression
41400,41400,a blackened sky encroached tugging behind it m...,Depression
20065,20065,"It gives you insomnia, which in turn makes you...",Depression
30036,30036,"Hello all, I'm a new submitter to this channel...",Normal
780,780,Thank God the CB is over for Eid,Normal


## Data Preprocessing

In [13]:
## Drop the unnecessary columns

In [14]:
train_set_new=train_set.drop('Unnamed: 0', axis=1)
train_set_new.head()

Unnamed: 0,statement,status
52275,How do you make eye contact with people? I rea...,Anxiety
49099,"Balancing school, social life and work How to ...",Stress
10636,weed made me motivated and happy again. motivated,Depression
23796,24/7 i cringe at my past thoughts omg its so p...,Suicidal
48863,,Stress


In [15]:
## Get the information of new_train_set

In [16]:
train_set_new.info()

<class 'pandas.core.frame.DataFrame'>
Index: 42434 entries, 52275 to 15795
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   statement  42157 non-null  object
 1   status     42434 non-null  object
dtypes: object(2)
memory usage: 994.5+ KB


In [17]:
## check the value counts of target variable

In [18]:
train_set_new['status'].value_counts()

Unnamed: 0_level_0,count
status,Unnamed: 1_level_1
Normal,13024
Depression,12304
Suicidal,8635
Anxiety,3109
Bipolar,2297
Stress,2112
Personality disorder,953


In [19]:
train_set['statement'].isnull().sum()

np.int64(277)

In [20]:
## Filling the missing values

In [21]:
statement_column=train_set_new['statement'].mode()[0]
statement_column

'what do you mean?'

In [22]:
train_set_new['statement']=train_set_new['statement'].fillna(statement_column)
train_set_new['statement'].head()

Unnamed: 0,statement
52275,How do you make eye contact with people? I rea...
49099,"Balancing school, social life and work How to ..."
10636,weed made me motivated and happy again. motivated
23796,24/7 i cringe at my past thoughts omg its so p...
48863,what do you mean?


In [24]:
## chech the null values of statement column after filling the null values

In [25]:
train_set_new['statement'].isnull().sum()

np.int64(0)

In [26]:
## text preprocessing

In [28]:
#### import main text preprocessing libraries

In [29]:
import string
import re

In [30]:
## convert the uppercase letters into lowercase letters

In [31]:
train_set_new['statement']=train_set_new['statement'].apply(lambda x:" ".join(x.lower() for x in x.split()))
train_set_new['statement'].head()

Unnamed: 0,statement
52275,how do you make eye contact with people? i rea...
49099,"balancing school, social life and work how to ..."
10636,weed made me motivated and happy again. motivated
23796,24/7 i cringe at my past thoughts omg its so p...
48863,what do you mean?


In [32]:
## Remove the links from text

In [35]:
train_set_new['statement']=train_set_new['statement'].apply(lambda x:" ".join(re.sub(r'https?:\/\/.*[\r\n]*','',x,flags=re.MULTILINE) for x in x.split()))
train_set_new['statement'].head()

Unnamed: 0,statement
52275,how do you make eye contact with people? i rea...
49099,"balancing school, social life and work how to ..."
10636,weed made me motivated and happy again. motivated
23796,24/7 i cringe at my past thoughts omg its so p...
48863,what do you mean?


In [36]:
## remove the punctuation marks

In [37]:
def remove_punctuation(text):
    for punctuations in string.punctuation:
        text=text.replace(punctuations,'')
    return text
train_set_new['statement']=train_set_new['statement'].apply(remove_punctuation)
train_set_new['statement'].head()

Unnamed: 0,statement
52275,how do you make eye contact with people i real...
49099,balancing school social life and work how to d...
10636,weed made me motivated and happy again motivated
23796,247 i cringe at my past thoughts omg its so pa...
48863,what do you mean


In [38]:
## remove the numbers from text

In [39]:
train_set_new['statement']=train_set_new['statement'].str.replace('\d+','',regex=True)
train_set_new['statement'].head()

Unnamed: 0,statement
52275,how do you make eye contact with people i real...
49099,balancing school social life and work how to d...
10636,weed made me motivated and happy again motivated
23796,i cringe at my past thoughts omg its so painf...
48863,what do you mean


In [40]:
## remove the stop words

In [44]:
## import the natural language tool kits
import nltk

In [45]:
## download the stopwords

In [46]:
nltk.download('stopwords', download_dir='model/stopwords')

[nltk_data] Downloading package stopwords to model/stopwords...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [47]:
## load the english stop words

In [50]:
with open("/content/model/stopwords/corpora/stopwords/english", 'r') as file:
  stopwords=file.read().splitlines()

In [51]:
stopwords

['a',
 'about',
 'above',
 'after',
 'again',
 'against',
 'ain',
 'all',
 'am',
 'an',
 'and',
 'any',
 'are',
 'aren',
 "aren't",
 'as',
 'at',
 'be',
 'because',
 'been',
 'before',
 'being',
 'below',
 'between',
 'both',
 'but',
 'by',
 'can',
 'couldn',
 "couldn't",
 'd',
 'did',
 'didn',
 "didn't",
 'do',
 'does',
 'doesn',
 "doesn't",
 'doing',
 'don',
 "don't",
 'down',
 'during',
 'each',
 'few',
 'for',
 'from',
 'further',
 'had',
 'hadn',
 "hadn't",
 'has',
 'hasn',
 "hasn't",
 'have',
 'haven',
 "haven't",
 'having',
 'he',
 "he'd",
 "he'll",
 'her',
 'here',
 'hers',
 'herself',
 "he's",
 'him',
 'himself',
 'his',
 'how',
 'i',
 "i'd",
 'if',
 "i'll",
 "i'm",
 'in',
 'into',
 'is',
 'isn',
 "isn't",
 'it',
 "it'd",
 "it'll",
 "it's",
 'its',
 'itself',
 "i've",
 'just',
 'll',
 'm',
 'ma',
 'me',
 'mightn',
 "mightn't",
 'more',
 'most',
 'mustn',
 "mustn't",
 'my',
 'myself',
 'needn',
 "needn't",
 'no',
 'nor',
 'not',
 'now',
 'o',
 'of',
 'off',
 'on',
 'once',
 'on

In [52]:
train_set_new['statement']=train_set_new['statement'].apply(lambda x:" ".join(x for x in x.split() if x not in stopwords))

In [53]:
train_set_new['statement'].head()

Unnamed: 0,statement
52275,make eye contact people really struggle person...
49099,balancing school social life work balance scho...
10636,weed made motivated happy motivated
23796,cringe past thoughts omg painful want die badl...
48863,mean


In [54]:
## Remove the influectional form from text

In [57]:
from nltk.stem import PorterStemmer

ps=PorterStemmer()
train_set_new['statement']=train_set_new['statement'].apply(lambda x:" ".join(ps.stem(x) for x in x.split()))
train_set_new['statement'].head()

Unnamed: 0,statement
52275,make eye contact peopl realli struggl person a...
49099,balanc school social life work balanc school w...
10636,weed made motiv happi motiv
23796,cring past thought omg pain want die badli cri...
48863,mean


In [58]:
## print the preprocessed train set

In [59]:
train_set_new.head()

Unnamed: 0,statement,status
52275,make eye contact peopl realli struggl person a...,Anxiety
49099,balanc school social life work balanc school w...,Stress
10636,weed made motiv happi motiv,Depression
23796,cring past thought omg pain want die badli cri...,Suicidal
48863,mean,Stress


In [60]:
## convert the preprocessed data into numerical vectors

In [61]:
#### Building the vocabulary

In [63]:
from collections import Counter
vocab=Counter()

In [64]:
vocab

Counter()

In [65]:
for text in train_set_new['statement']:
  vocab.update(text.split())


In [66]:
vocab

Counter({'make': 12327,
         'eye': 1135,
         'contact': 605,
         'peopl': 13575,
         'realli': 13074,
         'struggl': 2675,
         'person': 5499,
         'also': 5725,
         'work': 10768,
         'webcam': 2,
         'whenev': 805,
         'look': 5438,
         'feel': 36621,
         'like': 31690,
         'kind': 2646,
         'burn': 487,
         'induc': 111,
         'horribl': 1123,
         'panic': 1513,
         'told': 3885,
         'therapi': 2238,
         'person\x92': 6,
         'shoulder': 272,
         'instead': 1072,
         'end': 6788,
         'say': 6854,
         'abnorm': 78,
         'abl': 3052,
         'balanc': 165,
         'school': 4033,
         'social': 2231,
         'life': 18005,
         'attend': 237,
         'moment': 1587,
         'even': 15286,
         'possibl': 1478,
         'go': 21061,
         'weed': 423,
         'made': 3774,
         'motiv': 1275,
         'happi': 4707,
         'cring':

In [67]:
## save the vocabulary

In [68]:
def save_vocabulary(vocab,filename):
  data="\n".join(vocab)
  file=open(filename,'w',encoding='latin-1')
  file.write(data)
  file.close()

save_vocabulary(vocab,"model/vocabulary")