In [None]:
# Mount Google Drive at /content/drive (Colab only); the dataset is read from
# this mount point further down.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Import Libraries

In [None]:
import numpy as np
import pandas as pd
import re
import string
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchtext
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from nltk.tokenize import TweetTokenizer
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix

In [None]:
# Device configuration: use the GPU when CUDA is usable, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [None]:
# Hyper-parameters
# NOTE(review): the model/training code that consumes these values is not in
# this part of the notebook; the notes below are inferred from names — confirm.
sequence_length = 28  # presumably the number of tokens per message — TODO confirm
input_size = 50  # presumably the per-token feature size; GloVe is loaded with dim=50 below
hidden_size = 128
num_layers = 3
num_classes = 2  # the dataset's label column shows the values 0 and 1
batch_size = 100
num_epochs = 25
learning_rate = 0.001

# Confusion Matrix

In [None]:
def plot_confusion_matrix(cm, classes):
  """Render a confusion matrix as an annotated heat map on the current figure.

  Args:
    cm: 2-D NumPy array of counts; rows are labelled as true classes and
      columns as predicted classes.
    classes: Sequence of class names used as tick labels on both axes.
  """
  plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
  plt.title('Confusion matrix')
  plt.colorbar()
  tick_marks = np.arange(len(classes))
  plt.xticks(tick_marks, classes, rotation=45)
  plt.yticks(tick_marks, classes)

  # Counts above half of the maximum are written in white, the rest in black.
  thresh = cm.max() / 2.
  for i in range(cm.shape[0]):
    for j in range(cm.shape[1]):
      plt.text(j, i, cm[i, j], horizontalalignment="center",
               color="white" if cm[i, j] > thresh else "black")

  # Axis labels and layout belong to the whole figure, so set them once after
  # every cell has been annotated instead of on each loop iteration.
  plt.ylabel('True label')
  plt.xlabel('Predicted label')
  plt.tight_layout()

# Load GloVe Embeddings

In [None]:
# Load the 50-dimensional GloVe "6B" vectors, capped at 20000 vectors (max_vectors).
# The first time you run this, it will download a ~823MB file.
glove = torchtext.vocab.GloVe(name="6B", dim=50, max_vectors=20000)

# Read the DataFrame

In [None]:
# Load the dataset from the mounted Drive; the file is decoded as ISO-8859-1.
df = pd.read_csv('/content/drive/MyDrive/sca3/Dataset 1.csv', encoding = "ISO-8859-1")

# Data Preprocessing

## Remove Stopwords

In [None]:
# Remove English stop words from every message.
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')
stop = stopwords.words('english')
# A set gives constant-time membership tests instead of scanning the list for
# every word of every message.
stop_set = set(stop)
# Messages are still mixed-case at this stage (lowercasing happens later) while
# the stop-word list is lowercase, so compare case-insensitively; otherwise
# capitalised stop words such as "ME" or "What" are kept.
df['message'] = df['message'].apply(
    lambda x: ' '.join(word for word in x.split() if word.lower() not in stop_set)
)
df

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Unnamed: 0,message,label
0,"real good moment. missssssssss much,",0
1,reading manga http://plurk.com/p/mzp1e,0
2,@comeagainjen http://twitpic.com/2y2lx - http:...,0
3,@lapcat Need send 'em accountant tomorrow. Odd...,0
4,ADD ME ON MYSPACE!!! myspace.com/LookThunder,0
...,...,...
10309,"No Depression G Herbo mood on, i'm done stress...",1
10310,What depression succumbs brain makes feel like...,1
10311,Ketamine Nasal Spray Shows Promise Against Dep...,1
10312,dont mistake bad day depression! everyone 'em!,1


## Remove URLs

In [None]:
# Strip URLs from the text
def remove_urls(text):
    """Return `text` with http(s):// links and www.-prefixed links deleted."""
    return re.sub(r'https?://\S+|www\.\S+', r'', text)
# Run the URL filter over every message, then display the frame.
df['message'] = df['message'].apply(remove_urls)
df

Unnamed: 0,message,label
0,"real good moment. missssssssss much,",0
1,reading manga,0
2,@comeagainjen -,0
3,@lapcat Need send 'em accountant tomorrow. Odd...,0
4,ADD ME ON MYSPACE!!! myspace.com/LookThunder,0
...,...,...
10309,"No Depression G Herbo mood on, i'm done stress...",1
10310,What depression succumbs brain makes feel like...,1
10311,Ketamine Nasal Spray Shows Promise Against Dep...,1
10312,dont mistake bad day depression! everyone 'em!,1


## Remove Mentions

In [None]:
def remove_mention(s):
    """Replace each '@' followed by a run of non-whitespace characters with one space."""
    return re.sub(r'@[\S]+', ' ', s)
# Run the mention filter over every message, then display the frame.
df['message'] = df['message'].apply(remove_mention)
df

Unnamed: 0,message,label
0,"real good moment. missssssssss much,",0
1,reading manga,0
2,-,0
3,"Need send 'em accountant tomorrow. Oddly, I ...",0
4,ADD ME ON MYSPACE!!! myspace.com/LookThunder,0
...,...,...
10309,"No Depression G Herbo mood on, i'm done stress...",1
10310,What depression succumbs brain makes feel like...,1
10311,Ketamine Nasal Spray Shows Promise Against Dep...,1
10312,dont mistake bad day depression! everyone 'em!,1


## Remove Numbers

In [None]:
def remove_number(s):
    """Return `s` without any character for which `str.isdigit()` is true."""
    kept = []
    for ch in s:
        if ch.isdigit():
            continue
        kept.append(ch)
    return ''.join(kept)
# Run the digit filter over every message, then display the frame.
df['message'] = df['message'].apply(remove_number)
df

Unnamed: 0,message,label
0,"real good moment. missssssssss much,",0
1,reading manga,0
2,-,0
3,"Need send 'em accountant tomorrow. Oddly, I ...",0
4,ADD ME ON MYSPACE!!! myspace.com/LookThunder,0
...,...,...
10309,"No Depression G Herbo mood on, i'm done stress...",1
10310,What depression succumbs brain makes feel like...,1
10311,Ketamine Nasal Spray Shows Promise Against Dep...,1
10312,dont mistake bad day depression! everyone 'em!,1


## Remove Punctuation

In [None]:
# Strip punctuation characters
def remove_punctuations(text):
    """Return `text` with every character listed in `string.punctuation` deleted."""
    return text.translate(str.maketrans('', '', string.punctuation))
# Run the punctuation filter over every message, then display the frame.
df['message'] = df['message'].apply(remove_punctuations)
df

Unnamed: 0,message,label
0,real good moment missssssssss much,0
1,reading manga,0
2,,0
3,Need send em accountant tomorrow Oddly I eve...,0
4,ADD ME ON MYSPACE myspacecomLookThunder,0
...,...,...
10309,No Depression G Herbo mood on im done stressin...,1
10310,What depression succumbs brain makes feel like...,1
10311,Ketamine Nasal Spray Shows Promise Against Dep...,1
10312,dont mistake bad day depression everyone em,1


## Convert to Lowercase

In [None]:
# Convert every message to lowercase with the vectorised string accessor,
# then display the frame.
df['message'] = df['message'].str.lower()
df

Unnamed: 0,message,label
0,real good moment missssssssss much,0
1,reading manga,0
2,,0
3,need send em accountant tomorrow oddly i eve...,0
4,add me on myspace myspacecomlookthunder,0
...,...,...
10309,no depression g herbo mood on im done stressin...,1
10310,what depression succumbs brain makes feel like...,1
10311,ketamine nasal spray shows promise against dep...,1
10312,dont mistake bad day depression everyone em,1


## Stemming

In [None]:
#STEMMING
from nltk.stem.porter import PorterStemmer 

# One Porter stemmer instance, shared by every call to stem_words.
stemmer = PorterStemmer()
def stem_words(text):
    """Stem each whitespace-separated word of `text` and re-join them with single spaces."""
    stemmed = (stemmer.stem(token) for token in text.split())
    return " ".join(stemmed)

# Stem every message, then display the frame.
df['message'] = df['message'].apply(stem_words)
df

Unnamed: 0,message,label
0,real good moment missssssssss much,0
1,read manga,0
2,,0
3,need send em account tomorrow oddli i even ref...,0
4,add me on myspac myspacecomlookthund,0
...,...,...
10309,no depress g herbo mood on im done stress peop...,1
10310,what depress succumb brain make feel like neve...,1
10311,ketamin nasal spray show promis against depres...,1
10312,dont mistak bad day depress everyon em,1


## Remove Empty Rows

In [None]:
# Mark messages that ended up empty after cleaning as missing. The result is
# assigned back rather than calling an inplace method on df['message']: that
# chained inplace form acts on a temporary Series and leaves df unchanged under
# pandas Copy-on-Write (and raises a FutureWarning in recent pandas 2.x).
df['message'] = df['message'].replace('', np.nan)

In [None]:
# Display the frame after the empty-string replacement.
df

Unnamed: 0,message,label
0,real good moment missssssssss much,0
1,read manga,0
2,,0
3,need send em account tomorrow oddli i even ref...,0
4,add me on myspac myspacecomlookthund,0
...,...,...
10309,no depress g herbo mood on im done stress peop...,1
10310,what depress succumb brain make feel like neve...,1
10311,ketamin nasal spray show promis against depres...,1
10312,dont mistak bad day depress everyon em,1


In [None]:
# Drop rows whose message is missing, modifying df in place. The index is not
# reset, so the remaining rows keep their original labels (gaps are expected).
df.dropna(subset=['message'], inplace=True)

In [None]:
# Display the frame after rows with a missing message were dropped.
df

Unnamed: 0,message,label
0,real good moment missssssssss much,0
1,read manga,0
3,need send em account tomorrow oddli i even ref...,0
4,add me on myspac myspacecomlookthund,0
5,sleepi good time tonight though,0
...,...,...
10308,mani suffer depress sad feel noth all persist ...,1
10309,no depress g herbo mood on im done stress peop...,1
10310,what depress succumb brain make feel like neve...,1
10311,ketamin nasal spray show promis against depres...,1
