In [1]:
!git clone https://github.com/tiasa2/Sad_Depression_Classification.git

Cloning into 'Sad_Depression_Classification'...
remote: Enumerating objects: 22, done.[K
remote: Counting objects: 100% (22/22), done.[K
remote: Compressing objects: 100% (18/18), done.[K
remote: Total 22 (delta 7), reused 11 (delta 2), pack-reused 0[K
Unpacking objects: 100% (22/22), done.


In [2]:
!pip install transformers==3

Collecting transformers==3
  Downloading transformers-3.0.0-py3-none-any.whl (754 kB)
[?25l[K     |▍                               | 10 kB 26.1 MB/s eta 0:00:01[K     |▉                               | 20 kB 32.7 MB/s eta 0:00:01[K     |█▎                              | 30 kB 40.0 MB/s eta 0:00:01[K     |█▊                              | 40 kB 26.3 MB/s eta 0:00:01[K     |██▏                             | 51 kB 26.6 MB/s eta 0:00:01[K     |██▋                             | 61 kB 20.8 MB/s eta 0:00:01[K     |███                             | 71 kB 19.7 MB/s eta 0:00:01[K     |███▌                            | 81 kB 21.6 MB/s eta 0:00:01[K     |████                            | 92 kB 23.6 MB/s eta 0:00:01[K     |████▍                           | 102 kB 21.2 MB/s eta 0:00:01[K     |████▊                           | 112 kB 21.2 MB/s eta 0:00:01[K     |█████▏                          | 122 kB 21.2 MB/s eta 0:00:01[K     |█████▋                          | 133 kB 21

In [3]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import transformers
from transformers import AutoModel, BertTokenizerFast

# Use GPU
device = torch.device("cpu")

In [4]:
bert = AutoModel.from_pretrained('bert-base-uncased')
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')

Downloading:   0%|          | 0.00/433 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/440M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

In [5]:
class BERT_Arch(nn.Module):
    def __init__(self, bert):
      super(BERT_Arch, self).__init__()
      self.bert = bert 
      self.dropout = nn.Dropout(0.1)
      self.relu =  nn.ReLU()
      self.fc1 = nn.Linear(768,512)
      self.fc2 = nn.Linear(512,2)
      self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, sent_id, mask):
      _, cls_hs = self.bert(sent_id, attention_mask=mask)
      x = self.fc1(cls_hs)
      x = self.relu(x)
      x = self.dropout(x)
      x = self.fc2(x)
      x = self.softmax(x)
      return x

In [6]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [7]:
model = BERT_Arch(bert)
model.load_state_dict(torch.load('/content/drive/MyDrive/Sad_Depression_Classification/BERT_Baseline_weights.pt', map_location=device))

<All keys matched successfully>

In [8]:
df = pd.read_csv("./Sad_Depression_Classification/ACL_FINAL_Data.csv")
df.rename(columns = {'Text':'tweet', 'Label':'target'}, inplace = True)
df.target = df.target.astype(int)
df.head(20)

Unnamed: 0.1,Unnamed: 0,tweet,target
0,251,i feel guilty i wont be able to give this litt...,0
1,2295,There’s no point in even trying anymore. It’s ...,1
2,918,i was taught to complain and feel unhappy but ...,0
3,1990,There are probably serial killers who sleep ea...,1
4,1241,"His brown , nearly auburn hair clung to his sc...",0
5,1724,im feeling like the lunches are dull,0
6,726,i feel rotten but no amount of suggesting that...,0
7,1242,I am sad because some relations to friends are...,0
8,1132,The loss of a person I loved very much is some...,0
9,706,i forgive myself that i have accepted and allo...,0


In [9]:
train_text, temp_text, train_labels, temp_labels = train_test_split(df['tweet'], df['target'], 
                                                                    random_state=2018, 
                                                                    test_size=0.2, 
                                                                    stratify=df['target'])

# Using temp_text and temp_labels to create validation and test set
val_text, test_text, val_labels, test_labels = train_test_split(temp_text, temp_labels, 
                                                                random_state=2018, 
                                                                test_size=0.1, 
                                                                stratify=temp_labels)

In [10]:
def test_model(texts, max_seq_len=25):
  tokens_p = tokenizer.batch_encode_plus(
      texts,
      max_length = max_seq_len,
      pad_to_max_length=True,
      truncation=True,
      return_token_type_ids=False
  )
  p_seq = torch.tensor(tokens_p['input_ids'])
  p_mask = torch.tensor(tokens_p['attention_mask'])
  with torch.no_grad():
    preds = model(p_seq.to(device), p_mask.to(device))
    preds = preds.detach().cpu().numpy()
    return preds

In [11]:
def analyze(texts, labels):
  texts = texts.tolist()
  labels = labels.tolist()
  preds = test_model(texts)
  preds = np.argmax(preds, axis = 1)
  print(classification_report(labels, preds))

In [12]:
print("Test")
analyze(test_text, test_labels)

Test
              precision    recall  f1-score   support

           0       0.90      0.92      0.91        39
           1       0.88      0.85      0.87        27

    accuracy                           0.89        66
   macro avg       0.89      0.89      0.89        66
weighted avg       0.89      0.89      0.89        66

