In [None]:
# Fetch the project repository; a later cell reads ACL_FINAL_Data.csv from the cloned folder.
!git clone https://github.com/tiasa2/Sad_Depression_Classification.git

Cloning into 'Sad_Depression_Classification'...
remote: Enumerating objects: 22, done.[K
remote: Counting objects: 100% (22/22), done.[K
remote: Compressing objects: 100% (18/18), done.[K
remote: Total 22 (delta 7), reused 11 (delta 2), pack-reused 0[K
Unpacking objects: 100% (22/22), done.


In [None]:
!pip install transformers==3

Collecting transformers==3
  Downloading transformers-3.0.0-py3-none-any.whl (754 kB)
[?25l[K     |▍                               | 10 kB 16.8 MB/s eta 0:00:01[K     |▉                               | 20 kB 9.4 MB/s eta 0:00:01[K     |█▎                              | 30 kB 8.0 MB/s eta 0:00:01[K     |█▊                              | 40 kB 7.4 MB/s eta 0:00:01[K     |██▏                             | 51 kB 4.1 MB/s eta 0:00:01[K     |██▋                             | 61 kB 4.3 MB/s eta 0:00:01[K     |███                             | 71 kB 4.2 MB/s eta 0:00:01[K     |███▌                            | 81 kB 4.7 MB/s eta 0:00:01[K     |████                            | 92 kB 5.0 MB/s eta 0:00:01[K     |████▍                           | 102 kB 4.0 MB/s eta 0:00:01[K     |████▊                           | 112 kB 4.0 MB/s eta 0:00:01[K     |█████▏                          | 122 kB 4.0 MB/s eta 0:00:01[K     |█████▋                          | 133 kB 4.0 MB/s eta 

In [None]:
# Importing libraries
import pandas as pd
import torch
import transformers
from torch.utils.data import Dataset, DataLoader
from transformers import DistilBertModel, DistilBertTokenizer
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
import numpy as np

# Use the GPU when one is available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
class DistillBERTClass(torch.nn.Module):
    """DistilBERT encoder followed by a small feed-forward classification head.

    The head takes the hidden state at the first token position of every
    sequence, passes it through ``Linear(768, 768) -> ReLU -> Dropout`` and
    projects it to ``num_labels`` raw scores (no softmax is applied).

    Args:
        num_labels: Number of scores produced per input sequence. Defaults to
            4, the value that used to be hard-coded.
        dropout: Dropout probability applied before the final projection.
            Defaults to 0.3, the value that used to be hard-coded.

    Note:
        The attribute names ``l1``, ``pre_classifier``, ``dropout`` and
        ``classifier`` are kept unchanged so that state dicts saved from the
        previous version of this class still load.
    """

    def __init__(self, num_labels=4, dropout=0.3):
        super().__init__()
        self.l1 = DistilBertModel.from_pretrained("distilbert-base-uncased")
        self.pre_classifier = torch.nn.Linear(768, 768)
        self.dropout = torch.nn.Dropout(dropout)
        self.classifier = torch.nn.Linear(768, num_labels)

    def forward(self, input_ids, attention_mask):
        """Return one row of ``num_labels`` raw scores per input sequence.

        Args:
            input_ids: Token id tensor forwarded to the encoder
                (assumed to be (batch, seq_len) - TODO confirm with callers).
            attention_mask: Mask tensor forwarded to the encoder alongside
                ``input_ids``.
        """
        encoder_output = self.l1(input_ids=input_ids, attention_mask=attention_mask)
        # First element of the encoder output is the per-token hidden state.
        hidden_state = encoder_output[0]
        # Keep only the first token position of every sequence as its summary vector.
        pooled = hidden_state[:, 0]
        # Functional ReLU avoids building a new nn.ReLU module on every call.
        pooled = torch.relu(self.pre_classifier(pooled))
        pooled = self.dropout(pooled)
        return self.classifier(pooled)

In [None]:
# Mount Google Drive at /content/drive (requires Google Colab); the model weights
# loaded later in this notebook are read from a path under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Build the tokenizer and the classifier, then restore the saved weights from Drive.
# NOTE(review): the tokenizer checkpoint is 'distilbert-base-cased' while DistillBERTClass
# wraps 'distilbert-base-uncased', so the two vocabularies may not match. Confirm which
# tokenizer was used when these weights were trained before changing either name.
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-cased')
model = DistillBERTClass()
model.to(device)
# map_location remaps the stored tensors onto `device` when the checkpoint is loaded.
# NOTE(review): torch.load unpickles the file - only load checkpoints from a trusted source.
# NOTE(review): model.eval() is not called in this cell; inference code must switch the
# model to eval mode itself so the dropout layer in the head is disabled.
model.load_state_dict(torch.load('/content/drive/MyDrive/Sad_Depression_Classification/DistilBert_Baseline_weights.pt', map_location=device))

Downloading:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/442 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/268M [00:00<?, ?B/s]

<All keys matched successfully>

In [None]:
# Read the labelled dataset from the cloned repository, rename the text/label
# columns to the names used by the rest of the notebook, and store labels as ints.
df = (
    pd.read_csv("./Sad_Depression_Classification/ACL_FINAL_Data.csv")
    .rename(columns={'Text': 'tweet', 'Label': 'target'})
    .astype({'target': int})
)
df.head(20)

Unnamed: 0.1,Unnamed: 0,tweet,target
0,251,i feel guilty i wont be able to give this litt...,0
1,2295,There’s no point in even trying anymore. It’s ...,1
2,918,i was taught to complain and feel unhappy but ...,0
3,1990,There are probably serial killers who sleep ea...,1
4,1241,"His brown , nearly auburn hair clung to his sc...",0
5,1724,im feeling like the lunches are dull,0
6,726,i feel rotten but no amount of suggesting that...,0
7,1242,I am sad because some relations to friends are...,0
8,1132,The loss of a person I loved very much is some...,0
9,706,i forgive myself that i have accepted and allo...,0


In [None]:
def test_model(texts, MAX_LEN=512, batch_size=32):
  """Run the global `model` on raw texts and return its raw output scores.

  Args:
    texts: Iterable of strings to score.
    MAX_LEN: Every encoded sequence is truncated or padded to this many tokens.
    batch_size: Number of texts sent through the model per forward pass, so
      that memory use does not grow with the total number of texts.

  Returns:
    numpy.ndarray with one row of model outputs per input text, in input order.

  Raises:
    ValueError: If `texts` is empty or `batch_size` is not a positive integer.
  """
  texts = list(texts)
  if not texts:
    raise ValueError("texts must contain at least one string")
  if batch_size < 1:
    raise ValueError("batch_size must be a positive integer")

  # The classification head contains a dropout layer. Unless the model is in
  # eval mode that layer stays active and predictions become random from run
  # to run, so switch to eval mode here and restore the previous mode afterwards.
  was_training = model.training
  model.eval()
  batch_outputs = []
  try:
    with torch.no_grad():
      for start in range(0, len(texts), batch_size):
        encoded = tokenizer.batch_encode_plus(
            texts[start:start + batch_size],
            add_special_tokens=True,
            max_length=MAX_LEN,
            # Non-deprecated spelling of pad_to_max_length=True.
            padding='max_length',
            truncation=True,
            return_token_type_ids=False
        )
        input_ids = torch.tensor(encoded['input_ids']).to(device)
        attention_mask = torch.tensor(encoded['attention_mask']).to(device)
        scores = model(input_ids, attention_mask)
        batch_outputs.append(scores.detach().cpu().numpy())
  finally:
    model.train(was_training)
  return np.concatenate(batch_outputs, axis=0)

In [None]:
def analyze(texts, labels):
  """Print a classification report for the model's predictions on `texts`.

  Both arguments must provide `.tolist()`. The predicted class of each text is
  the index of the largest score in its row of `test_model` output.
  """
  text_list, label_list = texts.tolist(), labels.tolist()
  scores = test_model(text_list)
  predicted_classes = scores.argmax(axis=1)
  print(classification_report(label_list, predicted_classes))

In [None]:
# Hold out 20% of the data (stratified by label); the remaining 80% is the training split.
train_text, temp_text, train_labels, temp_labels = train_test_split(
    df['tweet'],
    df['target'],
    test_size=0.2,
    stratify=df['target'],
    random_state=2018,
)

# Divide the held-out part again (stratified): 90% becomes validation, 10% becomes test.
val_text, test_text, val_labels, test_labels = train_test_split(
    temp_text,
    temp_labels,
    test_size=0.1,
    stratify=temp_labels,
    random_state=2018,
)

In [None]:
# Report precision/recall/F1 of the loaded model on the held-out test split.
print("Test")
analyze(test_text, test_labels)

Test
              precision    recall  f1-score   support

           0       0.93      1.00      0.96        39
           1       1.00      0.89      0.94        27

    accuracy                           0.95        66
   macro avg       0.96      0.94      0.95        66
weighted avg       0.96      0.95      0.95        66

