### Dependencies

In [269]:
# utils 
import torch

# data
from datasets import load_dataset
from torchtext.data.utils import get_tokenizer
from torchtext.data import Field, LabelField, TabularDataset, BucketIterator

# model
import torch.nn as nn
import torch.nn.functional as F

# training and evaluation
import wandb
import pytorch_lightning as pl
from pytorch_lightning.metrics import Accuracy, Fbeta, Precision, Recall
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping

In [32]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cpu


In [339]:
import torchtext

In [340]:
torchtext.__version__

'0.6.0'

In [341]:
print(torch.__version__, pl.__version__)

1.5.1 1.0.3


### Models

### 1. GRU

In [253]:
class GRUClassfier(nn.Module):
    def __init__(self, vocab_size, embedding_dim, padding_idx, hidden_size=768, num_layers=1, dropout=0.10, num_classes=3):
        super(GRUClassfier, self).__init__()
        
        # embedding layer
        self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dim, padding_idx=padding_idx)
        # gru module
        self.gru = nn.GRU(
            input_size=embedding_dim,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
#             dropout=dropout,
            bidirectional=True
        )
        
        # full connected layer as classifier
        self.fc = nn.Sequential(*[
            nn.Linear(in_features=2*num_layers*hidden_size, out_features=256),
            nn.ReLU(),
            nn.Linear(in_features=256, out_features=num_classes)
        ])
        
    def forward(self, x, hidden=None):
        
        batch_size = x.shape[0]
        
        # get the embedding
        embedded = self.embedding(x)
        
        # pass the embedding and initial hidden states to GRU (cell state will be same as hidden states) 
        _, outputs = self.gru(embedded, hidden)
        
        # outputs.shape -> [2*num_layers, batch_size, hidden_size] convert it into batch_first format
        outputs = outputs.permute(1, 0, 2)
        outputs = outputs.reshape(batch_size, -1)
#         print(outputs.shape)
        
        # last hidden states of the BidirectionalGRU will be passed to classifier will returns logits 
        logits = self.fc(outputs)
        return logits

#### Dataset preparation for GRU

In [310]:
# create fields
TEXT = Field(lower=True, tokenize="spacy", batch_first=True)
LABEL = LabelField(batch_first=True)

In [311]:
fields = [("id", None), ("Tweets", TEXT), ("Label", LABEL)]

In [312]:
# load the train data
train = TabularDataset(
    path="../dataset/train.csv",
    format="CSV",
    fields=fields,
    skip_header=True,

)
# load the test data
test = TabularDataset(
    path="../dataset/test.csv",
    format="CSV",
    fields=fields,
    skip_header=True,

)
# take some data from test for validation during training
test, val = test.split(split_ratio=0.8)

In [313]:
# build the vocabulary
TEXT.build_vocab(train)
LABEL.build_vocab(train)

In [314]:
# print(len(text.vocab), len(target.vocab))

In [315]:
## create data loaders
train_loader, val_loader, test_loader = BucketIterator.splits(
    datasets=(train, val, test),
    batch_size=64,
    device=device,
    shuffle=True,
    sort=False
)

In [316]:
for batch in train_loader:
    print(batch.Tweets.shape, batch.Label.shape)
    break

torch.Size([64, 35]) torch.Size([64])


In [317]:
gru = GRUClassfier(
    vocab_size=len(TEXT.vocab),
    embedding_dim=300,
    padding_idx=TEXT.vocab[TEXT.pad_token],
)

In [264]:
logits = gru(batch.Tweets)
print(logits.shape)

torch.Size([64, 1536])
torch.Size([64, 3])


In [259]:
# nn.GRU?

## Training 

In [333]:
class LightningModel(pl.LightningModule):
    
    def __init__(self, model):
        super(LightningModel, self).__init__()
        
        self.model = model

    def forward(self, x):
        logits  = self.model(x)
        return logits
    
    def configure_optimizers(self):
        return torch.optim.Adam(params=self.parameters(), lr=1e-1)
    
    def train_dataloader(self):
        return train_loader
    
    def training_step(self, batch, batch_idx):
        x, y = batch.Tweets, batch.Label
        y_ = self(x)
        loss = F.cross_entropy(y_, y)
        acc = accuracy_score(y, y_.argmax(dim=1))
        f1 = f1_score(y, y_.argmax(dim=1), average="macro")
        wandb.log({"loss":loss, "accuraccy":acc, "f1_score":f1})
        return {"loss":loss, "accuraccy":acc, "f1_score":f1}
    
    def val_dataloader(self):
        return val_loader
    
    def validation_step(self, batch, batch_idx):
        x, y = batch.Tweets, batch.Label
        y_ = self(x)
        loss = F.cross_entropy(y_, y)
        acc = accuracy_score(y, y_.argmax(dim=1))
        f1 = f1_score(y, y_.argmax(dim=1), average="macro")
        return {"val_loss":loss, "val_accuracy":torch.tensor([acc]), "val_f1":torch.tensor([f1])}
    
    def validation_epoch_end(self, outputs):
        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        avg_acc = torch.stack([x['val_accuracy'] for x in outputs]).mean()
        avg_f1 = torch.stack([x['val_f1'] for x in outputs]).mean()
        wandb.log({"val_loss":avg_loss, "val_accuracy":avg_acc, "val_f1":avg_f1})
        return {"val_loss":avg_loss, "val_accuracy":avg_acc, "val_f1":avg_f1}
    
    def test_dataloader(self):
        return test_loader
    
    def test_step(self, batch, batch_idx):
        x, y = batch.Tweets, batch.Label
        y_ = self(x)
        loss = F.cross_entropy(y_, y)
        acc = accuracy_score(y, y_.argmax(dim=1))
        f1 = f1_score(y, y_.argmax(dim=1), average="macro")
        precision = precision_score(y, y_.argmax(dim=1), average="macro")
        recall = recall_score(y, y_.argmax(dim=1), average="macro")
        return {"test_loss":loss, "test_precision":precision, "test_recall":recall, "test_acc":acc, "test_f1":f1}

In [334]:
model = LightningModel(model=gru)

In [335]:
logger = WandbLogger(
    name="gru",
    save_dir="./models/",
    project="hate-speech-detection",
    log_model=True,
)
early_stopping = EarlyStopping(
    monitor="val_accuracy",
    min_delta=0.005,
)
checkpoints = ModelCheckpoint(
    filepath="./models/{epoch}-{val_accuracy}",
    monitor="val_accuracy",
    save_top_k=1
)

In [336]:
trainer = pl.Trainer(
    logger=logger,
    checkpoint_callback=checkpoints,
    default_root_dir="./models/",
    max_epochs=1
)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores


In [337]:
trainer.fit(model)



VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

[34m[1mwandb[0m: wandb version 0.10.10 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade



  | Name  | Type         | Params
---------------------------------------
0 | model | GRUClassfier | 12 M  


Validation sanity check: 0it [00:00, ?it/s]torch.Size([64, 1536])
Validation sanity check:  50%|█████     | 1/2 [00:00<00:00,  8.24it/s]torch.Size([64, 1536])
Epoch 0:   0%|          | 0/212 [00:00<?, ?it/s]                      torch.Size([64, 1536])




Epoch 0:   0%|          | 1/212 [00:01<06:05,  1.73s/it, loss=1.089, v_num=7hkt]torch.Size([64, 1536])
Epoch 0:   1%|          | 2/212 [00:03<05:21,  1.53s/it, loss=11.069, v_num=7hkt]torch.Size([64, 1536])
Epoch 0:   1%|▏         | 3/212 [00:04<04:39,  1.34s/it, loss=11.908, v_num=7hkt]torch.Size([64, 1536])
Epoch 0:   2%|▏         | 4/212 [00:04<03:50,  1.11s/it, loss=18.305, v_num=7hkt]torch.Size([64, 1536])
Epoch 0:   2%|▏         | 5/212 [00:04<03:19,  1.04it/s, loss=15.485, v_num=7hkt]torch.Size([64, 1536])
Epoch 0:   3%|▎         | 6/212 [00:05<02:59,  1.15it/s, loss=13.110, v_num=7hkt]torch.Size([64, 1536])
Epoch 0:   3%|▎         | 7/212 [00:05<02:44,  1.25it/s, loss=11.378, v_num=7hkt]torch.Size([64, 1536])
Epoch 0:   4%|▍         | 8/212 [00:06<02:33,  1.33it/s, loss=10.081, v_num=7hkt]torch.Size([64, 1536])
Epoch 0:   4%|▍         | 9/212 [00:06<02:23,  1.41it/s, loss=9.065, v_num=7hkt] torch.Size([64, 1536])
Epoch 0:   5%|▍         | 10/212 [00:06<02:17,  1.47it/s, loss=8.

1

In [338]:
trainer.test()

Testing: 0it [00:00, ?it/s]torch.Size([64, 1536])
Testing:   2%|▏         | 1/41 [00:00<00:06,  6.43it/s]torch.Size([64, 1536])
Testing:   5%|▍         | 2/41 [00:00<00:05,  6.57it/s]

  _warn_prf(average, modifier, msg_start, len(result))


torch.Size([64, 1536])
Testing:   7%|▋         | 3/41 [00:00<00:05,  6.88it/s]torch.Size([64, 1536])
Testing:  10%|▉         | 4/41 [00:00<00:05,  6.97it/s]torch.Size([64, 1536])
Testing:  12%|█▏        | 5/41 [00:00<00:05,  7.19it/s]torch.Size([64, 1536])
Testing:  15%|█▍        | 6/41 [00:00<00:04,  7.23it/s]torch.Size([64, 1536])
Testing:  17%|█▋        | 7/41 [00:00<00:04,  6.94it/s]torch.Size([64, 1536])
Testing:  20%|█▉        | 8/41 [00:01<00:04,  6.76it/s]torch.Size([64, 1536])
Testing:  22%|██▏       | 9/41 [00:01<00:04,  6.80it/s]torch.Size([64, 1536])
Testing:  24%|██▍       | 10/41 [00:01<00:04,  6.61it/s]torch.Size([64, 1536])
Testing:  27%|██▋       | 11/41 [00:01<00:04,  6.83it/s]torch.Size([64, 1536])
Testing:  29%|██▉       | 12/41 [00:01<00:04,  6.69it/s]torch.Size([64, 1536])
Testing:  32%|███▏      | 13/41 [00:01<00:03,  7.27it/s]torch.Size([64, 1536])
Testing:  34%|███▍      | 14/41 [00:01<00:03,  7.40it/s]torch.Size([64, 1536])
Testing:  37%|███▋      | 15/41 [00:

[{'test_loss': 0.872868537902832,
  'test_precision': 0.21875,
  'test_recall': 0.3333333333333333,
  'test_acc': 0.65625,
  'test_f1': 0.2641509433962264},
 {'test_loss': 0.8728684186935425,
  'test_precision': 0.21875,
  'test_recall': 0.3333333333333333,
  'test_acc': 0.65625,
  'test_f1': 0.2641509433962264},
 {'test_loss': 0.8453578948974609,
  'test_precision': 0.22916666666666666,
  'test_recall': 0.3333333333333333,
  'test_acc': 0.6875,
  'test_f1': 0.2716049382716049},
 {'test_loss': 1.0694180727005005,
  'test_precision': 0.17708333333333334,
  'test_recall': 0.3333333333333333,
  'test_acc': 0.53125,
  'test_f1': 0.23129251700680273},
 {'test_loss': 0.7790889739990234,
  'test_precision': 0.25396825396825395,
  'test_recall': 0.32653061224489793,
  'test_acc': 0.75,
  'test_f1': 0.28571428571428575},
 {'test_loss': 0.8642175197601318,
  'test_precision': 0.21875,
  'test_recall': 0.3333333333333333,
  'test_acc': 0.65625,
  'test_f1': 0.2641509433962264},
 {'test_loss': 0.8