In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# !pip install transformers

# Nova seção

In [3]:
import pandas as pd
import numpy as np
import torch

from torchtext.vocab import build_vocab_from_iterator
from transformers import AutoTokenizer, AutoModelForPreTraining

# import config as cfg


In [4]:
emb_model = AutoModelForPreTraining.from_pretrained('bert-base-multilingual-cased')
tokenizer = AutoTokenizer.from_pretrained('bert-base-multilingual-cased')


In [5]:
LABELS = {'Historia': 0, 'Administracao': 1, 'Geografia': 2, 'Biologia': 3,
          'Literatura': 4, 'Artes': 5, 'Matematica': 6}

In [6]:
x_train = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/spam_data/spam_train_original.csv')
x_test = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/spam_data/spam_aug_test.csv')
x_val = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/spam_data/spam_aug_val.csv')
x_train_aug = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/spam_data/spam_aug_train.csv')

In [7]:
x_train_aug.head(2)

Unnamed: 0,mensagem,classe,random_swap,random_deletion,context
0,<Forwarded from 448712404000>CALL 08712...,1,<Forwarded from 448712404000>CALL 08712...,<Forwarded from 448712404000>CALL 08712...,<Forwarded from 448712404000>CALL 08712...
1,<Forwarded from 448712404000>CALL 08712...,1,< Forwarded 448712404000 from > CALL 08712 ...,< Forward ##ed from 448 ##71 ##04 ##000 > CA #...,< ordered by above > and back...


In [8]:
df_rs = x_train_aug[['random_swap', 'classe']].copy()
df_rs.rename(columns={'random_swap': 'mensagem'}, inplace=True)

df_rd = x_train_aug[['random_deletion', 'classe']].copy()
df_rd.rename(columns={'random_deletion': 'mensagem'}, inplace=True)

df_context = x_train_aug[['context', 'classe']].copy()
df_context.rename(columns={'context': 'mensagem'}, inplace=True)

In [9]:
class MyDataset(torch.utils.data.Dataset):
    def __init__(self, dataframe: pd.DataFrame, tokenizer, max_len: int = None) -> object:

        self.data = dataframe
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        textos = self.data.iloc[index]['mensagem'],
        labels = self.data.iloc[index]['classe']

        encoding = self.tokenizer(textos,
                             max_length=self.max_len,
                             padding='max_length',
                             truncation=True,
                             return_tensors='pt')

        inputs = encoding['input_ids']
        tkn_type = encoding['token_type_ids']
        att_mask = encoding['attention_mask']

        return inputs, tkn_type, att_mask, labels


In [10]:
train_dataset = MyDataset(x_train, tokenizer=tokenizer, max_len=32)
val_dataset = MyDataset(x_val, tokenizer=tokenizer, max_len=32)
test_dataset = MyDataset(x_test, tokenizer=tokenizer, max_len=32)

ds_rs = MyDataset(df_rs, tokenizer=tokenizer, max_len=32)
ds_rd = MyDataset(df_rd, tokenizer=tokenizer, max_len=32)
ds_context = MyDataset(df_context, tokenizer=tokenizer, max_len=32)

del x_train, x_test, x_val, x_train_aug, df_rs, df_rd, df_context

In [11]:
from transformers import BertModel, BertTokenizer

In [12]:
import torch
from torch import nn
from transformers import BertModel

class CustomBERTModel(nn.Module):
    def __init__(self, num_classes, dropout=0.5):
        super(CustomBERTModel, self).__init__()

        # Load the pre-trained BERT model
        # self.bert = BertModel.from_pretrained('neuralmind/bert-base-portuguese-cased')
        self.bert = BertModel.from_pretrained('bert-base-multilingual-cased')

        # for p in self.bert.parameters():
        #     p.requires_grad = False

        self.dropout = nn.Dropout(dropout)
        self.linear = nn.Linear(self.bert.config.hidden_size, num_classes)
        self.relu = nn.ReLU()

        self.fc = nn.Linear(self.bert.config.hidden_size, num_classes)

    def forward(self, input_ids, attention_mask, token_tp_ids):
        # Forward pass through the BERT model
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_tp_ids)

        # Extract the last hidden state of the token [CLS] for classification task
        pooled_output = outputs.pooler_output

        dropout_output = self.dropout(pooled_output)
        linear_output = self.linear(dropout_output)
        final_layer = self.relu(linear_output)

        return final_layer  # Softmax is applied outside the model during training

model = CustomBERTModel(num_classes=7)
model_rs = CustomBERTModel(num_classes=7)
model_rd = CustomBERTModel(num_classes=7)
model_c = CustomBERTModel(num_classes=7)


In [13]:
from torch.optim import Adam
import torch.nn as nn
from tqdm import tqdm

def train(model, train_data, val_data, learning_rate, epochs):

    train_dataloader = torch.utils.data.DataLoader(train_data, batch_size=3, shuffle=True)
    val_dataloader = torch.utils.data.DataLoader(val_data, batch_size=3)

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    criterion = nn.CrossEntropyLoss()
    optimizer = Adam(model.parameters(), lr= learning_rate)

    if use_cuda:
        model = model.cuda()
        criterion = criterion.cuda()

    t_loss = []
    t_acc = []
    v_loss = []
    v_acc = []

    for epoch_num in range(epochs):

        total_acc_train = 0
        total_loss_train = 0

        for train_input, type_input, attention, train_label in tqdm(train_dataloader):

            train_label = train_label.to(device)
            output = model(train_input.squeeze(1).to(device), attention.squeeze(1).to(device), type_input.squeeze(1).to(device))

            batch_loss = criterion(output, train_label.long())
            total_loss_train += batch_loss.item()

            acc = (output.argmax(dim=1) == train_label).sum().item()
            total_acc_train += acc

            model.zero_grad()
            batch_loss.backward()
            optimizer.step()

        total_acc_val = 0
        total_loss_val = 0

        with torch.no_grad():

            for input_id, type_id, attention, label in val_dataloader:

                val_label = label.to(device)
                output = model(input_id.squeeze(1).to(device), attention.squeeze(1).to(device), type_id.squeeze(1).to(device))

                batch_loss = criterion(output, val_label.long())
                total_loss_val += batch_loss.item()

                acc = (output.argmax(dim=1) == val_label).sum().item()
                total_acc_val += acc

        print(
            f'Epochs: {epoch_num + 1} | Train Loss: {total_loss_train / len(train_data): .3f} \
            | Train Accuracy: {total_acc_train / len(train_data): .3f} \
            | Val Loss: {total_loss_val / len(val_data): .3f} \
            | Val Accuracy: {total_acc_val / len(val_data): .3f}')

        t_loss.append(total_loss_train / len(train_data))
        t_acc.append(total_acc_train / len(train_data))
        v_loss.append(total_loss_val / len(val_data))
        v_acc.append(total_acc_val / len(val_data))

        # if (total_acc_train / len(train_data)) > 0.99:
        #      break

    return t_loss, t_acc, v_loss, v_acc

EPOCHS = 30
LR = 1e-6

train(model, train_dataset, val_dataset, LR, EPOCHS)

100%|██████████| 85/85 [00:09<00:00,  9.39it/s]


Epochs: 1 | Train Loss:  0.600             | Train Accuracy:  0.384             | Val Loss:  0.533             | Val Accuracy:  0.600


100%|██████████| 85/85 [00:07<00:00, 11.88it/s]


Epochs: 2 | Train Loss:  0.475             | Train Accuracy:  0.698             | Val Loss:  0.413             | Val Accuracy:  0.789


100%|██████████| 85/85 [00:07<00:00, 10.82it/s]


Epochs: 3 | Train Loss:  0.365             | Train Accuracy:  0.749             | Val Loss:  0.319             | Val Accuracy:  0.856


100%|██████████| 85/85 [00:07<00:00, 11.96it/s]


Epochs: 4 | Train Loss:  0.286             | Train Accuracy:  0.843             | Val Loss:  0.255             | Val Accuracy:  0.889


100%|██████████| 85/85 [00:07<00:00, 10.72it/s]


Epochs: 5 | Train Loss:  0.217             | Train Accuracy:  0.918             | Val Loss:  0.225             | Val Accuracy:  0.800


100%|██████████| 85/85 [00:07<00:00, 11.87it/s]


Epochs: 6 | Train Loss:  0.170             | Train Accuracy:  0.945             | Val Loss:  0.165             | Val Accuracy:  0.922


100%|██████████| 85/85 [00:07<00:00, 10.76it/s]


Epochs: 7 | Train Loss:  0.139             | Train Accuracy:  0.961             | Val Loss:  0.144             | Val Accuracy:  0.911


100%|██████████| 85/85 [00:07<00:00, 12.04it/s]


Epochs: 8 | Train Loss:  0.104             | Train Accuracy:  0.980             | Val Loss:  0.112             | Val Accuracy:  0.933


100%|██████████| 85/85 [00:07<00:00, 10.86it/s]


Epochs: 9 | Train Loss:  0.081             | Train Accuracy:  0.992             | Val Loss:  0.122             | Val Accuracy:  0.922


100%|██████████| 85/85 [00:07<00:00, 10.69it/s]


Epochs: 10 | Train Loss:  0.063             | Train Accuracy:  1.000             | Val Loss:  0.101             | Val Accuracy:  0.933


100%|██████████| 85/85 [00:07<00:00, 11.21it/s]


Epochs: 11 | Train Loss:  0.053             | Train Accuracy:  0.996             | Val Loss:  0.101             | Val Accuracy:  0.933


100%|██████████| 85/85 [00:07<00:00, 11.43it/s]


Epochs: 12 | Train Loss:  0.044             | Train Accuracy:  0.996             | Val Loss:  0.094             | Val Accuracy:  0.933


100%|██████████| 85/85 [00:07<00:00, 11.51it/s]


Epochs: 13 | Train Loss:  0.037             | Train Accuracy:  1.000             | Val Loss:  0.093             | Val Accuracy:  0.933


100%|██████████| 85/85 [00:07<00:00, 11.24it/s]


Epochs: 14 | Train Loss:  0.033             | Train Accuracy:  1.000             | Val Loss:  0.089             | Val Accuracy:  0.933


100%|██████████| 85/85 [00:07<00:00, 11.80it/s]


Epochs: 15 | Train Loss:  0.027             | Train Accuracy:  1.000             | Val Loss:  0.082             | Val Accuracy:  0.933


100%|██████████| 85/85 [00:07<00:00, 10.94it/s]


Epochs: 16 | Train Loss:  0.025             | Train Accuracy:  1.000             | Val Loss:  0.080             | Val Accuracy:  0.933


100%|██████████| 85/85 [00:07<00:00, 12.09it/s]


Epochs: 17 | Train Loss:  0.022             | Train Accuracy:  1.000             | Val Loss:  0.082             | Val Accuracy:  0.933


100%|██████████| 85/85 [00:07<00:00, 10.87it/s]


Epochs: 18 | Train Loss:  0.020             | Train Accuracy:  1.000             | Val Loss:  0.083             | Val Accuracy:  0.933


100%|██████████| 85/85 [00:07<00:00, 12.13it/s]


Epochs: 19 | Train Loss:  0.018             | Train Accuracy:  1.000             | Val Loss:  0.080             | Val Accuracy:  0.933


100%|██████████| 85/85 [00:07<00:00, 10.84it/s]


Epochs: 20 | Train Loss:  0.017             | Train Accuracy:  1.000             | Val Loss:  0.077             | Val Accuracy:  0.933


100%|██████████| 85/85 [00:07<00:00, 12.06it/s]


Epochs: 21 | Train Loss:  0.015             | Train Accuracy:  1.000             | Val Loss:  0.075             | Val Accuracy:  0.944


100%|██████████| 85/85 [00:07<00:00, 10.81it/s]


Epochs: 22 | Train Loss:  0.014             | Train Accuracy:  1.000             | Val Loss:  0.076             | Val Accuracy:  0.933


100%|██████████| 85/85 [00:07<00:00, 12.03it/s]


Epochs: 23 | Train Loss:  0.013             | Train Accuracy:  1.000             | Val Loss:  0.079             | Val Accuracy:  0.933


100%|██████████| 85/85 [00:07<00:00, 10.86it/s]


Epochs: 24 | Train Loss:  0.012             | Train Accuracy:  1.000             | Val Loss:  0.077             | Val Accuracy:  0.933


100%|██████████| 85/85 [00:07<00:00, 12.03it/s]


Epochs: 25 | Train Loss:  0.011             | Train Accuracy:  1.000             | Val Loss:  0.076             | Val Accuracy:  0.933


100%|██████████| 85/85 [00:07<00:00, 10.94it/s]


Epochs: 26 | Train Loss:  0.011             | Train Accuracy:  1.000             | Val Loss:  0.073             | Val Accuracy:  0.933


100%|██████████| 85/85 [00:07<00:00, 11.77it/s]


Epochs: 27 | Train Loss:  0.010             | Train Accuracy:  1.000             | Val Loss:  0.078             | Val Accuracy:  0.933


100%|██████████| 85/85 [00:07<00:00, 11.19it/s]


Epochs: 28 | Train Loss:  0.009             | Train Accuracy:  1.000             | Val Loss:  0.080             | Val Accuracy:  0.933


100%|██████████| 85/85 [00:07<00:00, 11.55it/s]


Epochs: 29 | Train Loss:  0.009             | Train Accuracy:  1.000             | Val Loss:  0.077             | Val Accuracy:  0.933


100%|██████████| 85/85 [00:07<00:00, 11.32it/s]


Epochs: 30 | Train Loss:  0.008             | Train Accuracy:  1.000             | Val Loss:  0.072             | Val Accuracy:  0.944


([0.6002078818339928,
  0.4745613359937481,
  0.3647073514321271,
  0.28575557914434696,
  0.21725764110976575,
  0.1702715001854242,
  0.13887549071919683,
  0.10421159875159171,
  0.0813131604124518,
  0.06292314102836684,
  0.05258706422413097,
  0.0439572222384752,
  0.03666101872336631,
  0.03338656562800501,
  0.027063883461204228,
  0.024726881671185585,
  0.022438552508167193,
  0.019782194422156202,
  0.018127099176247916,
  0.01681847318130381,
  0.015370319520725923,
  0.013973039220653329,
  0.012819628819238905,
  0.011962641885175424,
  0.011175341407457988,
  0.01071096424232511,
  0.009876110718822946,
  0.009167363991340001,
  0.008809649564471899,
  0.008282114996337422],
 [0.3843137254901961,
  0.6980392156862745,
  0.7490196078431373,
  0.8431372549019608,
  0.9176470588235294,
  0.9450980392156862,
  0.9607843137254902,
  0.9803921568627451,
  0.9921568627450981,
  1.0,
  0.996078431372549,
  0.996078431372549,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0

In [14]:
# @title
def evaluate(model, test_data):

    test_dataloader = torch.utils.data.DataLoader(test_data, batch_size=2)

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    total_acc_test = 0
    with torch.no_grad():

        for input_id, type_id, attention, label in test_dataloader:

              output = model(input_id.squeeze(1).to(device),
                             attention.squeeze(1).to(device),
                             type_id.squeeze(1).to(device))
              acc = (output.argmax(dim=1) == label.to(device)).sum().item()
              total_acc_test += acc

    print(f'Test Accuracy: {total_acc_test / len(test_data): .3f}')

evaluate(model, test_dataset)


Test Accuracy:  0.916


In [15]:
train(model_rd, ds_rd, val_dataset, LR, EPOCHS)

100%|██████████| 340/340 [00:33<00:00, 10.24it/s]


Epochs: 1 | Train Loss:  0.444             | Train Accuracy:  0.566             | Val Loss:  0.277             | Val Accuracy:  0.878


100%|██████████| 340/340 [00:35<00:00,  9.55it/s]


Epochs: 2 | Train Loss:  0.183             | Train Accuracy:  0.890             | Val Loss:  0.141             | Val Accuracy:  0.900


100%|██████████| 340/340 [00:30<00:00, 11.30it/s]


Epochs: 3 | Train Loss:  0.076             | Train Accuracy:  0.971             | Val Loss:  0.072             | Val Accuracy:  0.956


100%|██████████| 340/340 [00:30<00:00, 11.10it/s]


Epochs: 4 | Train Loss:  0.035             | Train Accuracy:  0.998             | Val Loss:  0.063             | Val Accuracy:  0.956


100%|██████████| 340/340 [00:30<00:00, 11.11it/s]


Epochs: 5 | Train Loss:  0.022             | Train Accuracy:  0.999             | Val Loss:  0.056             | Val Accuracy:  0.956


100%|██████████| 340/340 [00:30<00:00, 11.10it/s]


Epochs: 6 | Train Loss:  0.015             | Train Accuracy:  1.000             | Val Loss:  0.047             | Val Accuracy:  0.956


100%|██████████| 340/340 [00:30<00:00, 11.21it/s]


Epochs: 7 | Train Loss:  0.011             | Train Accuracy:  1.000             | Val Loss:  0.041             | Val Accuracy:  0.978


100%|██████████| 340/340 [00:30<00:00, 11.21it/s]


Epochs: 8 | Train Loss:  0.009             | Train Accuracy:  1.000             | Val Loss:  0.043             | Val Accuracy:  0.967


100%|██████████| 340/340 [00:30<00:00, 11.27it/s]


Epochs: 9 | Train Loss:  0.007             | Train Accuracy:  1.000             | Val Loss:  0.041             | Val Accuracy:  0.967


100%|██████████| 340/340 [00:30<00:00, 10.97it/s]


Epochs: 10 | Train Loss:  0.006             | Train Accuracy:  1.000             | Val Loss:  0.039             | Val Accuracy:  0.978


100%|██████████| 340/340 [00:30<00:00, 11.29it/s]


Epochs: 11 | Train Loss:  0.005             | Train Accuracy:  1.000             | Val Loss:  0.039             | Val Accuracy:  0.978


100%|██████████| 340/340 [00:30<00:00, 11.30it/s]


Epochs: 12 | Train Loss:  0.004             | Train Accuracy:  1.000             | Val Loss:  0.039             | Val Accuracy:  0.978


100%|██████████| 340/340 [00:30<00:00, 11.25it/s]


Epochs: 13 | Train Loss:  0.003             | Train Accuracy:  1.000             | Val Loss:  0.038             | Val Accuracy:  0.978


100%|██████████| 340/340 [00:30<00:00, 11.23it/s]


Epochs: 14 | Train Loss:  0.003             | Train Accuracy:  1.000             | Val Loss:  0.038             | Val Accuracy:  0.978


100%|██████████| 340/340 [00:30<00:00, 11.25it/s]


Epochs: 15 | Train Loss:  0.002             | Train Accuracy:  1.000             | Val Loss:  0.032             | Val Accuracy:  0.978


100%|██████████| 340/340 [00:30<00:00, 11.25it/s]


Epochs: 16 | Train Loss:  0.002             | Train Accuracy:  1.000             | Val Loss:  0.033             | Val Accuracy:  0.978


100%|██████████| 340/340 [00:30<00:00, 11.25it/s]


Epochs: 17 | Train Loss:  0.002             | Train Accuracy:  1.000             | Val Loss:  0.036             | Val Accuracy:  0.978


100%|██████████| 340/340 [00:31<00:00, 10.77it/s]


Epochs: 18 | Train Loss:  0.001             | Train Accuracy:  1.000             | Val Loss:  0.033             | Val Accuracy:  0.978


100%|██████████| 340/340 [00:30<00:00, 11.09it/s]


Epochs: 19 | Train Loss:  0.001             | Train Accuracy:  1.000             | Val Loss:  0.037             | Val Accuracy:  0.978


100%|██████████| 340/340 [00:30<00:00, 11.13it/s]


Epochs: 20 | Train Loss:  0.001             | Train Accuracy:  1.000             | Val Loss:  0.039             | Val Accuracy:  0.978


100%|██████████| 340/340 [00:30<00:00, 11.14it/s]


Epochs: 21 | Train Loss:  0.001             | Train Accuracy:  1.000             | Val Loss:  0.039             | Val Accuracy:  0.978


100%|██████████| 340/340 [00:30<00:00, 11.10it/s]


Epochs: 22 | Train Loss:  0.001             | Train Accuracy:  1.000             | Val Loss:  0.042             | Val Accuracy:  0.978


100%|██████████| 340/340 [00:30<00:00, 11.07it/s]


Epochs: 23 | Train Loss:  0.001             | Train Accuracy:  1.000             | Val Loss:  0.037             | Val Accuracy:  0.978


100%|██████████| 340/340 [00:30<00:00, 11.11it/s]


Epochs: 24 | Train Loss:  0.001             | Train Accuracy:  1.000             | Val Loss:  0.039             | Val Accuracy:  0.978


100%|██████████| 340/340 [00:30<00:00, 11.03it/s]


Epochs: 25 | Train Loss:  0.000             | Train Accuracy:  1.000             | Val Loss:  0.039             | Val Accuracy:  0.978


100%|██████████| 340/340 [00:31<00:00, 10.87it/s]


Epochs: 26 | Train Loss:  0.000             | Train Accuracy:  1.000             | Val Loss:  0.043             | Val Accuracy:  0.978


100%|██████████| 340/340 [00:30<00:00, 11.10it/s]


Epochs: 27 | Train Loss:  0.000             | Train Accuracy:  1.000             | Val Loss:  0.037             | Val Accuracy:  0.978


100%|██████████| 340/340 [00:30<00:00, 11.16it/s]


Epochs: 28 | Train Loss:  0.000             | Train Accuracy:  1.000             | Val Loss:  0.041             | Val Accuracy:  0.978


100%|██████████| 340/340 [00:30<00:00, 11.20it/s]


Epochs: 29 | Train Loss:  0.000             | Train Accuracy:  1.000             | Val Loss:  0.039             | Val Accuracy:  0.978


100%|██████████| 340/340 [00:30<00:00, 11.07it/s]


Epochs: 30 | Train Loss:  0.000             | Train Accuracy:  1.000             | Val Loss:  0.042             | Val Accuracy:  0.978


([0.44439510153789147,
  0.18306234882158393,
  0.07625746727720195,
  0.035061719699525366,
  0.021600705604342855,
  0.014659977961769875,
  0.011092072349114745,
  0.008755716982790653,
  0.006988262540350358,
  0.00560650767703705,
  0.00454827164135435,
  0.0038799432138273237,
  0.003147017322115454,
  0.0026855216105925103,
  0.0022795812724450346,
  0.001900674701741367,
  0.0016287002281066688,
  0.001393446007597388,
  0.0012160808897997235,
  0.001006537571540285,
  0.0008527910557128124,
  0.0007457907781844922,
  0.0006353568737669026,
  0.0005379173174208286,
  0.00045930201235665555,
  0.00040160329811567185,
  0.00033987094733781896,
  0.00029625414451812487,
  0.00025746662092084683,
  0.00021955274135632185],
 [0.5656862745098039,
  0.8901960784313725,
  0.9705882352941176,
  0.9980392156862745,
  0.9990196078431373,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,

In [16]:
evaluate(model_rd, test_dataset)

Test Accuracy:  0.908


In [17]:
train(model_rs, ds_rs, val_dataset, LR, EPOCHS)

100%|██████████| 340/340 [00:30<00:00, 11.14it/s]


Epochs: 1 | Train Loss:  0.505             | Train Accuracy:  0.536             | Val Loss:  0.277             | Val Accuracy:  0.889


100%|██████████| 340/340 [00:30<00:00, 11.13it/s]


Epochs: 2 | Train Loss:  0.163             | Train Accuracy:  0.946             | Val Loss:  0.133             | Val Accuracy:  0.933


100%|██████████| 340/340 [00:30<00:00, 11.16it/s]


Epochs: 3 | Train Loss:  0.058             | Train Accuracy:  0.993             | Val Loss:  0.085             | Val Accuracy:  0.944


100%|██████████| 340/340 [00:30<00:00, 11.13it/s]


Epochs: 4 | Train Loss:  0.030             | Train Accuracy:  1.000             | Val Loss:  0.074             | Val Accuracy:  0.944


100%|██████████| 340/340 [00:32<00:00, 10.60it/s]


Epochs: 5 | Train Loss:  0.019             | Train Accuracy:  1.000             | Val Loss:  0.074             | Val Accuracy:  0.944


100%|██████████| 340/340 [00:30<00:00, 11.15it/s]


Epochs: 6 | Train Loss:  0.013             | Train Accuracy:  1.000             | Val Loss:  0.073             | Val Accuracy:  0.933


100%|██████████| 340/340 [00:30<00:00, 11.16it/s]


Epochs: 7 | Train Loss:  0.010             | Train Accuracy:  1.000             | Val Loss:  0.061             | Val Accuracy:  0.944


100%|██████████| 340/340 [00:30<00:00, 11.02it/s]


Epochs: 8 | Train Loss:  0.008             | Train Accuracy:  1.000             | Val Loss:  0.070             | Val Accuracy:  0.944


100%|██████████| 340/340 [00:30<00:00, 11.18it/s]


Epochs: 9 | Train Loss:  0.006             | Train Accuracy:  1.000             | Val Loss:  0.066             | Val Accuracy:  0.944


100%|██████████| 340/340 [00:30<00:00, 11.15it/s]


Epochs: 10 | Train Loss:  0.005             | Train Accuracy:  1.000             | Val Loss:  0.067             | Val Accuracy:  0.944


100%|██████████| 340/340 [00:30<00:00, 11.16it/s]


Epochs: 11 | Train Loss:  0.004             | Train Accuracy:  1.000             | Val Loss:  0.071             | Val Accuracy:  0.944


100%|██████████| 340/340 [00:30<00:00, 11.14it/s]


Epochs: 12 | Train Loss:  0.003             | Train Accuracy:  1.000             | Val Loss:  0.066             | Val Accuracy:  0.944


100%|██████████| 340/340 [00:32<00:00, 10.62it/s]


Epochs: 13 | Train Loss:  0.003             | Train Accuracy:  1.000             | Val Loss:  0.068             | Val Accuracy:  0.944


100%|██████████| 340/340 [00:30<00:00, 11.05it/s]


Epochs: 14 | Train Loss:  0.002             | Train Accuracy:  1.000             | Val Loss:  0.066             | Val Accuracy:  0.944


100%|██████████| 340/340 [00:30<00:00, 11.20it/s]


Epochs: 15 | Train Loss:  0.002             | Train Accuracy:  1.000             | Val Loss:  0.074             | Val Accuracy:  0.944


100%|██████████| 340/340 [00:30<00:00, 11.18it/s]


Epochs: 16 | Train Loss:  0.002             | Train Accuracy:  1.000             | Val Loss:  0.064             | Val Accuracy:  0.944


100%|██████████| 340/340 [00:30<00:00, 11.16it/s]


Epochs: 17 | Train Loss:  0.001             | Train Accuracy:  1.000             | Val Loss:  0.070             | Val Accuracy:  0.944


100%|██████████| 340/340 [00:30<00:00, 11.15it/s]


Epochs: 18 | Train Loss:  0.001             | Train Accuracy:  1.000             | Val Loss:  0.073             | Val Accuracy:  0.944


100%|██████████| 340/340 [00:30<00:00, 11.13it/s]


Epochs: 19 | Train Loss:  0.001             | Train Accuracy:  1.000             | Val Loss:  0.084             | Val Accuracy:  0.944


100%|██████████| 340/340 [00:30<00:00, 11.15it/s]


Epochs: 20 | Train Loss:  0.001             | Train Accuracy:  1.000             | Val Loss:  0.082             | Val Accuracy:  0.944


100%|██████████| 340/340 [00:30<00:00, 10.98it/s]


Epochs: 21 | Train Loss:  0.001             | Train Accuracy:  1.000             | Val Loss:  0.078             | Val Accuracy:  0.944


100%|██████████| 340/340 [00:32<00:00, 10.44it/s]


Epochs: 22 | Train Loss:  0.001             | Train Accuracy:  1.000             | Val Loss:  0.084             | Val Accuracy:  0.944


100%|██████████| 340/340 [00:31<00:00, 10.82it/s]


Epochs: 23 | Train Loss:  0.001             | Train Accuracy:  1.000             | Val Loss:  0.083             | Val Accuracy:  0.944


100%|██████████| 340/340 [00:31<00:00, 10.90it/s]


Epochs: 24 | Train Loss:  0.000             | Train Accuracy:  1.000             | Val Loss:  0.075             | Val Accuracy:  0.944


100%|██████████| 340/340 [00:30<00:00, 11.05it/s]


Epochs: 25 | Train Loss:  0.000             | Train Accuracy:  1.000             | Val Loss:  0.081             | Val Accuracy:  0.944


100%|██████████| 340/340 [00:30<00:00, 11.07it/s]


Epochs: 26 | Train Loss:  0.000             | Train Accuracy:  1.000             | Val Loss:  0.084             | Val Accuracy:  0.944


100%|██████████| 340/340 [00:31<00:00, 10.94it/s]


Epochs: 27 | Train Loss:  0.000             | Train Accuracy:  1.000             | Val Loss:  0.096             | Val Accuracy:  0.944


100%|██████████| 340/340 [00:31<00:00, 10.88it/s]


Epochs: 28 | Train Loss:  0.000             | Train Accuracy:  1.000             | Val Loss:  0.080             | Val Accuracy:  0.944


100%|██████████| 340/340 [00:31<00:00, 10.90it/s]


Epochs: 29 | Train Loss:  0.000             | Train Accuracy:  1.000             | Val Loss:  0.080             | Val Accuracy:  0.944


100%|██████████| 340/340 [00:32<00:00, 10.40it/s]


Epochs: 30 | Train Loss:  0.000             | Train Accuracy:  1.000             | Val Loss:  0.080             | Val Accuracy:  0.944


([0.5046095368324541,
  0.16321633012563574,
  0.058284659398829236,
  0.029553200552860894,
  0.019161902610943013,
  0.013264726628275478,
  0.010115805481944014,
  0.0078009549729233865,
  0.006139684449333479,
  0.0051083990190105115,
  0.004110609647343118,
  0.0034178106698627567,
  0.0028248329194920027,
  0.0023794903104905696,
  0.0020223866780653744,
  0.0016761075146039764,
  0.0014380856392010317,
  0.0012394938494681435,
  0.0010427173787463165,
  0.0009026515006553381,
  0.0007593973373135953,
  0.0006503942484771102,
  0.0005577129639991029,
  0.0004745941534957976,
  0.00040699582354065175,
  0.0003555012898811815,
  0.00030559773600258517,
  0.00026207605885300675,
  0.000224893540372232,
  0.0001932292159728925],
 [0.5362745098039216,
  0.946078431372549,
  0.9931372549019608,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0],
 [0.

In [18]:
evaluate(model_rs, test_dataset)

Test Accuracy:  0.916


In [19]:
train(model_c, ds_context, val_dataset, LR, EPOCHS)

100%|██████████| 340/340 [00:30<00:00, 11.09it/s]


Epochs: 1 | Train Loss:  0.416             | Train Accuracy:  0.605             | Val Loss:  0.248             | Val Accuracy:  0.844


100%|██████████| 340/340 [00:30<00:00, 11.02it/s]


Epochs: 2 | Train Loss:  0.200             | Train Accuracy:  0.848             | Val Loss:  0.137             | Val Accuracy:  0.878


100%|██████████| 340/340 [00:30<00:00, 11.00it/s]


Epochs: 3 | Train Loss:  0.119             | Train Accuracy:  0.915             | Val Loss:  0.085             | Val Accuracy:  0.944


100%|██████████| 340/340 [00:31<00:00, 10.91it/s]


Epochs: 4 | Train Loss:  0.070             | Train Accuracy:  0.959             | Val Loss:  0.087             | Val Accuracy:  0.922


100%|██████████| 340/340 [00:30<00:00, 11.03it/s]


Epochs: 5 | Train Loss:  0.042             | Train Accuracy:  0.980             | Val Loss:  0.052             | Val Accuracy:  0.967


100%|██████████| 340/340 [00:30<00:00, 11.09it/s]


Epochs: 6 | Train Loss:  0.025             | Train Accuracy:  0.992             | Val Loss:  0.068             | Val Accuracy:  0.933


100%|██████████| 340/340 [00:30<00:00, 10.98it/s]


Epochs: 7 | Train Loss:  0.017             | Train Accuracy:  0.998             | Val Loss:  0.064             | Val Accuracy:  0.956


100%|██████████| 340/340 [00:31<00:00, 10.71it/s]


Epochs: 8 | Train Loss:  0.011             | Train Accuracy:  0.999             | Val Loss:  0.065             | Val Accuracy:  0.956


100%|██████████| 340/340 [00:30<00:00, 11.14it/s]


Epochs: 9 | Train Loss:  0.008             | Train Accuracy:  1.000             | Val Loss:  0.063             | Val Accuracy:  0.956


100%|██████████| 340/340 [00:30<00:00, 11.10it/s]


Epochs: 10 | Train Loss:  0.007             | Train Accuracy:  1.000             | Val Loss:  0.060             | Val Accuracy:  0.956


100%|██████████| 340/340 [00:30<00:00, 11.10it/s]


Epochs: 11 | Train Loss:  0.005             | Train Accuracy:  1.000             | Val Loss:  0.068             | Val Accuracy:  0.944


100%|██████████| 340/340 [00:30<00:00, 11.17it/s]


Epochs: 12 | Train Loss:  0.004             | Train Accuracy:  1.000             | Val Loss:  0.067             | Val Accuracy:  0.956


100%|██████████| 340/340 [00:30<00:00, 11.12it/s]


Epochs: 13 | Train Loss:  0.004             | Train Accuracy:  1.000             | Val Loss:  0.069             | Val Accuracy:  0.956


100%|██████████| 340/340 [00:30<00:00, 11.05it/s]


Epochs: 14 | Train Loss:  0.003             | Train Accuracy:  1.000             | Val Loss:  0.072             | Val Accuracy:  0.956


100%|██████████| 340/340 [00:30<00:00, 11.15it/s]


Epochs: 15 | Train Loss:  0.003             | Train Accuracy:  1.000             | Val Loss:  0.073             | Val Accuracy:  0.956


100%|██████████| 340/340 [00:31<00:00, 10.82it/s]


Epochs: 16 | Train Loss:  0.002             | Train Accuracy:  1.000             | Val Loss:  0.071             | Val Accuracy:  0.956


100%|██████████| 340/340 [00:30<00:00, 11.18it/s]


Epochs: 17 | Train Loss:  0.002             | Train Accuracy:  1.000             | Val Loss:  0.077             | Val Accuracy:  0.956


100%|██████████| 340/340 [00:30<00:00, 11.09it/s]


Epochs: 18 | Train Loss:  0.002             | Train Accuracy:  1.000             | Val Loss:  0.082             | Val Accuracy:  0.956


100%|██████████| 340/340 [00:30<00:00, 11.16it/s]


Epochs: 19 | Train Loss:  0.001             | Train Accuracy:  1.000             | Val Loss:  0.073             | Val Accuracy:  0.956


100%|██████████| 340/340 [00:30<00:00, 11.08it/s]


Epochs: 20 | Train Loss:  0.001             | Train Accuracy:  1.000             | Val Loss:  0.080             | Val Accuracy:  0.956


100%|██████████| 340/340 [00:30<00:00, 11.12it/s]


Epochs: 21 | Train Loss:  0.001             | Train Accuracy:  1.000             | Val Loss:  0.085             | Val Accuracy:  0.956


100%|██████████| 340/340 [00:30<00:00, 11.06it/s]


Epochs: 22 | Train Loss:  0.001             | Train Accuracy:  1.000             | Val Loss:  0.081             | Val Accuracy:  0.956


100%|██████████| 340/340 [00:30<00:00, 11.15it/s]


Epochs: 23 | Train Loss:  0.001             | Train Accuracy:  1.000             | Val Loss:  0.079             | Val Accuracy:  0.956


100%|██████████| 340/340 [00:30<00:00, 11.02it/s]


Epochs: 24 | Train Loss:  0.001             | Train Accuracy:  1.000             | Val Loss:  0.085             | Val Accuracy:  0.956


100%|██████████| 340/340 [00:31<00:00, 10.74it/s]


Epochs: 25 | Train Loss:  0.001             | Train Accuracy:  1.000             | Val Loss:  0.091             | Val Accuracy:  0.956


100%|██████████| 340/340 [00:30<00:00, 11.18it/s]


Epochs: 26 | Train Loss:  0.000             | Train Accuracy:  1.000             | Val Loss:  0.088             | Val Accuracy:  0.956


100%|██████████| 340/340 [00:30<00:00, 11.07it/s]


Epochs: 27 | Train Loss:  0.000             | Train Accuracy:  1.000             | Val Loss:  0.103             | Val Accuracy:  0.944


100%|██████████| 340/340 [00:30<00:00, 11.10it/s]


Epochs: 28 | Train Loss:  0.000             | Train Accuracy:  1.000             | Val Loss:  0.095             | Val Accuracy:  0.956


100%|██████████| 340/340 [00:30<00:00, 11.11it/s]


Epochs: 29 | Train Loss:  0.000             | Train Accuracy:  1.000             | Val Loss:  0.103             | Val Accuracy:  0.956


100%|██████████| 340/340 [00:30<00:00, 11.08it/s]


Epochs: 30 | Train Loss:  0.000             | Train Accuracy:  1.000             | Val Loss:  0.100             | Val Accuracy:  0.956


([0.41581971177867816,
  0.20022781038401175,
  0.11851346815625827,
  0.06972247042200144,
  0.04183610973550993,
  0.025404856596872503,
  0.016587283522548044,
  0.011184813856494193,
  0.008400504086531846,
  0.006727649209400018,
  0.0054115435227240416,
  0.004424070296626465,
  0.0037090340341605684,
  0.0030990263232596074,
  0.0026103629091498897,
  0.0021923840744420887,
  0.001878878934418454,
  0.001607741825465186,
  0.0013902018568021995,
  0.001170185046366361,
  0.0009788842228076913,
  0.0008626215398430313,
  0.0007397472397332975,
  0.0006311593520770068,
  0.0005434758759756555,
  0.00047336660323244535,
  0.0004046331801766749,
  0.00035341260021703097,
  0.0002940308397941693,
  0.00025323312458532004],
 [0.6049019607843137,
  0.8480392156862745,
  0.9147058823529411,
  0.9588235294117647,
  0.9803921568627451,
  0.9921568627450981,
  0.9980392156862745,
  0.9990196078431373,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0

In [20]:
evaluate(model_c, test_dataset)

Test Accuracy:  0.924


In [43]:
df_bt = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/spam_data/trainSpam+BackTranslation.csv')
df_bt.columns = train_bt.columns.map(str.lower)
df_bt.rename(columns={'genero': 'classe'}, inplace=True)

df_bt['classe'] =  np.where(df_bt['classe']=='spam', 1, 0)

In [44]:
ds_bt = MyDataset(df_bt, tokenizer=tokenizer, max_len=32)

In [46]:
model_bt = CustomBERTModel(num_classes=7)
train(model_bt, ds_bt, val_dataset, LR, EPOCHS)

100%|██████████| 124/124 [00:12<00:00,  9.57it/s]


Epochs: 1 | Train Loss:  0.540             | Train Accuracy:  0.439             | Val Loss:  0.431             | Val Accuracy:  0.722


100%|██████████| 124/124 [00:12<00:00,  9.85it/s]


Epochs: 2 | Train Loss:  0.367             | Train Accuracy:  0.712             | Val Loss:  0.294             | Val Accuracy:  0.844


100%|██████████| 124/124 [00:10<00:00, 11.40it/s]


Epochs: 3 | Train Loss:  0.259             | Train Accuracy:  0.879             | Val Loss:  0.210             | Val Accuracy:  0.889


100%|██████████| 124/124 [00:10<00:00, 11.30it/s]


Epochs: 4 | Train Loss:  0.188             | Train Accuracy:  0.911             | Val Loss:  0.145             | Val Accuracy:  0.933


100%|██████████| 124/124 [00:11<00:00, 10.92it/s]


Epochs: 5 | Train Loss:  0.132             | Train Accuracy:  0.954             | Val Loss:  0.107             | Val Accuracy:  0.956


100%|██████████| 124/124 [00:11<00:00, 10.75it/s]


Epochs: 6 | Train Loss:  0.095             | Train Accuracy:  0.970             | Val Loss:  0.090             | Val Accuracy:  0.967


100%|██████████| 124/124 [00:10<00:00, 11.55it/s]


Epochs: 7 | Train Loss:  0.072             | Train Accuracy:  0.978             | Val Loss:  0.079             | Val Accuracy:  0.967


100%|██████████| 124/124 [00:11<00:00, 11.01it/s]


Epochs: 8 | Train Loss:  0.052             | Train Accuracy:  0.995             | Val Loss:  0.072             | Val Accuracy:  0.967


100%|██████████| 124/124 [00:11<00:00, 10.70it/s]


Epochs: 9 | Train Loss:  0.040             | Train Accuracy:  0.995             | Val Loss:  0.059             | Val Accuracy:  0.967


100%|██████████| 124/124 [00:11<00:00, 11.03it/s]


Epochs: 10 | Train Loss:  0.031             | Train Accuracy:  1.000             | Val Loss:  0.060             | Val Accuracy:  0.967


100%|██████████| 124/124 [00:10<00:00, 11.43it/s]


Epochs: 11 | Train Loss:  0.026             | Train Accuracy:  1.000             | Val Loss:  0.062             | Val Accuracy:  0.967


100%|██████████| 124/124 [00:11<00:00, 11.06it/s]


Epochs: 12 | Train Loss:  0.023             | Train Accuracy:  1.000             | Val Loss:  0.056             | Val Accuracy:  0.967


100%|██████████| 124/124 [00:11<00:00, 11.00it/s]


Epochs: 13 | Train Loss:  0.020             | Train Accuracy:  1.000             | Val Loss:  0.055             | Val Accuracy:  0.967


100%|██████████| 124/124 [00:11<00:00, 10.77it/s]


Epochs: 14 | Train Loss:  0.017             | Train Accuracy:  1.000             | Val Loss:  0.056             | Val Accuracy:  0.967


100%|██████████| 124/124 [00:11<00:00, 11.19it/s]


Epochs: 15 | Train Loss:  0.015             | Train Accuracy:  1.000             | Val Loss:  0.060             | Val Accuracy:  0.967


100%|██████████| 124/124 [00:10<00:00, 11.54it/s]


Epochs: 16 | Train Loss:  0.014             | Train Accuracy:  1.000             | Val Loss:  0.058             | Val Accuracy:  0.967


100%|██████████| 124/124 [00:11<00:00, 10.75it/s]


Epochs: 17 | Train Loss:  0.012             | Train Accuracy:  1.000             | Val Loss:  0.052             | Val Accuracy:  0.967


100%|██████████| 124/124 [00:11<00:00, 10.56it/s]


Epochs: 18 | Train Loss:  0.011             | Train Accuracy:  1.000             | Val Loss:  0.053             | Val Accuracy:  0.967


100%|██████████| 124/124 [00:11<00:00, 10.70it/s]


Epochs: 19 | Train Loss:  0.010             | Train Accuracy:  1.000             | Val Loss:  0.058             | Val Accuracy:  0.967


100%|██████████| 124/124 [00:10<00:00, 11.46it/s]


Epochs: 20 | Train Loss:  0.010             | Train Accuracy:  1.000             | Val Loss:  0.055             | Val Accuracy:  0.967


100%|██████████| 124/124 [00:11<00:00, 10.91it/s]


Epochs: 21 | Train Loss:  0.009             | Train Accuracy:  1.000             | Val Loss:  0.055             | Val Accuracy:  0.967


100%|██████████| 124/124 [00:11<00:00, 10.74it/s]


Epochs: 22 | Train Loss:  0.008             | Train Accuracy:  1.000             | Val Loss:  0.058             | Val Accuracy:  0.967


100%|██████████| 124/124 [00:11<00:00, 10.79it/s]


Epochs: 23 | Train Loss:  0.007             | Train Accuracy:  1.000             | Val Loss:  0.058             | Val Accuracy:  0.967


100%|██████████| 124/124 [00:11<00:00, 10.67it/s]


Epochs: 24 | Train Loss:  0.007             | Train Accuracy:  1.000             | Val Loss:  0.060             | Val Accuracy:  0.967


100%|██████████| 124/124 [00:11<00:00, 10.58it/s]


Epochs: 25 | Train Loss:  0.006             | Train Accuracy:  1.000             | Val Loss:  0.056             | Val Accuracy:  0.967


100%|██████████| 124/124 [00:11<00:00, 10.43it/s]


Epochs: 26 | Train Loss:  0.006             | Train Accuracy:  1.000             | Val Loss:  0.061             | Val Accuracy:  0.967


100%|██████████| 124/124 [00:11<00:00, 10.93it/s]


Epochs: 27 | Train Loss:  0.006             | Train Accuracy:  1.000             | Val Loss:  0.057             | Val Accuracy:  0.967


100%|██████████| 124/124 [00:10<00:00, 11.65it/s]


Epochs: 28 | Train Loss:  0.005             | Train Accuracy:  1.000             | Val Loss:  0.061             | Val Accuracy:  0.967


100%|██████████| 124/124 [00:11<00:00, 10.85it/s]


Epochs: 29 | Train Loss:  0.005             | Train Accuracy:  1.000             | Val Loss:  0.061             | Val Accuracy:  0.967


100%|██████████| 124/124 [00:11<00:00, 10.52it/s]


Epochs: 30 | Train Loss:  0.004             | Train Accuracy:  1.000             | Val Loss:  0.064             | Val Accuracy:  0.967


([0.5399051464471534,
  0.36723971158025076,
  0.2590217306768155,
  0.1880532587474247,
  0.13244599051713302,
  0.0951645804865341,
  0.0720458215860665,
  0.05154752787554039,
  0.04012796216496239,
  0.03074831130972127,
  0.026416618899113405,
  0.023301957431186564,
  0.019743022903036877,
  0.01733019630703322,
  0.015141105219079156,
  0.013930004190482861,
  0.012469840675311268,
  0.011405393365780619,
  0.010413056269609382,
  0.009667155947887673,
  0.008789000791720946,
  0.008211548450199582,
  0.007377180351180368,
  0.007035737136765471,
  0.006341093962866502,
  0.005783883033877595,
  0.005578133039116217,
  0.004913670846795779,
  0.004927866349142356,
  0.004476373044979781],
 [0.4393530997304582,
  0.7115902964959568,
  0.8787061994609164,
  0.9110512129380054,
  0.954177897574124,
  0.9703504043126685,
  0.9784366576819407,
  0.9946091644204852,
  0.9946091644204852,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,


In [47]:
evaluate(model_bt, test_dataset)

Test Accuracy:  0.992


In [21]:
# torch.save(model.state_dict(), '../model/embeddigns-tf/model.pt')
# torch.save(model.state_dict(), '../model/embeddigns-tf/model_og.pt')

In [22]:
# valores de treino e validação nas 17 epochs para alteração de palavras por contexto
# [0.398, 0.591, 0.674, 0.737, 0.793, 0.842, 0.876, 0.903, 0.923, 0.936, 0.949, 0.960, 0.966, 0.964, 0.969, 0.971, 0.976]
# [0.517 0.647 0.671 0.700 0.695 0.702 0.699 0.683 0.682 0.686 0.695 0.706 0.690 0.710 0.700 0.690 0.695]

# valores de trein oe validação em 11 epochs na base sem alteração