In [1]:
import torch
import pandas as pd 
import random 
import time
import datetime
from sklearn.model_selection import train_test_split, StratifiedKFold, KFold
from sklearn.metrics import f1_score
import numpy as np 
from torch.utils.data import TensorDataset,Subset
from transformers import BertTokenizer
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from torch.nn import functional as F

In [2]:
from nltk.stem import WordNetLemmatizer
import nltk
from nltk.tokenize import TweetTokenizer

In [3]:
torch.cuda.device_count()

1

In [4]:
torch.cuda.current_device()

0

In [5]:
torch.cuda.is_available()

True

In [6]:
# If there's a GPU available...
if torch.cuda.is_available():    

    # Tell PyTorch to use the GPU.    
    device = torch.device("cuda")

    print('There are %d GPU(s) available.' % torch.cuda.device_count())

    print('We will use the GPU:', torch.cuda.get_device_name(0))

# If not...
else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

There are 1 GPU(s) available.
We will use the GPU: NVIDIA GeForce RTX 3060 Laptop GPU


In [7]:
# Load the BERT tokenizer.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

In [8]:
nltk.download('wordnet')
lemmatizer = WordNetLemmatizer()

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\thiag\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [9]:
def tweet_tokenize(texto):
  tokenizer = TweetTokenizer()
  tokens = tokenizer.tokenize(texto.lower())

  tokens = [token for token in tokens]

  return tokens

In [10]:
def preprocess_text(tokens):
    tokens = [lemmatizer.lemmatize(token) for token in tokens]
    processed_text = " ".join(tokens)
    processed_text = " ".join(processed_text.split())

    return processed_text

In [11]:
data = pd.read_csv('cyber-troll.csv')

In [12]:
data['content'] = data['content'].apply(tweet_tokenize)
data['content'] = data['content'].apply(preprocess_text)

In [13]:
train_data = data.sample(n=18001, random_state=42)

remaining_data = data.drop(train_data.index)

test_data = remaining_data.sample(n=2000, random_state=42)

In [15]:
sentences = train_data.content.values
labels = train_data.label.values

In [16]:
max_length = 64

In [17]:
# Tokenize all of the sentences and map the tokens to thier word IDs.
input_ids = []
attention_masks = []

# For every sentence...
for sent in sentences:
  
    encoded_dict = tokenizer.encode_plus(
                        sent,                      # Sentence to encode.
                        add_special_tokens = True, # Add '[CLS]' and '[SEP]'
                        max_length = max_length,           # Pad & truncate all sentences.
                        padding = 'max_length',
                        truncation = True,
                        return_attention_mask = True,   # Construct attn. masks.
                        return_tensors = 'pt',     # Return pytorch tensors.
                   )
    
    # Add the encoded sentence to the list.    
    input_ids.append(encoded_dict['input_ids'])
    
    # And its attention mask (simply differentiates padding from non-padding).
    attention_masks.append(encoded_dict['attention_mask'])

# Convert the lists into tensors.
input_ids = torch.cat(input_ids, dim=0)
attention_masks = torch.cat(attention_masks, dim=0)
labels = torch.tensor(labels)

# Print sentence 0, now as a list of IDs.
print('Original: ', sentences[0])
print('Token IDs:', input_ids[0])

Original:  the nerd inthe shop lead me to believe it wa a rechargeable !
Token IDs: tensor([  101,  1996, 11265,  4103, 20014,  5369,  4497,  2599,  2033,  2000,
         2903,  2009, 11333,  1037, 28667,  8167,  3351,  3085,   999,   102,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0])


In [18]:
batch_size = 16 
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
#helper function to get train and val data loaders for each fold 
def get_data_loaders(dataset,train_indexes,val_indexes):
    train_tensor = Subset(dataset,train_indexes)
    val_tensor = Subset(dataset,val_indexes)
    train_dataloader = DataLoader(
            train_tensor, 
            sampler = RandomSampler(train_tensor), 
            batch_size = batch_size
        )

    val_dataloader = DataLoader(
            val_tensor, 
            sampler = SequentialSampler(val_tensor), 
            batch_size = batch_size 
        )
    return train_dataloader,val_dataloader

In [19]:
# Combine the training inputs into a TensorDataset.
dataset = TensorDataset(input_ids, attention_masks, labels)

In [20]:
sentences = test_data.content.values
input_ids = []
attention_masks = []

In [21]:
for sent in sentences:

    encoded_dict = tokenizer.encode_plus(
                        sent,                      # Sentence to encode.
                        add_special_tokens = True, # Add '[CLS]' and '[SEP]'
                        max_length = max_length,           # Pad & truncate all sentences.
                        padding = 'max_length',
                        truncation = True,
                        return_attention_mask = True,   # Construct attn. masks.
                        return_tensors = 'pt',     # Return pytorch tensors.
                   )
    
    # Add the encoded sentence to the list.    
    input_ids.append(encoded_dict['input_ids'])
    # And its attention mask (simply differentiates padding from non-padding).
    attention_masks.append(encoded_dict['attention_mask'])

# Convert the lists into tensors.
input_ids = torch.cat(input_ids, dim=0)
attention_masks = torch.cat(attention_masks, dim=0)
# Set the batch size.  
batch_size = 16  
# Create the DataLoader.
prediction_data = TensorDataset(input_ids, attention_masks)
prediction_sampler = SequentialSampler(prediction_data)
prediction_dataloader = DataLoader(prediction_data, sampler=prediction_sampler, batch_size=batch_size)

In [22]:
from transformers import BertForSequenceClassification, BertConfig

def get_bert_model():
    model = BertForSequenceClassification.from_pretrained(
      "bert-base-uncased", 
      num_labels = 2,           
      output_attentions = False, 
      output_hidden_states = False, 
    )
    # Tell pytorch to run this model on the GPU.
    if torch.cuda.is_available():
        model.cuda()
    else:
        model.cpu()
    #model.gpu()
    return model

In [23]:
import numpy as np
# Function to calculate the accuracy of our predictions vs labels
def flat_accuracy(preds, labels):
    pred_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    return np.sum(pred_flat == labels_flat) / len(labels_flat)

In [24]:
def format_time(elapsed):
    '''
    Takes a time in seconds and returns a string hh:mm:ss
    '''
    elapsed_rounded = int(round((elapsed)))
    return str(datetime.timedelta(seconds=elapsed_rounded))

In [25]:
# Set the seed value all over the place to make this reproducible.
seed_val = 1000
random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

In [26]:
total_folds = 10
current_fold = 0
all_folds_preds = []
epochs = 20
fold=StratifiedKFold(n_splits=total_folds, shuffle=True, random_state=seed_val)

training_stats = []

In [27]:
# Measure the total training time for the whole run.
total_t0 = time.time()
#for each fold..
for train_index, test_index in fold.split(train_data,train_data['label']):
    model = get_bert_model()
    optimizer = torch.optim.AdamW(model.parameters(),lr = 5e-5,eps = 1e-8)
    current_fold = current_fold+1
    train_dataloader,validation_dataloader = get_data_loaders(dataset,train_index,test_index)
    print("")
    print('================= Fold {:} / {:} ================='.format(current_fold,total_folds))
    # For each epoch...
    for epoch_i in range(0, epochs):
        # ========================================
        #               Training
        # ========================================

        # Perform one full pass over the training set.

        print("")
        print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
        print('Training...')

        # Measure how long the training epoch takes.
        t0 = time.time()

        # Reset the total loss for this epoch.
        total_train_loss = 0
        model.train()
        # For each batch of training data...
        for step, batch in enumerate(train_dataloader):

            b_input_ids = batch[0].to(device)
            b_input_mask = batch[1].to(device)
            b_labels = batch[2].to(device)
            model.zero_grad()        

            outputs = model(b_input_ids, 
                              token_type_ids=None, 
                              attention_mask=b_input_mask, 
                              labels=b_labels)

            loss = outputs.loss
            logits = outputs.logits


            total_train_loss += loss.item()

            # Perform a backward pass to calculate the gradients.
            loss.backward()

            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            #update weights
            optimizer.step()


        # Calculate the average loss over all of the batches.
        avg_train_loss = total_train_loss / len(train_dataloader)            

        # Measure how long this epoch took.
        training_time = format_time(time.time() - t0)

        print("")
        print("  Average training loss: {0:.2f}".format(avg_train_loss))
        print("  Training epcoh took: {:}".format(training_time))

        # ========================================
        #               Validation
        # ========================================
        # After the completion of each training epoch, measure our performance on
        # our validation set.

        print("")
        print("Running Validation...")

        t0 = time.time()

        # Put the model in evaluation mode--the dropout layers behave differently
        # during evaluation.
        model.eval()

        # Tracking variables 
        total_f1_score = 0
        total_eval_accuracy = 0
        total_eval_loss = 0
        nb_eval_steps = 0

        # Evaluate data for one epoch
        for batch in validation_dataloader:


            b_input_ids = batch[0].to(device)
            b_input_mask = batch[1].to(device)
            b_labels = batch[2].to(device)

            with torch.no_grad():        
                outputs = model(b_input_ids, 
                                        token_type_ids=None, 
                                        attention_mask=b_input_mask,
                                        labels=b_labels)
                loss = outputs.loss
                logits = outputs.logits

            # Accumulate the validation loss.
            total_eval_loss += loss.item()

            # Move logits and labels to CPU
            logits = logits.detach().cpu().numpy()
            label_ids = b_labels.to('cpu').numpy()

            # Calculate the accuracy for this batch of test sentences, and
            # accumulate it over all batches.
            total_eval_accuracy += flat_accuracy(logits, label_ids)
            total_f1_score += f1_score(np.argmax(logits,axis=1),label_ids)

        # Report the final accuracy and f1_score for this validation run.
        avg_val_accuracy = total_eval_accuracy / len(validation_dataloader)
        print("  Accuracy: {0:.2f}".format(avg_val_accuracy))
        
        avg_f1_score = total_f1_score / len(validation_dataloader)
        print("  F1_score: {0:.2f}".format(avg_f1_score))

        # Calculate the average loss over all of the batches.
        avg_val_loss = total_eval_loss / len(validation_dataloader)

        # Measure how long the validation run took.
        validation_time = format_time(time.time() - t0)

        print("  Validation Loss: {0:.2f}".format(avg_val_loss))
        print("  Validation took: {:}".format(validation_time))

        # Record all statistics from this epoch.
        training_stats.append(
          {
              'epoch': epoch_i + 1,
              'Training Loss': avg_train_loss,
              'Valid. Loss': avg_val_loss,
              'Valid. Accur.': avg_val_accuracy,
              'f1_score' : avg_f1_score,
              'Training Time': training_time,
              'Validation Time': validation_time,
              'fold' : current_fold
              
          }
        )

    print("")
    print("Training complete!")

    print("Total training took {:} (h:mm:ss)".format(format_time(time.time()-total_t0)))

    # ========================================
    # Predicting and saving predictions for all folds
    # ========================================

    print("")
    print("now predicting for this fold")

    # Put model in evaluation mode
    model.eval()
    # Tracking variables 
    predictions  = []
    # Predict 
    for batch in prediction_dataloader:
        # Add batch to GPU
        batch = tuple(t.to(device) for t in batch)
        # Unpack the inputs from our dataloader
        b_input_ids, b_input_mask = batch
        # speeding up prediction
        with torch.no_grad():
            # Forward pass, calculate logit predictions
            outputs = model(b_input_ids, token_type_ids=None, 
                            attention_mask=b_input_mask)

        logits = outputs[0]

        # Move logits and labels to CPU
        logits = logits.detach().cpu().numpy()


        predictions.append(logits)

    stack = np.vstack(predictions)
    # final_preds = F.softmax(torch.from_numpy(stack))[:,1].numpy()

    final_preds = F.softmax(torch.from_numpy(stack), dim=1)[:, 1].numpy()
    all_folds_preds.append(final_preds)
print("Completed")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




Training...

  Average training loss: 0.50
  Training epcoh took: 0:02:17

Running Validation...
  Accuracy: 0.79
  F1_score: 0.72
  Validation Loss: 0.43
  Validation took: 0:00:04

Training...

  Average training loss: 0.29
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.87
  F1_score: 0.82
  Validation Loss: 0.38
  Validation took: 0:00:04

Training...

  Average training loss: 0.14
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.89
  F1_score: 0.85
  Validation Loss: 0.43
  Validation took: 0:00:04

Training...

  Average training loss: 0.10
  Training epcoh took: 0:02:17

Running Validation...
  Accuracy: 0.90
  F1_score: 0.87
  Validation Loss: 0.42
  Validation took: 0:00:04

Training...

  Average training loss: 0.08
  Training epcoh took: 0:02:17

Running Validation...
  Accuracy: 0.88
  F1_score: 0.85
  Validation Loss: 0.56
  Validation took: 0:00:04

Training...

  Average training loss: 0.08
  Training epcoh took: 0:02:17

Running 

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




Training...

  Average training loss: 0.50
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.80
  F1_score: 0.76
  Validation Loss: 0.44
  Validation took: 0:00:04

Training...

  Average training loss: 0.29
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.88
  F1_score: 0.84
  Validation Loss: 0.34
  Validation took: 0:00:04

Training...

  Average training loss: 0.16
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.88
  F1_score: 0.85
  Validation Loss: 0.51
  Validation took: 0:00:04

Training...

  Average training loss: 0.11
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.91
  F1_score: 0.88
  Validation Loss: 0.39
  Validation took: 0:00:04

Training...

  Average training loss: 0.08
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.91
  F1_score: 0.88
  Validation Loss: 0.48
  Validation took: 0:00:04

Training...

  Average training loss: 0.07
  Training epcoh took: 0:02:16

Running 

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




Training...

  Average training loss: 0.50
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.80
  F1_score: 0.75
  Validation Loss: 0.44
  Validation took: 0:00:04

Training...

  Average training loss: 0.30
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.87
  F1_score: 0.84
  Validation Loss: 0.33
  Validation took: 0:00:04

Training...

  Average training loss: 0.16
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.91
  F1_score: 0.88
  Validation Loss: 0.35
  Validation took: 0:00:04

Training...

  Average training loss: 0.10
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.90
  F1_score: 0.88
  Validation Loss: 0.61
  Validation took: 0:00:04

Training...

  Average training loss: 0.08
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.88
  F1_score: 0.86
  Validation Loss: 0.61
  Validation took: 0:00:04

Training...

  Average training loss: 0.08
  Training epcoh took: 0:02:16

Running 

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




Training...

  Average training loss: 0.51
  Training epcoh took: 0:02:16

Running Validation...


  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


  Accuracy: 0.79
  F1_score: 0.67
  Validation Loss: 0.43
  Validation took: 0:00:04

Training...

  Average training loss: 0.29
  Training epcoh took: 0:02:16

Running Validation...


  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


  Accuracy: 0.87
  F1_score: 0.83
  Validation Loss: 0.30
  Validation took: 0:00:04

Training...

  Average training loss: 0.16
  Training epcoh took: 0:02:16

Running Validation...


  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


  Accuracy: 0.89
  F1_score: 0.86
  Validation Loss: 0.36
  Validation took: 0:00:04

Training...

  Average training loss: 0.11
  Training epcoh took: 0:02:16

Running Validation...


  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


  Accuracy: 0.88
  F1_score: 0.85
  Validation Loss: 0.60
  Validation took: 0:00:04

Training...

  Average training loss: 0.09
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.89
  F1_score: 0.86
  Validation Loss: 0.53
  Validation took: 0:00:04

Training...

  Average training loss: 0.07
  Training epcoh took: 0:02:16

Running Validation...


  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


  Accuracy: 0.79
  F1_score: 0.76
  Validation Loss: 1.06
  Validation took: 0:00:04

Training...

  Average training loss: 0.07
  Training epcoh took: 0:02:16

Running Validation...


  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


  Accuracy: 0.87
  F1_score: 0.83
  Validation Loss: 0.59
  Validation took: 0:00:04

Training...

  Average training loss: 0.06
  Training epcoh took: 0:02:16

Running Validation...


  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


  Accuracy: 0.92
  F1_score: 0.88
  Validation Loss: 0.41
  Validation took: 0:00:04

Training...

  Average training loss: 0.06
  Training epcoh took: 0:02:16

Running Validation...


  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


  Accuracy: 0.87
  F1_score: 0.84
  Validation Loss: 0.83
  Validation took: 0:00:04

Training...

  Average training loss: 0.05
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.89
  F1_score: 0.85
  Validation Loss: 0.60
  Validation took: 0:00:04

Training...

  Average training loss: 0.05
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.89
  F1_score: 0.86
  Validation Loss: 0.63
  Validation took: 0:00:04

Training...

  Average training loss: 0.05
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.87
  F1_score: 0.83
  Validation Loss: 0.77
  Validation took: 0:00:04

Training...

  Average training loss: 0.07
  Training epcoh took: 0:02:16

Running Validation...


  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


  Accuracy: 0.91
  F1_score: 0.87
  Validation Loss: 0.41
  Validation took: 0:00:04

Training...

  Average training loss: 0.05
  Training epcoh took: 0:02:16

Running Validation...


  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


  Accuracy: 0.88
  F1_score: 0.85
  Validation Loss: 0.71
  Validation took: 0:00:04

Training...

  Average training loss: 0.05
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.89
  F1_score: 0.85
  Validation Loss: 0.62
  Validation took: 0:00:04

Training...

  Average training loss: 0.06
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.90
  F1_score: 0.86
  Validation Loss: 0.54
  Validation took: 0:00:04

Training...

  Average training loss: 0.05
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.90
  F1_score: 0.87
  Validation Loss: 0.52
  Validation took: 0:00:04

Training...

  Average training loss: 0.04
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.89
  F1_score: 0.86
  Validation Loss: 0.54
  Validation took: 0:00:04

Training...

  Average training loss: 0.05
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.90
  F1_score: 0.87
  Validation Loss: 0.51
  Validation took: 0:00:04


  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


  Accuracy: 0.89
  F1_score: 0.85
  Validation Loss: 0.64
  Validation took: 0:00:04

Training complete!
Total training took 3:07:13 (h:mm:ss)

now predicting for this fold


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




Training...

  Average training loss: 0.51
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.78
  F1_score: 0.71
  Validation Loss: 0.44
  Validation took: 0:00:04

Training...

  Average training loss: 0.33
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.89
  F1_score: 0.85
  Validation Loss: 0.33
  Validation took: 0:00:04

Training...

  Average training loss: 0.17
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.91
  F1_score: 0.89
  Validation Loss: 0.36
  Validation took: 0:00:04

Training...

  Average training loss: 0.12
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.88
  F1_score: 0.85
  Validation Loss: 0.50
  Validation took: 0:00:04

Training...

  Average training loss: 0.10
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.91
  F1_score: 0.88
  Validation Loss: 0.53
  Validation took: 0:00:04

Training...

  Average training loss: 0.07
  Training epcoh took: 0:02:16

Running 

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




Training...

  Average training loss: 0.51
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.78
  F1_score: 0.71
  Validation Loss: 0.44
  Validation took: 0:00:04

Training...

  Average training loss: 0.32
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.86
  F1_score: 0.80
  Validation Loss: 0.35
  Validation took: 0:00:04

Training...

  Average training loss: 0.17
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.90
  F1_score: 0.86
  Validation Loss: 0.35
  Validation took: 0:00:04

Training...

  Average training loss: 0.12
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.87
  F1_score: 0.85
  Validation Loss: 0.62
  Validation took: 0:00:04

Training...

  Average training loss: 0.09
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.92
  F1_score: 0.89
  Validation Loss: 0.38
  Validation took: 0:00:04

Training...

  Average training loss: 0.08
  Training epcoh took: 0:02:16

Running 

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




Training...

  Average training loss: 0.51
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.77
  F1_score: 0.64
  Validation Loss: 0.48
  Validation took: 0:00:04

Training...

  Average training loss: 0.31
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.89
  F1_score: 0.84
  Validation Loss: 0.30
  Validation took: 0:00:04

Training...

  Average training loss: 0.16
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.89
  F1_score: 0.86
  Validation Loss: 0.45
  Validation took: 0:00:04

Training...

  Average training loss: 0.11
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.86
  F1_score: 0.83
  Validation Loss: 0.74
  Validation took: 0:00:04

Training...

  Average training loss: 0.09
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.91
  F1_score: 0.89
  Validation Loss: 0.48
  Validation took: 0:00:04

Training...

  Average training loss: 0.08
  Training epcoh took: 0:02:16

Running 

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




Training...

  Average training loss: 0.51
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.78
  F1_score: 0.71
  Validation Loss: 0.43
  Validation took: 0:00:04

Training...

  Average training loss: 0.32
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.87
  F1_score: 0.83
  Validation Loss: 0.32
  Validation took: 0:00:04

Training...

  Average training loss: 0.17
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.88
  F1_score: 0.85
  Validation Loss: 0.43
  Validation took: 0:00:04

Training...

  Average training loss: 0.12
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.89
  F1_score: 0.86
  Validation Loss: 0.55
  Validation took: 0:00:04

Training...

  Average training loss: 0.10
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.91
  F1_score: 0.89
  Validation Loss: 0.48
  Validation took: 0:00:04

Training...

  Average training loss: 0.09
  Training epcoh took: 0:02:16

Running 

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




Training...

  Average training loss: 0.51
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.78
  F1_score: 0.67
  Validation Loss: 0.45
  Validation took: 0:00:04

Training...

  Average training loss: 0.35
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.87
  F1_score: 0.83
  Validation Loss: 0.35
  Validation took: 0:00:04

Training...

  Average training loss: 0.18
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.88
  F1_score: 0.85
  Validation Loss: 0.45
  Validation took: 0:00:04

Training...

  Average training loss: 0.12
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.88
  F1_score: 0.85
  Validation Loss: 0.49
  Validation took: 0:00:04

Training...

  Average training loss: 0.09
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.89
  F1_score: 0.87
  Validation Loss: 0.52
  Validation took: 0:00:04

Training...

  Average training loss: 0.08
  Training epcoh took: 0:02:16

Running 

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




Training...

  Average training loss: 0.52
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.80
  F1_score: 0.71
  Validation Loss: 0.41
  Validation took: 0:00:04

Training...

  Average training loss: 0.34
  Training epcoh took: 0:02:16

Running Validation...
  Accuracy: 0.87
  F1_score: 0.82
  Validation Loss: 0.33
  Validation took: 0:00:04

Training...

  Average training loss: 0.19
  Training epcoh took: 0:02:19

Running Validation...
  Accuracy: 0.89
  F1_score: 0.85
  Validation Loss: 0.41
  Validation took: 0:00:04

Training...

  Average training loss: 0.12
  Training epcoh took: 0:02:17

Running Validation...
  Accuracy: 0.93
  F1_score: 0.90
  Validation Loss: 0.34
  Validation took: 0:00:04

Training...

  Average training loss: 0.10
  Training epcoh took: 0:02:17

Running Validation...
  Accuracy: 0.92
  F1_score: 0.89
  Validation Loss: 0.33
  Validation took: 0:00:04

Training...

  Average training loss: 0.08
  Training epcoh took: 0:02:17

Running 

In [28]:
pd.set_option('display.precision', 2)
df_stats = pd.DataFrame(data=training_stats)
df_stats = df_stats.set_index('fold')
df_stats

Unnamed: 0_level_0,epoch,Training Loss,Valid. Loss,Valid. Accur.,f1_score,Training Time,Validation Time
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,1,0.50,0.43,0.79,0.72,0:02:17,0:00:04
1,2,0.29,0.38,0.87,0.82,0:02:16,0:00:04
1,3,0.14,0.43,0.89,0.85,0:02:16,0:00:04
1,4,0.10,0.42,0.90,0.87,0:02:17,0:00:04
1,5,0.08,0.56,0.88,0.85,0:02:17,0:00:04
...,...,...,...,...,...,...,...
10,16,0.08,0.48,0.90,0.87,0:02:17,0:00:04
10,17,0.09,0.42,0.90,0.87,0:02:17,0:00:04
10,18,0.08,0.42,0.91,0.88,0:02:17,0:00:04
10,19,0.07,0.34,0.93,0.90,0:02:17,0:00:04


In [29]:
final_preds

array([0.99677926, 0.00211995, 0.00211944, ..., 0.00211969, 0.0021199 ,
       0.99677426], dtype=float32)

In [30]:
final_preds = np.mean(all_folds_preds, axis=0)

In [31]:
final_preds

array([0.9959502 , 0.12816313, 0.00244647, ..., 0.00244815, 0.00253691,
       0.6034086 ], dtype=float32)

In [32]:
all_folds_preds[0]

array([0.9973163 , 0.26504335, 0.00221469, ..., 0.00226765, 0.00229953,
       0.0128843 ], dtype=float32)

In [33]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

binary_predictions = (final_preds > 0.5).astype(int)

labels_true = test_data.label.values

accuracy = accuracy_score(labels_true, binary_predictions)
f1 = f1_score(labels_true, binary_predictions)
recall = recall_score(labels_true, binary_predictions)
precision = precision_score(labels_true, binary_predictions)

print("Accuracy:", accuracy)
print("F1-score:", f1)
print("Recall:", recall)
print("Precision:", precision)

test_data['predictions'] = binary_predictions

Accuracy: 0.9405
F1-score: 0.9301233118027011
Recall: 0.9658536585365853
Precision: 0.8969422423556059


In [35]:
# Accuracy: 0.9405
# F1-score: 0.9301233118027011
# Recall: 0.9658536585365853
# Precision: 0.8969422423556059