In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
#!pip install pyswarms

In [5]:
import torch
import json
import pandas as pd
import torch.nn as nn
import numpy as np
import torch.optim as optim
from tqdm.auto import tqdm
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score, f1_score
from transformers import BertTokenizer, BertForSequenceClassification, get_scheduler
from transformers import ElectraTokenizer, ElectraForSequenceClassification
from pyswarms.single.global_best import GlobalBestPSO
import random

In [6]:
import os

# Define the new directory path
new_directory = '/content/drive/MyDrive/Colab Notebooks'

# Change the current working directory to the new one
os.chdir(new_directory)

# Verify the change by printing the current working directory
print("Current working directory:", os.getcwd())

Current working directory: /content/drive/MyDrive/Colab Notebooks


In [7]:
# Load the datasets
def load_dataset(file_path):
    with open(file_path, 'r') as file:
        data = [json.loads(line) for line in file]
    return data

train_data = load_dataset('Datasets/train.jsonl')
validation_data = load_dataset('Datasets/validation.jsonl')


# Limitation of size of data
train_data = random.sample(train_data, k= 4000)
validation_data = random.sample(validation_data, k= 500)


def text_extract (data):
    text, label = [], []
    for idx in range(len(data)):
        text.append(data[idx]['text'])
        label.append(data[idx]['label'])
    return text, label

train_texts, train_labels= text_extract (train_data)

dev_texts, dev_labels= text_extract (validation_data)

#test_texts, test_labels= text_extract (test_data)

# Load the pre-trained BERT tokenizer and model
#tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
tokenizer = ElectraTokenizer.from_pretrained('google/electra-base-discriminator')

# Tokenize the input texts
tokenized_train_texts = tokenizer(train_texts, padding=True, truncation=True, return_tensors='pt')

tokenized_dev_texts = tokenizer(dev_texts, padding=True, truncation=True, return_tensors='pt')

# Convert the labels to tensor
train_labels = torch.tensor(train_labels)

dev_labels = torch.tensor(dev_labels)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/27.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/666 [00:00<?, ?B/s]

In [8]:
# Define the objective function
def objective_function(params):

    Costs = np.zeros(params.shape[0])
    for i in range(params.shape[0]):
        learning_rate= params[i, 0]
        batch_size= int(params[i, 1])
        weight_decay= params[i, 2]
        num_epochs= 3


    # Create TensorDatasets for train, dev, and test sets
    train_dataset = TensorDataset(tokenized_train_texts['input_ids'], tokenized_train_texts['attention_mask'], train_labels)

    dev_dataset = TensorDataset(tokenized_dev_texts['input_ids'], tokenized_dev_texts['attention_mask'], dev_labels)

    # Create DataLoaders for train, dev, and test sets
    train_dataloader = DataLoader(train_dataset, batch_size= batch_size, shuffle= True)

    dev_dataloader = DataLoader(dev_dataset, batch_size= batch_size, shuffle= False)

    # Load the pre-trained BERT model for sequence classification
    #model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels= 6)
    model = ElectraForSequenceClassification.from_pretrained('google/electra-base-discriminator', num_labels=6)

    # Freeze BERT parameters
    for param in model.base_model.parameters():
        param.requires_grad = False

    # Define the optimizer for training the softmax layer
    optimizer = optim.Adam(model.classifier.parameters(), lr= learning_rate, weight_decay= weight_decay)

    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    model.to(device)

    # Define the loss function
    criterion = nn.CrossEntropyLoss()


    # Train the model
    best_dev_accuracy = 0.0
    best_model_state_dict = None
    Validation_results= []

    for epoch in range(num_epochs):
        # Training loop
        model.train()
        for batch in train_dataloader:
            input_ids, attention_mask, batch_labels = batch

            input_ids= input_ids.to(device)
            attention_mask= attention_mask.to(device)
            batch_labels= batch_labels.to(device)

            optimizer.zero_grad()
            outputs = model(input_ids= input_ids, attention_mask= attention_mask, labels= batch_labels)
            loss = outputs.loss
            loss.backward()
            optimizer.step()

        # Validation loop
        model.eval()
        dev_correct = 0
        total_dev = 0
        y_true = []
        y_pred = []
        loss_epoch= []
        with torch.no_grad():
            for batch in dev_dataloader:
                input_ids, attention_mask, batch_labels = batch

                input_ids= input_ids.to(device)
                attention_mask= attention_mask.to(device)
                batch_labels= batch_labels.to(device)

                outputs = model(input_ids=input_ids, attention_mask=attention_mask)
                logits = outputs.logits
                _, predicted = torch.max(logits, 1)

                # Append true labels and predicted labels for later use
                y_true.extend(batch_labels.tolist())
                y_pred.extend(predicted.tolist())

                # Calculate the loss
                loss = criterion(logits, batch_labels)
                loss_epoch.append(loss)

        # Calculate accuracy and F1 score
        f1 = f1_score(y_true, y_pred, average='weighted')
        accuracy = accuracy_score(y_true, y_pred)

        # Calculate the average loss
        loss_epoch_np = [tensor.cpu().detach().numpy() for tensor in loss_epoch]
        average_loss= np.mean(loss_epoch_np)
        print(f'learning_rate: {learning_rate}, batch_size: {batch_size}, weight_decay: {weight_decay}, num_epochs: {num_epochs}')
        print(f'epoch No. : {epoch}, Devset Accuracy : {round(accuracy,5)}, Devset f1_score : {round(f1,5)}, Average loss: {round(average_loss.tolist(),5)}')
        print()
    validation_loss= average_loss

    Costs[i]= (1/f1)**2

    # Return the validation loss as the objective value
    return Costs

In [9]:
# Define the bounds for the hyperparameters
lower_bound = np.array([1e-5, 16, 1e-5])
upper_bound = np.array([1e-2, 256, 1e-3])
bounds = (lower_bound, upper_bound)

In [10]:
# Initialize the optimizer
options={'c1': 0.5, 'c2': 0.3, 'w': 0.9}
optimizer = GlobalBestPSO(n_particles= 6, dimensions= 3, options=options, bounds=bounds)

# Run the optimization
best_costs, best_hyperparams = optimizer.optimize(objective_function, iters= 20)

# Print the best hyperparameters found
print("Best position:", best_hyperparams)
print("Best cost:", best_costs)

2024-02-05 14:01:53,787 - pyswarms.single.global_best - INFO - Optimize for 20 iters with {'c1': 0.5, 'c2': 0.3, 'w': 0.9}
pyswarms.single.global_best:   0%|          |0/20

pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


learning_rate: 0.0016997161546032083, batch_size: 190, weight_decay: 0.00029402594132142354, num_epochs: 3
epoch No. : 0, Devset Accuracy : 0.364, Devset f1_score : 0.27563, Average loss: 1.60002

learning_rate: 0.0016997161546032083, batch_size: 190, weight_decay: 0.00029402594132142354, num_epochs: 3
epoch No. : 1, Devset Accuracy : 0.394, Devset f1_score : 0.27864, Average loss: 1.57359



pyswarms.single.global_best:   5%|▌         |1/20, best_cost=0

learning_rate: 0.0016997161546032083, batch_size: 190, weight_decay: 0.00029402594132142354, num_epochs: 3
epoch No. : 2, Devset Accuracy : 0.426, Devset f1_score : 0.32896, Average loss: 1.56007



Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


learning_rate: 0.0002635488129192191, batch_size: 194, weight_decay: 0.0009324350851422423, num_epochs: 3
epoch No. : 0, Devset Accuracy : 0.396, Devset f1_score : 0.28632, Average loss: 1.62304

learning_rate: 0.0002635488129192191, batch_size: 194, weight_decay: 0.0009324350851422423, num_epochs: 3
epoch No. : 1, Devset Accuracy : 0.378, Devset f1_score : 0.27937, Average loss: 1.58546



pyswarms.single.global_best:  10%|█         |2/20, best_cost=0

learning_rate: 0.0002635488129192191, batch_size: 194, weight_decay: 0.0009324350851422423, num_epochs: 3
epoch No. : 2, Devset Accuracy : 0.382, Devset f1_score : 0.24645, Average loss: 1.58172



Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


learning_rate: 0.009207606678370743, batch_size: 204, weight_decay: 0.0009839133343799186, num_epochs: 3
epoch No. : 0, Devset Accuracy : 0.354, Devset f1_score : 0.25721, Average loss: 1.62754

learning_rate: 0.009207606678370743, batch_size: 204, weight_decay: 0.0009839133343799186, num_epochs: 3
epoch No. : 1, Devset Accuracy : 0.366, Devset f1_score : 0.24446, Average loss: 1.57733



pyswarms.single.global_best:  15%|█▌        |3/20, best_cost=0

learning_rate: 0.009207606678370743, batch_size: 204, weight_decay: 0.0009839133343799186, num_epochs: 3
epoch No. : 2, Devset Accuracy : 0.406, Devset f1_score : 0.30266, Average loss: 1.53491



Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


learning_rate: 0.004147474833099714, batch_size: 213, weight_decay: 0.0007067414029695109, num_epochs: 3
epoch No. : 0, Devset Accuracy : 0.29, Devset f1_score : 0.17335, Average loss: 1.60289

learning_rate: 0.004147474833099714, batch_size: 213, weight_decay: 0.0007067414029695109, num_epochs: 3
epoch No. : 1, Devset Accuracy : 0.404, Devset f1_score : 0.31975, Average loss: 1.57028



pyswarms.single.global_best:  20%|██        |4/20, best_cost=0

learning_rate: 0.004147474833099714, batch_size: 213, weight_decay: 0.0007067414029695109, num_epochs: 3
epoch No. : 2, Devset Accuracy : 0.362, Devset f1_score : 0.27891, Average loss: 1.59809



Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


learning_rate: 0.009454725550987924, batch_size: 213, weight_decay: 0.0004919559335539695, num_epochs: 3
epoch No. : 0, Devset Accuracy : 0.372, Devset f1_score : 0.28772, Average loss: 1.64456

learning_rate: 0.009454725550987924, batch_size: 213, weight_decay: 0.0004919559335539695, num_epochs: 3
epoch No. : 1, Devset Accuracy : 0.364, Devset f1_score : 0.28395, Average loss: 1.60368



pyswarms.single.global_best:  25%|██▌       |5/20, best_cost=0

learning_rate: 0.009454725550987924, batch_size: 213, weight_decay: 0.0004919559335539695, num_epochs: 3
epoch No. : 2, Devset Accuracy : 0.388, Devset f1_score : 0.2842, Average loss: 1.61437



Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


learning_rate: 0.004943790965088559, batch_size: 213, weight_decay: 0.0005257125022506914, num_epochs: 3
epoch No. : 0, Devset Accuracy : 0.39, Devset f1_score : 0.29738, Average loss: 1.60024

learning_rate: 0.004943790965088559, batch_size: 213, weight_decay: 0.0005257125022506914, num_epochs: 3
epoch No. : 1, Devset Accuracy : 0.402, Devset f1_score : 0.30522, Average loss: 1.56902



pyswarms.single.global_best:  30%|███       |6/20, best_cost=0

learning_rate: 0.004943790965088559, batch_size: 213, weight_decay: 0.0005257125022506914, num_epochs: 3
epoch No. : 2, Devset Accuracy : 0.406, Devset f1_score : 0.32458, Average loss: 1.57565



Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


learning_rate: 0.0027741900111479885, batch_size: 208, weight_decay: 0.0006098442404510475, num_epochs: 3
epoch No. : 0, Devset Accuracy : 0.37, Devset f1_score : 0.26079, Average loss: 1.58944

learning_rate: 0.0027741900111479885, batch_size: 208, weight_decay: 0.0006098442404510475, num_epochs: 3
epoch No. : 1, Devset Accuracy : 0.314, Devset f1_score : 0.21878, Average loss: 1.61273



pyswarms.single.global_best:  35%|███▌      |7/20, best_cost=0

learning_rate: 0.0027741900111479885, batch_size: 208, weight_decay: 0.0006098442404510475, num_epochs: 3
epoch No. : 2, Devset Accuracy : 0.388, Devset f1_score : 0.24738, Average loss: 1.56123



Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


learning_rate: 0.00468966852185271, batch_size: 195, weight_decay: 0.00029398216879257923, num_epochs: 3
epoch No. : 0, Devset Accuracy : 0.366, Devset f1_score : 0.21013, Average loss: 1.59749

learning_rate: 0.00468966852185271, batch_size: 195, weight_decay: 0.00029398216879257923, num_epochs: 3
epoch No. : 1, Devset Accuracy : 0.394, Devset f1_score : 0.31262, Average loss: 1.57032



pyswarms.single.global_best:  40%|████      |8/20, best_cost=0

learning_rate: 0.00468966852185271, batch_size: 195, weight_decay: 0.00029398216879257923, num_epochs: 3
epoch No. : 2, Devset Accuracy : 0.41, Devset f1_score : 0.31792, Average loss: 1.5294



Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


learning_rate: 0.0006864650904850008, batch_size: 186, weight_decay: 0.00010760741672911648, num_epochs: 3
epoch No. : 0, Devset Accuracy : 0.37, Devset f1_score : 0.29247, Average loss: 1.58176

learning_rate: 0.0006864650904850008, batch_size: 186, weight_decay: 0.00010760741672911648, num_epochs: 3
epoch No. : 1, Devset Accuracy : 0.388, Devset f1_score : 0.27737, Average loss: 1.55708



pyswarms.single.global_best:  45%|████▌     |9/20, best_cost=0

learning_rate: 0.0006864650904850008, batch_size: 186, weight_decay: 0.00010760741672911648, num_epochs: 3
epoch No. : 2, Devset Accuracy : 0.426, Devset f1_score : 0.33375, Average loss: 1.54241



Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


learning_rate: 0.0032043707937876487, batch_size: 181, weight_decay: 0.00035424796173918593, num_epochs: 3
epoch No. : 0, Devset Accuracy : 0.386, Devset f1_score : 0.29596, Average loss: 1.62647

learning_rate: 0.0032043707937876487, batch_size: 181, weight_decay: 0.00035424796173918593, num_epochs: 3
epoch No. : 1, Devset Accuracy : 0.392, Devset f1_score : 0.30394, Average loss: 1.59388



pyswarms.single.global_best:  50%|█████     |10/20, best_cost=0

learning_rate: 0.0032043707937876487, batch_size: 181, weight_decay: 0.00035424796173918593, num_epochs: 3
epoch No. : 2, Devset Accuracy : 0.42, Devset f1_score : 0.3309, Average loss: 1.54498



Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


learning_rate: 0.0008487880256938152, batch_size: 178, weight_decay: 0.00016106508463402736, num_epochs: 3
epoch No. : 0, Devset Accuracy : 0.296, Devset f1_score : 0.20005, Average loss: 1.5944

learning_rate: 0.0008487880256938152, batch_size: 178, weight_decay: 0.00016106508463402736, num_epochs: 3
epoch No. : 1, Devset Accuracy : 0.416, Devset f1_score : 0.32427, Average loss: 1.57035



pyswarms.single.global_best:  55%|█████▌    |11/20, best_cost=0

learning_rate: 0.0008487880256938152, batch_size: 178, weight_decay: 0.00016106508463402736, num_epochs: 3
epoch No. : 2, Devset Accuracy : 0.396, Devset f1_score : 0.28589, Average loss: 1.53219



Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


learning_rate: 0.005137929602280369, batch_size: 184, weight_decay: 0.0005804448307034473, num_epochs: 3
epoch No. : 0, Devset Accuracy : 0.378, Devset f1_score : 0.29758, Average loss: 1.61162

learning_rate: 0.005137929602280369, batch_size: 184, weight_decay: 0.0005804448307034473, num_epochs: 3
epoch No. : 1, Devset Accuracy : 0.372, Devset f1_score : 0.26063, Average loss: 1.56761



pyswarms.single.global_best:  60%|██████    |12/20, best_cost=0

learning_rate: 0.005137929602280369, batch_size: 184, weight_decay: 0.0005804448307034473, num_epochs: 3
epoch No. : 2, Devset Accuracy : 0.422, Devset f1_score : 0.33313, Average loss: 1.56298



Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


learning_rate: 0.00456165937921754, batch_size: 195, weight_decay: 0.00047165587950405517, num_epochs: 3
epoch No. : 0, Devset Accuracy : 0.302, Devset f1_score : 0.20603, Average loss: 1.64211

learning_rate: 0.00456165937921754, batch_size: 195, weight_decay: 0.00047165587950405517, num_epochs: 3
epoch No. : 1, Devset Accuracy : 0.378, Devset f1_score : 0.30045, Average loss: 1.59447



pyswarms.single.global_best:  65%|██████▌   |13/20, best_cost=0

learning_rate: 0.00456165937921754, batch_size: 195, weight_decay: 0.00047165587950405517, num_epochs: 3
epoch No. : 2, Devset Accuracy : 0.388, Devset f1_score : 0.30879, Average loss: 1.5712



Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


learning_rate: 0.00038435440872315466, batch_size: 207, weight_decay: 0.0006722929812182964, num_epochs: 3
epoch No. : 0, Devset Accuracy : 0.372, Devset f1_score : 0.29181, Average loss: 1.62661

learning_rate: 0.00038435440872315466, batch_size: 207, weight_decay: 0.0006722929812182964, num_epochs: 3
epoch No. : 1, Devset Accuracy : 0.382, Devset f1_score : 0.30446, Average loss: 1.59305



pyswarms.single.global_best:  70%|███████   |14/20, best_cost=0

learning_rate: 0.00038435440872315466, batch_size: 207, weight_decay: 0.0006722929812182964, num_epochs: 3
epoch No. : 2, Devset Accuracy : 0.39, Devset f1_score : 0.27864, Average loss: 1.58769



Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


learning_rate: 0.0030688692144887485, batch_size: 215, weight_decay: 0.0008994020541505999, num_epochs: 3
epoch No. : 0, Devset Accuracy : 0.264, Devset f1_score : 0.11713, Average loss: 1.67168

learning_rate: 0.0030688692144887485, batch_size: 215, weight_decay: 0.0008994020541505999, num_epochs: 3
epoch No. : 1, Devset Accuracy : 0.41, Devset f1_score : 0.30836, Average loss: 1.57306



pyswarms.single.global_best:  75%|███████▌  |15/20, best_cost=0

learning_rate: 0.0030688692144887485, batch_size: 215, weight_decay: 0.0008994020541505999, num_epochs: 3
epoch No. : 2, Devset Accuracy : 0.408, Devset f1_score : 0.29856, Average loss: 1.57061



Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


learning_rate: 0.003041953405311181, batch_size: 216, weight_decay: 0.0006064377161571813, num_epochs: 3
epoch No. : 0, Devset Accuracy : 0.312, Devset f1_score : 0.21079, Average loss: 1.63956

learning_rate: 0.003041953405311181, batch_size: 216, weight_decay: 0.0006064377161571813, num_epochs: 3
epoch No. : 1, Devset Accuracy : 0.42, Devset f1_score : 0.31995, Average loss: 1.57412



pyswarms.single.global_best:  80%|████████  |16/20, best_cost=0

learning_rate: 0.003041953405311181, batch_size: 216, weight_decay: 0.0006064377161571813, num_epochs: 3
epoch No. : 2, Devset Accuracy : 0.436, Devset f1_score : 0.33779, Average loss: 1.55606



Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


learning_rate: 0.0001398761509810377, batch_size: 217, weight_decay: 0.0008173616835966927, num_epochs: 3
epoch No. : 0, Devset Accuracy : 0.366, Devset f1_score : 0.20009, Average loss: 1.61236

learning_rate: 0.0001398761509810377, batch_size: 217, weight_decay: 0.0008173616835966927, num_epochs: 3
epoch No. : 1, Devset Accuracy : 0.37, Devset f1_score : 0.28271, Average loss: 1.61355



pyswarms.single.global_best:  85%|████████▌ |17/20, best_cost=0

learning_rate: 0.0001398761509810377, batch_size: 217, weight_decay: 0.0008173616835966927, num_epochs: 3
epoch No. : 2, Devset Accuracy : 0.372, Devset f1_score : 0.27425, Average loss: 1.59769



Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


learning_rate: 0.005987106621197414, batch_size: 218, weight_decay: 0.0009786891858496191, num_epochs: 3
epoch No. : 0, Devset Accuracy : 0.382, Devset f1_score : 0.26515, Average loss: 1.60847

learning_rate: 0.005987106621197414, batch_size: 218, weight_decay: 0.0009786891858496191, num_epochs: 3
epoch No. : 1, Devset Accuracy : 0.296, Devset f1_score : 0.17655, Average loss: 1.61071



pyswarms.single.global_best:  90%|█████████ |18/20, best_cost=0

learning_rate: 0.005987106621197414, batch_size: 218, weight_decay: 0.0009786891858496191, num_epochs: 3
epoch No. : 2, Devset Accuracy : 0.418, Devset f1_score : 0.3382, Average loss: 1.59263



Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


learning_rate: 0.009755231580132239, batch_size: 218, weight_decay: 0.0006931419888689351, num_epochs: 3
epoch No. : 0, Devset Accuracy : 0.368, Devset f1_score : 0.20133, Average loss: 1.67837

learning_rate: 0.009755231580132239, batch_size: 218, weight_decay: 0.0006931419888689351, num_epochs: 3
epoch No. : 1, Devset Accuracy : 0.37, Devset f1_score : 0.20847, Average loss: 1.60812



pyswarms.single.global_best:  95%|█████████▌|19/20, best_cost=0

learning_rate: 0.009755231580132239, batch_size: 218, weight_decay: 0.0006931419888689351, num_epochs: 3
epoch No. : 2, Devset Accuracy : 0.38, Devset f1_score : 0.28673, Average loss: 1.58275



Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


learning_rate: 0.00934195625647743, batch_size: 218, weight_decay: 0.0004690122200195672, num_epochs: 3
epoch No. : 0, Devset Accuracy : 0.37, Devset f1_score : 0.21931, Average loss: 1.63837

learning_rate: 0.00934195625647743, batch_size: 218, weight_decay: 0.0004690122200195672, num_epochs: 3
epoch No. : 1, Devset Accuracy : 0.376, Devset f1_score : 0.27905, Average loss: 1.6079



pyswarms.single.global_best: 100%|██████████|20/20, best_cost=0
2024-02-05 14:20:54,632 - pyswarms.single.global_best - INFO - Optimization finished | best cost: 0.0, best pos: [8.17743188e-03 2.18181349e+02 2.54979231e-04]


learning_rate: 0.00934195625647743, batch_size: 218, weight_decay: 0.0004690122200195672, num_epochs: 3
epoch No. : 2, Devset Accuracy : 0.392, Devset f1_score : 0.29636, Average loss: 1.58194

Best position: [8.17743188e-03 2.18181349e+02 2.54979231e-04]
Best cost: 0.0


In [11]:
learning_rate= best_hyperparams[0]
batch_size= int(best_hyperparams[1])
weight_decay= best_hyperparams[2]

learning_rate, batch_size, weight_decay

(0.008177431877937414, 218, 0.00025497923120386347)