<a href="https://colab.research.google.com/github/krishan30/AiLERT-FYP/blob/main/DCL_Framework_with_huggingface_hyper_parameter_tuning_RoBERTa_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Importing Libraries

In [None]:
!pip install transformers
!pip install evaluate
!pip install accelerate -U
!pip install torchmetrics
!pip install optuna_dashboard
!pip install optuna
!pip install -U "neptune[optuna]"

Collecting evaluate
  Downloading evaluate-0.4.1-py3-none-any.whl (84 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datasets>=2.0.0 (from evaluate)
  Downloading datasets-2.15.0-py3-none-any.whl (521 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m521.2/521.2 kB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
Collecting dill (from evaluate)
  Downloading dill-0.3.7-py3-none-any.whl (115 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
Collecting multiprocess (from evaluate)
  Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
Collecting responses<0.19 (from evaluate)
  Downloading responses-0.18.0-py3-none-any.whl (38 kB)
Collecting pyarrow-hotfix (from datasets>=2.0.0->ev

In [None]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader,WeightedRandomSampler
from transformers import AutoModel,AutoTokenizer,AdamW,get_scheduler
from tqdm import tqdm
import evaluate
from datasets import load_dataset
from tqdm.auto import tqdm
import numpy as np
import random
from torchmetrics.classification import BinaryAccuracy
import optuna
from google.colab import userdata
import neptune
import uuid
import neptune.integrations.optuna as npt_utils
import  hashlib
import time

## Setting up the GPU or CPU

In [None]:
# Restrict CUDA to the first GPU for this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# Copy the Neptune credentials from the Colab secret store into the
# environment variables of the same name.
for _secret_name in ("NEPTUNE_API_TOKEN", "NEPTUNE_PROJECT"):
    os.environ[_secret_name] = userdata.get(_secret_name)
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

## Hyperparameter Values

In [None]:
#@title #Hyperparameter Values
# Notebook-level defaults, exposed as Colab form fields through the `@param`
# annotations. Note that objective() draws its own EPOCHS, ALPHA, GAMMA,
# TEMP_1, TEMP_2 and BATCH_SIZE for every Optuna trial.
BATCH_SIZE = 144 # @param {type:"integer"}
EPOCHS = 2 # @param {type:"integer"}
DROPOUT =0.5# @param {type:"number"}
ALPHA = 0.4601079528693497 # @param {type:"number"}
GAMMA = 4.932651675660353 # @param {type:"number"}
TEMP_1 = 0.14630528484684996 # @param {type:"number"}
TEMP_2 =  0.07242998464641397# @param {type:"number"}
LAMBDA = 1  # passed to train_model as `lamda`; multiplies the summed contrastive losses
SEED = 1234 # @param {type:"integer"}
PADDING_MAX_LENGTH = 45 # @param {type:"integer"}


## Setting Random Seed for Reproducibility

In [None]:
def setup_seed(seed:int):
    """Seed every random number generator the notebook relies on.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and all CUDA
    devices), then configures cuDNN for repeatable results.

    Args:
        seed: Value used to seed all generators.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Deterministic kernels alone are not sufficient: when the cuDNN auto-tuner
    # is enabled it may select a different algorithm from run to run, so it is
    # pinned off explicitly alongside the deterministic flag.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False



In [None]:
setup_seed(SEED)

In [None]:
# @title Bert Model
bert_model_name = "roberta-base" # @param {type:"string"}

## Loading Train,Dev,Test Datasets

In [None]:
dataset = load_dataset("krishan-CSE/HatEval_New")

Downloading readme:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/1.09M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/139k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/385k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

## Loading the Tokenizer for the Transformer Model

In [None]:
tokenizer = AutoTokenizer.from_pretrained(bert_model_name)

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [None]:
def tokenize_function(examples):
    """Tokenize the ``"text"`` field of a batch of examples.

    Every sequence is padded or truncated to ``PADDING_MAX_LENGTH`` tokens
    (special tokens included) using the notebook-level ``tokenizer``.

    Args:
        examples: Mapping with a ``"text"`` entry holding the raw strings.

    Returns:
        The encoding produced by ``tokenizer.batch_encode_plus``.
    """
    encode_options = dict(
        padding='max_length',
        max_length=PADDING_MAX_LENGTH,
        add_special_tokens=True,
        truncation=True,
    )
    return tokenizer.batch_encode_plus(examples["text"], **encode_options)

## Tokenize the Dataset

In [None]:
dataset

DatasetDict({
    train: Dataset({
        features: ['text', 'labels'],
        num_rows: 8982
    })
    validation: Dataset({
        features: ['text', 'labels'],
        num_rows: 998
    })
    test: Dataset({
        features: ['text', 'labels'],
        num_rows: 3000
    })
})

In [None]:
tokenized_datasets = dataset.map(tokenize_function, batched=True)

Map:   0%|          | 0/8982 [00:00<?, ? examples/s]

Map:   0%|          | 0/998 [00:00<?, ? examples/s]

Map:   0%|          | 0/3000 [00:00<?, ? examples/s]

In [None]:
tokenized_datasets

DatasetDict({
    train: Dataset({
        features: ['text', 'labels', 'input_ids', 'attention_mask'],
        num_rows: 8982
    })
    validation: Dataset({
        features: ['text', 'labels', 'input_ids', 'attention_mask'],
        num_rows: 998
    })
    test: Dataset({
        features: ['text', 'labels', 'input_ids', 'attention_mask'],
        num_rows: 3000
    })
})

## Remove Unwanted Columns

In [None]:
tokenized_datasets=tokenized_datasets.remove_columns(['text'])

## Format the Columns

In [None]:
tokenized_datasets=tokenized_datasets.with_format("torch")

In [None]:
tokenized_datasets

DatasetDict({
    train: Dataset({
        features: ['labels', 'input_ids', 'attention_mask'],
        num_rows: 8982
    })
    validation: Dataset({
        features: ['labels', 'input_ids', 'attention_mask'],
        num_rows: 998
    })
    test: Dataset({
        features: ['labels', 'input_ids', 'attention_mask'],
        num_rows: 3000
    })
})

## Creating DataLoaders for Train,Dev,Test Datasets

## Define the Bert Architecture



In [None]:
class DCLArchitecture(nn.Module):
    """Transformer encoder with a single-logit head for dual contrastive learning.

    The encoder's pooled output is fed through dropout and a linear layer that
    produces one logit per row. In contrastive training mode every input also
    yields a dropout-perturbed copy of its pooled representation, so the batch
    seen by the losses is twice as large.

    Args:
        dropout: Dropout probability applied to the pooled representation.
        bert_model_name: Hugging Face model identifier loaded with ``AutoModel``.
    """

    def __init__(self,dropout:float,bert_model_name:str='bert-base-cased'):
        super().__init__()
        self.bert = AutoModel.from_pretrained(bert_model_name)
        # Take the representation width from the loaded checkpoint so that
        # encoders that are not 768 wide work too; 768 stays as the fallback.
        self.dim = getattr(self.bert.config, "hidden_size", 768)
        self.dense = nn.Linear(self.dim, 1)
        self.dropout = nn.Dropout(dropout)

    def forward(self,batch_tokenized, if_train=False):
        """Encode a tokenized batch and score it.

        Args:
            batch_tokenized: Mapping with ``'input_ids'`` and ``'attention_mask'``.
            if_train: When True, return the clean and the dropout-augmented
                representation of every input, interleaved row by row
                (row ``2*i`` is the clean view of input ``i``, row ``2*i + 1``
                its augmented view).

        Returns:
            Tuple ``(features, logits)``. ``features`` has one row per input,
            or two rows per input when ``if_train`` is True; ``logits`` has one
            entry per row of ``features``.
        """
        encoder_output = self.bert(
            batch_tokenized['input_ids'],
            attention_mask=batch_tokenized['attention_mask'],
        )
        # Second element of the encoder output; for BERT/RoBERTa-style models
        # this is the pooled sentence representation.
        pooled = encoder_output[1]

        if if_train:
            augmented = self.dropout(pooled)
            # Concatenating along the feature axis and reshaping back to
            # `dim` columns interleaves each clean row with its augmented row.
            features = torch.cat((pooled, augmented), dim=1).reshape(-1, self.dim)
        else:
            features = self.dropout(pooled)

        logits = self.dense(features).squeeze(1)
        return features, logits

## Define Focal Loss

In [None]:
class FocalLoss(nn.Module):
    """Binary focal loss computed from raw logits.

    Args:
        alpha: Weight applied to samples of class 1; class 0 gets ``1 - alpha``.
        gamma: Focusing exponent applied to ``1 - p_t``.
        size_average: Average the per-sample losses when True, sum them otherwise.
    """

    def __init__(self, alpha:float=0.4, gamma:float=2, size_average:bool=True):
        super().__init__()
        # Kept as a plain tensor attribute (not a parameter or buffer); it is
        # moved to the target's device on every forward call.
        self.alpha = torch.tensor(alpha)
        self.gamma = gamma
        self.size_average = size_average

    def forward(self, pred, target):
        """Return the focal loss of logits ``pred`` against 0/1 labels ``target``."""
        self.alpha = self.alpha.to(target.device)

        positive_prob = torch.sigmoid(pred).view(-1, 1)
        class_index = target.view(-1, 1).long()

        # Column 0 holds P(class 0), column 1 holds P(class 1); pick the
        # probability assigned to the true class of every sample.
        two_class_probs = torch.cat((1 - positive_prob, positive_prob), dim=1)
        probs = two_class_probs.gather(1, class_index)
        # Lower clamp keeps log() finite for confidently wrong predictions.
        probs = probs.clamp(min=0.0001, max=1.0)

        # Per-sample class weight: (1 - alpha) for class 0, alpha for class 1.
        alpha_per_class = torch.stack((1 - self.alpha, self.alpha))
        alpha = alpha_per_class[class_index]

        batch_loss = -alpha * torch.pow(1 - probs, self.gamma) * probs.log()

        return batch_loss.mean() if self.size_average else batch_loss.sum()

## Define Unsupervised Contrastive loss

In [None]:
def simcse_loss(batch_emb,temp_1:float):
    """Unsupervised SimCSE-style contrastive loss over interleaved view pairs.

    Rows ``2*i`` and ``2*i + 1`` of ``batch_emb`` are treated as two views of
    the same sample: each row must identify its partner among all other rows.

    Args:
        batch_emb: 2-D tensor with an even number of rows, partners adjacent.
        temp_1: Temperature dividing the cosine similarities.

    Returns:
        Scalar cross-entropy loss.

    Raises:
        ValueError: If ``batch_emb`` has an odd number of rows.
    """
    batch_size = batch_emb.size(0)
    if batch_size % 2 != 0:
        raise ValueError(
            f"simcse_loss expects an even number of rows (interleaved pairs), got {batch_size}"
        )
    # Build helper tensors on the embeddings' own device rather than relying
    # on a notebook-level global.
    emb_device = batch_emb.device
    # Flipping the lowest bit maps 0<->1, 2<->3, ...: the index of each row's partner.
    y_true = torch.arange(batch_size, dtype=torch.long, device=emb_device) ^ 1
    norm_emb = F.normalize(batch_emb, dim=1, p=2)
    sim_score = torch.matmul(norm_emb, norm_emb.transpose(0, 1))
    # Push self-similarity far below every other logit so a row can never
    # select itself.
    sim_score = sim_score - torch.eye(batch_size, device=emb_device) * 1e12
    sim_score = sim_score / temp_1
    return F.cross_entropy(sim_score, y_true)


## Define Supervised Contrastive loss

In [None]:
def sup_simcse_loss(batch_emb, label,temp_2:float):
    """Supervised contrastive loss: rows sharing a label are positives.

    For every ordered pair ``(i, j)`` with ``i != j`` and equal labels the loss
    term is ``-log(e_ij / (e_ij + sum_k e_ik))`` where ``e = exp(cos / temp_2)``
    and ``k`` runs over the rows whose label differs from row ``i``. Each
    diagonal entry contributes the constant ``-log(1e-12)``, and the total is
    divided by ``2 * n``.

    The terms are evaluated in log-space (log-sum-exp / softplus) so that
    small temperatures cannot overflow ``exp`` and turn the loss into NaN.

    Args:
        batch_emb: 2-D tensor of embeddings, one row per sample.
        label: 1-D tensor with one label per row of ``batch_emb``.
        temp_2: Temperature dividing the cosine similarities.

    Returns:
        Scalar loss tensor.
    """
    n = batch_emb.size(0)
    # Build helper tensors on the embeddings' own device rather than relying
    # on a notebook-level global.
    emb_device = batch_emb.device

    logits = F.cosine_similarity(batch_emb.unsqueeze(1), batch_emb.unsqueeze(0), dim=2) / temp_2

    # same_label[i, j] is True when rows i and j carry the same label
    # (this includes the diagonal).
    same_label = label.view(1, -1).eq(label.view(-1, 1))
    off_diagonal = ~torch.eye(n, dtype=torch.bool, device=emb_device)
    positive_pairs = same_label & off_diagonal

    # log(sum_k exp(logit_ik)) over the negatives of each row; -inf when a row
    # has no negatives, which makes its positive terms evaluate to zero below.
    negative_logsumexp = torch.logsumexp(
        logits.masked_fill(same_label, float("-inf")), dim=1, keepdim=True
    )

    # -log(e_ij / (e_ij + neg_i)) == softplus(log(neg_i) - logit_ij)
    pair_loss = F.softplus(negative_logsumexp - logits)
    positive_loss = pair_loss.masked_fill(~positive_pairs, 0.0).sum()

    # Constant contributed by the n diagonal entries, evaluated in float32.
    diagonal_loss = n * -torch.log(torch.tensor(1e-12)).item()

    return (positive_loss + diagonal_loss) / (2 * n)

## Fine Tuning the Model

In [None]:
# @title Input study_name
study_name = "DCL_Experiments_2" # @param {type:"string"}



In [None]:
# Location of the SQLite file that stores the Optuna study; the "sqlite:////"
# prefix is prepended where the RDB storage is created.
optuna_storage_location = "content/drive/MyDrive/FYP/Hyper_Parameter_Tuning/RoBERTa_Model/optimization_study.db"
# Short run identifier: the first 8 hex characters of the MD5 digest of the
# current timestamp.
_timestamp_digest = hashlib.md5(str(time.time()).encode()).hexdigest()
study_id = _timestamp_digest[:8]

### Setting up Neptune

In [None]:
sweep_id = uuid.uuid1()

In [None]:
run_study_level = neptune.init_run(capture_stdout=True,capture_stderr=True,capture_traceback=True, capture_hardware_metrics=True,tags=["huggingface", "hyper parameter tuning", "RoBERTa"],name=(study_name+'_'+study_id))

https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-81


In [None]:
run_study_level["sweep-id"] = str(sweep_id)

In [None]:
run_study_level["sys/tags"].add("study-level")

## Format the Labels

In [None]:
def copy_label(labels):
    """Repeat every label twice in place, e.g. ``[a, b] -> [a, a, b, b]``.

    This matches the interleaved (clean, augmented) row layout that the model
    produces in contrastive training mode.
    """
    paired = torch.stack((labels, labels), dim=1)
    return paired.reshape(-1)

### Train the Model

In [None]:
def train_model(model, train_dataloader,optimizer,criteon,device,temp_1:float,temp_2:float,lamda:float,run_trial_level):
    """Train ``model`` for one epoch with the focal + dual contrastive objective.

    The optimised loss is ``focal + lamda * (unsupervised + supervised)``.
    Per-batch losses and accuracy are appended to ``run_trial_level``.

    Args:
        model: Network returning ``(embeddings, logits)`` when called with
            ``if_train=True``.
        train_dataloader: Yields dict batches containing a ``"labels"`` entry.
        optimizer: Optimizer stepping the model parameters.
        criteon: Classification loss called as ``criteon(logits, labels.float())``.
        device: Device the batches and the accuracy metric are moved to.
        temp_1: Temperature of the unsupervised contrastive loss.
        temp_2: Temperature of the supervised contrastive loss.
        lamda: Weight of the summed contrastive losses.
        run_trial_level: Neptune run that receives the per-batch series.

    Returns:
        Tuple ``(average classification loss per batch, epoch accuracy)``. The
        average covers only the ``criteon`` loss, not the contrastive terms.
    """
    accuracy_metric = BinaryAccuracy()
    accuracy_metric.to(device)
    model.train()
    total_train_loss = 0.0
    # The context manager closes the bar even if a batch raises.
    with tqdm(total=len(train_dataloader)) as progress_bar:
        for batch in train_dataloader:
            batch = {k: v.to(device) for k, v in batch.items()}
            labels = copy_label(batch["labels"])
            emb, pred = model(batch, if_train=True)

            classification_loss = criteon(pred, labels.float())
            # Use the temperatures handed in by the caller; reading the
            # notebook-level TEMP_1/TEMP_2 would make tuning them a no-op.
            loss_sim = simcse_loss(emb, temp_1=temp_1)
            loss_supsim = sup_simcse_loss(emb, labels, temp_2=temp_2)

            total_train_loss += classification_loss.item()
            # Log plain Python floats so no autograd graph is kept alive by
            # the logger.
            run_trial_level["train_batch/loss"].append(classification_loss.item())
            run_trial_level["train_batch/Unsupervised_Contrastive_loss"].append(loss_sim.item())
            run_trial_level["train_batch/supervised_Contrastive_loss"].append(loss_supsim.item())

            batch_level_accuracy = accuracy_metric(torch.sigmoid(pred.detach()), labels)
            run_trial_level["train_batch/accuracy"].append(batch_level_accuracy.item())

            loss = classification_loss + lamda * (loss_sim + loss_supsim)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            progress_bar.update(1)

    average_epoch_train_loss = total_train_loss / len(train_dataloader)  # Compute average epoch loss
    train_accuracy = accuracy_metric.compute()
    accuracy_metric.reset()
    return average_epoch_train_loss,train_accuracy


### Evaluate the Model

In [None]:
def evaluate_model(model, dev_dataloader,criteon, device):
    """Score ``model`` on a dataloader without updating its weights.

    Args:
        model: Network returning ``(embeddings, logits)``.
        dev_dataloader: Yields dict batches containing a ``"labels"`` entry.
        criteon: Loss called as ``criteon(logits, labels.float())``.
        device: Device the batches and the accuracy metric are moved to.

    Returns:
        Tuple ``(mean of the per-batch losses, accuracy over all batches)``.
    """
    metric = BinaryAccuracy()
    metric.to(device)
    model.eval()

    batch_losses = []
    for raw_batch in dev_dataloader:
        inputs = {name: tensor.to(device) for name, tensor in raw_batch.items()}
        targets = inputs["labels"]
        with torch.no_grad():
            _, logits = model(inputs, False)
            batch_losses.append(criteon(logits, targets.float()).item())
        metric(torch.sigmoid(logits), targets)

    # Unweighted mean over batches (a smaller final batch counts as much as a full one).
    average_epoch_dev_loss = sum(batch_losses) / len(dev_dataloader)
    dev_accuracy = metric.compute()
    metric.reset()
    return average_epoch_dev_loss, dev_accuracy

### Train & Evaluate Model

## Hyperparameter Tuning

In [None]:
storage = optuna.storages.RDBStorage("sqlite:////"+optuna_storage_location)

In [None]:
def define_model(bert_model_name,trial):
    """Build a ``DCLArchitecture`` for a trial and place it on ``device``.

    Args:
        bert_model_name: Hugging Face model identifier for the encoder.
        trial: Optuna trial; currently unused because dropout is fixed at 0.5.

    Returns:
        The model, already moved to the notebook-level ``device``.
    """
    dropout_rate = 0.5
    return DCLArchitecture(bert_model_name=bert_model_name, dropout=dropout_rate).to(device)

In [None]:
def tokenized_batch_data(trial,tokenized_datasets):
    """Create the train and validation dataloaders for one Optuna trial.

    Args:
        trial: Optuna trial; supplies ``batch_size`` in the range 100-220.
        tokenized_datasets: Mapping with ``"train"`` and ``"validation"`` splits.

    Returns:
        Tuple ``(train_dataloader, dev_dataloader, batch_size)``.
    """
    batch_size = trial.suggest_int("batch_size",100,220)
    train_dataloader = DataLoader(tokenized_datasets["train"], batch_size=batch_size, shuffle=True)
    # Validation data is read in its stored order: shuffling adds nothing to
    # evaluation and only makes the per-batch averaged loss vary between runs.
    dev_dataloader = DataLoader(tokenized_datasets["validation"], batch_size=batch_size, shuffle=False)
    return train_dataloader,dev_dataloader,batch_size


In [None]:
def objective(trial):
    """Optuna objective: train one configuration and return its dev accuracy.

    Samples epochs, focal-loss alpha/gamma, both contrastive temperatures and
    the batch size, trains a fresh model, reports the validation accuracy
    after every epoch (so the pruner can stop weak trials) and logs everything
    to a trial-level Neptune run.

    Args:
        trial: Optuna trial used to sample hyperparameters and report progress.

    Returns:
        Validation accuracy after the last epoch, as a float.

    Raises:
        optuna.exceptions.TrialPruned: When the pruner stops the trial.
    """
    # Define the hyperparameters to optimize
    EPOCHS = trial.suggest_int("epochs", 1,2)
    ALPHA= trial.suggest_float("alpha", 0.1, 0.5)
    GAMMA = trial.suggest_float("gamma", 1.0, 5)
    TEMP_1 = trial.suggest_float("temp_1", 0.05, 0.15)
    TEMP_2 = trial.suggest_float("temp_2", 0.01,0.1)
    train_dataloader,dev_dataloader,BATCH_SIZE =tokenized_batch_data(trial,tokenized_datasets)
    model = define_model(bert_model_name,trial)
    learning_rate = 1e-4
    optimizer = torch.optim.AdamW(model.parameters(),lr = learning_rate)
    criteon = FocalLoss(ALPHA, GAMMA)
    param = {
        "EPOCHS": EPOCHS,
        "ALPHA": ALPHA,
        "GAMMA": GAMMA,
        "TEMP_1": TEMP_1,
        "TEMP_2":TEMP_2,
        "BATCH_SIZE":BATCH_SIZE,
        "lr":learning_rate,
        "DROPOUT":0.5,  # fixed inside define_model
        "LAMBDA":LAMBDA,  # log the value that is actually passed to train_model
        "SEED":SEED
    }
    # Create a trial-level run
    run_trial_level = neptune.init_run(capture_stdout=True,capture_stderr=True,capture_traceback=True, capture_hardware_metrics=True,
    name=study_name+str(study_id)+"_"+str(trial.number),tags=["hyper parameter tuning",(study_name+'_'+study_id),str(trial.number+1)])

    # The finally clause stops the run on success, on pruning and on any
    # unexpected error, so a failed trial cannot leave a Neptune run open.
    try:
        # Log sweep ID to trial-level run
        run_trial_level["sys/tags"].add("trial-level")
        run_trial_level["sweep-id"] = str(sweep_id)

        # Log parameters of a trial-level run
        run_trial_level["parameters"] = param

        for epoch in range(EPOCHS):
            average_epoch_train_loss,train_accuracy = train_model(model, train_dataloader,optimizer,criteon,device,TEMP_1,TEMP_2,LAMBDA,run_trial_level)
            average_epoch_dev_loss,dev_accuracy = evaluate_model(model, dev_dataloader,criteon, device)

            # Accuracies arrive as 0-d tensors; hand plain floats to Optuna
            # and Neptune.
            trial.report(float(dev_accuracy), epoch)

            # Log score of a trial-level Run
            run_trial_level["train/epoch/loss"].append(average_epoch_train_loss)
            run_trial_level["train/epoch/accuracy"].append(float(train_accuracy))
            run_trial_level["evaluation/epoch/loss"].append(average_epoch_dev_loss)
            run_trial_level["evaluation/epoch/accuracy"].append(float(dev_accuracy))
            # Handle pruning based on the intermediate value.
            if trial.should_prune():
                raise optuna.exceptions.TrialPruned()

        run_trial_level["train/loss"]=average_epoch_train_loss
        run_trial_level["train/accuracy"]=float(train_accuracy)
        run_trial_level["evaluation/loss"]=average_epoch_dev_loss
        run_trial_level["evaluation/accuracy"]=float(dev_accuracy)

        model_path = 'model_'+str(trial.number)+'.pt'
        criteon_path = 'criteon_'+str(trial.number)+'.pt'
        torch.save(model.state_dict(),model_path)
        # NOTE(review): FocalLoss keeps alpha as a plain attribute, so this
        # state dict appears to be empty - confirm whether it is worth uploading.
        torch.save(criteon.state_dict(),criteon_path)
        run_trial_level["model_checkpoints/model"].upload(model_path)
        run_trial_level["model_checkpoints/criteon"].upload(criteon_path)
    finally:
        # Stop trial-level Run
        run_trial_level.stop()
    print("Train_accuracy",train_accuracy)

    return float(dev_accuracy)


In [None]:
neptune_callback = npt_utils.NeptuneCallback(run_study_level)

In [None]:
# Create the study in the RDB storage (or resume it if it already exists) and
# search for the configuration with the highest validation accuracy.
study = optuna.create_study(
    study_name=study_name,
    storage=storage,
    load_if_exists=True,
    direction='maximize',
)
study.optimize(objective, n_trials=50, callbacks=[neptune_callback])

# Report the best trial found.
trial = study.best_trial

print(f'Accuracy: {trial.value}')
print(f"Best hyperparameters: {trial.params}")

[I 2023-12-08 16:43:20,861] A new study created in RDB with name: DCL-Experiments_2


model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-82


  0%|          | 0/88 [00:00<?, ?it/s]

  0%|          | 0/88 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 11 operations to synchronize with Neptune. Do not kill this process.
Still waiting for the remaining 11 operations (0.00% done). Please wait.
All 11 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-82/metadata


[I 2023-12-08 16:46:32,537] Trial 0 finished with value: 0.7474949955940247 and parameters: {'epochs': 2, 'alpha': 0.37221336361926904, 'gamma': 1.1794391717927133, 'temp_1': 0.09768151231482006, 'temp_2': 0.09207712734460553, 'batch_size': 103}. Best is trial 0 with value: 0.7474949955940247.


Train_accuracy tensor(0.7418, device='cuda:0')


[W 2023-12-08 16:46:33,292] Param batch_size unique value length is less than 2.
[W 2023-12-08 16:46:33,294] Param epochs unique value length is less than 2.
[W 2023-12-08 16:46:33,296] Param gamma unique value length is less than 2.
[W 2023-12-08 16:46:33,298] Param temp_1 unique value length is less than 2.
[W 2023-12-08 16:46:33,300] Param temp_2 unique value length is less than 2.
[W 2023-12-08 16:46:33,302] Param alpha unique value length is less than 2.
[W 2023-12-08 16:46:33,304] Param epochs unique value length is less than 2.
[W 2023-12-08 16:46:33,305] Param gamma unique value length is less than 2.
[W 2023-12-08 16:46:33,306] Param temp_1 unique value length is less than 2.
[W 2023-12-08 16:46:33,307] Param temp_2 unique value length is less than 2.
[W 2023-12-08 16:46:33,309] Param alpha unique value length is less than 2.
[W 2023-12-08 16:46:33,310] Param batch_size unique value length is less than 2.
[W 2023-12-08 16:46:33,311] Param gamma unique value length is less than

https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-83


  0%|          | 0/47 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 19 operations to synchronize with Neptune. Do not kill this process.
Still waiting for the remaining 7 operations (63.16% done). Please wait.
All 19 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-83/metadata


[I 2023-12-08 16:48:24,793] Trial 1 finished with value: 0.5831663608551025 and parameters: {'epochs': 1, 'alpha': 0.18153435722029784, 'gamma': 2.485969905048106, 'temp_1': 0.10272412188763846, 'temp_2': 0.04810944505467431, 'batch_size': 193}. Best is trial 0 with value: 0.7474949955940247.


Train_accuracy tensor(0.5439, device='cuda:0')


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-84


  0%|          | 0/80 [00:00<?, ?it/s]

  0%|          | 0/80 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 11 operations to synchronize with Neptune. Do not kill this process.
Still waiting for the remaining 11 operations (0.00% done). Please wait.
All 11 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-84/metadata


[I 2023-12-08 16:51:22,585] Trial 2 finished with value: 0.6963927745819092 and parameters: {'epochs': 2, 'alpha': 0.34361045131630286, 'gamma': 3.13296023969609, 'temp_1': 0.055596046964737626, 'temp_2': 0.09042824566053255, 'batch_size': 113}. Best is trial 0 with value: 0.7474949955940247.


Train_accuracy tensor(0.7346, device='cuda:0')


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-85


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/89 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 15 operations to synchronize with Neptune. Do not kill this process.
Still waiting for the remaining 15 operations (0.00% done). Please wait.
All 15 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-85/metadata


[I 2023-12-08 16:54:23,631] Trial 3 finished with value: 0.6843687295913696 and parameters: {'epochs': 2, 'alpha': 0.22000128387082227, 'gamma': 2.768524901253711, 'temp_1': 0.10313246999494222, 'temp_2': 0.016360221855119716, 'batch_size': 102}. Best is trial 0 with value: 0.7474949955940247.


Train_accuracy tensor(0.7142, device='cuda:0')


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-86


  0%|          | 0/62 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 19 operations to synchronize with Neptune. Do not kill this process.
Still waiting for the remaining 19 operations (0.00% done). Please wait.
All 19 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-86/metadata


[I 2023-12-08 16:57:18,755] Trial 4 finished with value: 0.6943888068199158 and parameters: {'epochs': 2, 'alpha': 0.4655211361721737, 'gamma': 2.9047376527200277, 'temp_1': 0.12699533048809514, 'temp_2': 0.05788938742397827, 'batch_size': 147}. Best is trial 0 with value: 0.7474949955940247.


Train_accuracy tensor(0.6969, device='cuda:0')


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-87


  0%|          | 0/74 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 17 operations to synchronize with Neptune. Do not kill this process.
All 17 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-87/metadata


[I 2023-12-08 16:58:34,152] Trial 5 pruned. 
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-88


  0%|          | 0/42 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 3 operations to synchronize with Neptune. Do not kill this process.
Still waiting for the remaining 3 operations (0.00% done). Please wait.
All 3 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-88/metadata


[I 2023-12-08 17:00:21,940] Trial 6 finished with value: 0.6633266806602478 and parameters: {'epochs': 1, 'alpha': 0.27613647843114825, 'gamma': 3.8712491057782166, 'temp_1': 0.09973344697993453, 'temp_2': 0.0953669367188125, 'batch_size': 214}. Best is trial 0 with value: 0.7474949955940247.


Train_accuracy tensor(0.5273, device='cuda:0')


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-89


  0%|          | 0/78 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 9 operations to synchronize with Neptune. Do not kill this process.
All 9 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-89/metadata


[I 2023-12-08 17:01:38,334] Trial 7 pruned. 
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-90


  0%|          | 0/45 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 5 operations to synchronize with Neptune. Do not kill this process.
All 5 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-90/metadata


[I 2023-12-08 17:02:50,570] Trial 8 pruned. 
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-91


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 19 operations to synchronize with Neptune. Do not kill this process.
Still waiting for the remaining 19 operations (0.00% done). Please wait.
All 19 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-91/metadata


[I 2023-12-08 17:05:48,852] Trial 9 finished with value: 0.7104208469390869 and parameters: {'epochs': 2, 'alpha': 0.3624714606318509, 'gamma': 4.371774529368401, 'temp_1': 0.11880977255667903, 'temp_2': 0.03271056045354828, 'batch_size': 191}. Best is trial 0 with value: 0.7474949955940247.


Train_accuracy tensor(0.7504, device='cuda:0')


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-92


  0%|          | 0/59 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 15 operations to synchronize with Neptune. Do not kill this process.
Still waiting for the remaining 15 operations (0.00% done). Please wait.
All 15 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-92/metadata


[I 2023-12-08 17:07:40,167] Trial 10 finished with value: 0.702404797077179 and parameters: {'epochs': 1, 'alpha': 0.48625134006618953, 'gamma': 1.0607396740995028, 'temp_1': 0.08006750228302734, 'temp_2': 0.07888727442844483, 'batch_size': 154}. Best is trial 0 with value: 0.7474949955940247.


Train_accuracy tensor(0.5925, device='cuda:0')


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-93


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 5 operations to synchronize with Neptune. Do not kill this process.
All 5 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-93/metadata


[I 2023-12-08 17:10:02,373] Trial 11 pruned. 
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-94


  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 11 operations to synchronize with Neptune. Do not kill this process.
Still waiting for the remaining 11 operations (0.00% done). Please wait.
All 11 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-94/metadata


[I 2023-12-08 17:13:02,013] Trial 12 finished with value: 0.7124248743057251 and parameters: {'epochs': 2, 'alpha': 0.10472727696045758, 'gamma': 3.747158007278, 'temp_1': 0.08756959445793336, 'temp_2': 0.03679293211652779, 'batch_size': 173}. Best is trial 0 with value: 0.7474949955940247.


Train_accuracy tensor(0.7384, device='cuda:0')


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-95


  0%|          | 0/54 [00:00<?, ?it/s]

  0%|          | 0/54 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 5 operations to synchronize with Neptune. Do not kill this process.
All 5 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-95/metadata


[I 2023-12-08 17:15:21,135] Trial 13 pruned. 
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-96


  0%|          | 0/67 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 5 operations to synchronize with Neptune. Do not kill this process.
All 5 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-96/metadata


[I 2023-12-08 17:16:38,140] Trial 14 pruned. 
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-97


  0%|          | 0/55 [00:00<?, ?it/s]

  0%|          | 0/55 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 11 operations to synchronize with Neptune. Do not kill this process.
Still waiting for the remaining 11 operations (0.00% done). Please wait.
All 11 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-97/metadata


[I 2023-12-08 17:19:39,005] Trial 15 finished with value: 0.7304609417915344 and parameters: {'epochs': 2, 'alpha': 0.3022072911122412, 'gamma': 1.0393562864085195, 'temp_1': 0.06578692927449342, 'temp_2': 0.08171205847692756, 'batch_size': 164}. Best is trial 0 with value: 0.7474949955940247.


Train_accuracy tensor(0.7091, device='cuda:0')


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-98


  0%|          | 0/68 [00:00<?, ?it/s]

  0%|          | 0/68 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 5 operations to synchronize with Neptune. Do not kill this process.
All 5 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-98/metadata


[I 2023-12-08 17:22:09,650] Trial 16 pruned. 
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-99


  0%|          | 0/56 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 13 operations to synchronize with Neptune. Do not kill this process.
All 13 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-99/metadata


[I 2023-12-08 17:23:23,257] Trial 17 pruned. 
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-100


  0%|          | 0/65 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 5 operations to synchronize with Neptune. Do not kill this process.
All 5 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-100/metadata


[I 2023-12-08 17:24:43,070] Trial 18 pruned. 
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-101


  0%|          | 0/90 [00:00<?, ?it/s]

  0%|          | 0/90 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 15 operations to synchronize with Neptune. Do not kill this process.
Still waiting for the remaining 15 operations (0.00% done). Please wait.
All 15 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-101/metadata


[I 2023-12-08 17:27:43,105] Trial 19 finished with value: 0.7324649095535278 and parameters: {'epochs': 2, 'alpha': 0.3931645624376177, 'gamma': 1.3423382611658172, 'temp_1': 0.06644975279345222, 'temp_2': 0.09998504434735553, 'batch_size': 100}. Best is trial 0 with value: 0.7474949955940247.


Train_accuracy tensor(0.7458, device='cuda:0')


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-102


  0%|          | 0/90 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 15 operations to synchronize with Neptune. Do not kill this process.
Still waiting for the remaining 15 operations (0.00% done). Please wait.
All 15 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-102/metadata


[I 2023-12-08 17:29:33,970] Trial 20 finished with value: 0.7154308557510376 and parameters: {'epochs': 1, 'alpha': 0.4116665245947976, 'gamma': 1.4301198701809472, 'temp_1': 0.09519332382537252, 'temp_2': 0.09697484753739571, 'batch_size': 100}. Best is trial 0 with value: 0.7474949955940247.


Train_accuracy tensor(0.6790, device='cuda:0')


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-103


  0%|          | 0/72 [00:00<?, ?it/s]

  0%|          | 0/72 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 15 operations to synchronize with Neptune. Do not kill this process.
Still waiting for the remaining 15 operations (0.00% done). Please wait.
All 15 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-103/metadata


[I 2023-12-08 17:32:33,917] Trial 21 finished with value: 0.7394789457321167 and parameters: {'epochs': 2, 'alpha': 0.39694430390996177, 'gamma': 1.0026427968609208, 'temp_1': 0.06453538986710373, 'temp_2': 0.08708157833197905, 'batch_size': 125}. Best is trial 0 with value: 0.7474949955940247.


Train_accuracy tensor(0.7311, device='cuda:0')


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-104


  0%|          | 0/72 [00:00<?, ?it/s]

  0%|          | 0/72 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 14 operations to synchronize with Neptune. Do not kill this process.
Still waiting for the remaining 14 operations (0.00% done). Please wait.
All 14 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-104/metadata


[I 2023-12-08 17:35:35,484] Trial 22 finished with value: 0.7124248743057251 and parameters: {'epochs': 2, 'alpha': 0.40236795442766154, 'gamma': 1.77420545331134, 'temp_1': 0.05765736799496394, 'temp_2': 0.09001373268329492, 'batch_size': 125}. Best is trial 0 with value: 0.7474949955940247.


Train_accuracy tensor(0.7725, device='cuda:0')


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-105


  0%|          | 0/84 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 9 operations to synchronize with Neptune. Do not kill this process.
All 9 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-105/metadata


[I 2023-12-08 17:36:52,093] Trial 23 pruned. 
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-106


  0%|          | 0/73 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 9 operations to synchronize with Neptune. Do not kill this process.
All 9 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-106/metadata


[I 2023-12-08 17:38:08,652] Trial 24 pruned. 
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-107


  0%|          | 0/84 [00:00<?, ?it/s]

  0%|          | 0/84 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 18 operations to synchronize with Neptune. Do not kill this process.
Still waiting for the remaining 18 operations (0.00% done). Please wait.
All 18 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-107/metadata


[I 2023-12-08 17:41:11,411] Trial 25 finished with value: 0.757515013217926 and parameters: {'epochs': 2, 'alpha': 0.4429593504449712, 'gamma': 1.8775783926917704, 'temp_1': 0.05137844981376246, 'temp_2': 0.07135548353185929, 'batch_size': 108}. Best is trial 25 with value: 0.757515013217926.


Train_accuracy tensor(0.7419, device='cuda:0')


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-108


  0%|          | 0/81 [00:00<?, ?it/s]

  0%|          | 0/81 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 3 operations to synchronize with Neptune. Do not kill this process.
Still waiting for the remaining 3 operations (0.00% done). Please wait.
All 3 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-108/metadata


[I 2023-12-08 17:44:09,883] Trial 26 finished with value: 0.7154308557510376 and parameters: {'epochs': 2, 'alpha': 0.4350664917513217, 'gamma': 1.960271385795929, 'temp_1': 0.05010817244720112, 'temp_2': 0.07416220656180463, 'batch_size': 112}. Best is trial 25 with value: 0.757515013217926.


Train_accuracy tensor(0.7198, device='cuda:0')


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-109


  0%|          | 0/70 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 13 operations to synchronize with Neptune. Do not kill this process.
All 13 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-109/metadata


[I 2023-12-08 17:45:23,718] Trial 27 pruned. 
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-110


  0%|          | 0/62 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 11 operations to synchronize with Neptune. Do not kill this process.
Still waiting for the remaining 11 operations (0.00% done). Please wait.
All 11 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-110/metadata


[I 2023-12-08 17:48:21,046] Trial 28 finished with value: 0.7505009770393372 and parameters: {'epochs': 2, 'alpha': 0.4978060855680688, 'gamma': 2.397003362767625, 'temp_1': 0.07278359556745029, 'temp_2': 0.07904764168441412, 'batch_size': 147}. Best is trial 25 with value: 0.757515013217926.


Train_accuracy tensor(0.7693, device='cuda:0')


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-111


  0%|          | 0/77 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 15 operations to synchronize with Neptune. Do not kill this process.
Still waiting for the remaining 15 operations (0.00% done). Please wait.
All 15 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-111/metadata


[I 2023-12-08 17:50:13,815] Trial 29 finished with value: 0.7334669232368469 and parameters: {'epochs': 1, 'alpha': 0.4899791032341932, 'gamma': 2.3727060659535626, 'temp_1': 0.07507920215738678, 'temp_2': 0.06496630399883344, 'batch_size': 117}. Best is trial 25 with value: 0.757515013217926.


Train_accuracy tensor(0.5970, device='cuda:0')


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-112


  0%|          | 0/61 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 13 operations to synchronize with Neptune. Do not kill this process.
All 13 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-112/metadata


[I 2023-12-08 17:51:27,638] Trial 30 pruned. 
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-113


  0%|          | 0/84 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 5 operations to synchronize with Neptune. Do not kill this process.
All 5 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-113/metadata


[I 2023-12-08 17:52:43,166] Trial 31 pruned. 
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-114


  0%|          | 0/62 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 3 operations to synchronize with Neptune. Do not kill this process.
Still waiting for the remaining 3 operations (0.00% done). Please wait.
All 3 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-114/metadata


[I 2023-12-08 17:55:42,830] Trial 32 finished with value: 0.7404809594154358 and parameters: {'epochs': 2, 'alpha': 0.47015097069579054, 'gamma': 2.6292945559982153, 'temp_1': 0.070814210448653, 'temp_2': 0.08520234253729725, 'batch_size': 145}. Best is trial 25 with value: 0.757515013217926.


Train_accuracy tensor(0.7413, device='cuda:0')


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-115


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/63 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 11 operations to synchronize with Neptune. Do not kill this process.
Still waiting for the remaining 11 operations (0.00% done). Please wait.
All 11 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-115/metadata


[I 2023-12-08 17:58:42,331] Trial 33 finished with value: 0.7294589281082153 and parameters: {'epochs': 2, 'alpha': 0.4655543900926524, 'gamma': 2.63070990204062, 'temp_1': 0.07148330447003902, 'temp_2': 0.07760022583577794, 'batch_size': 143}. Best is trial 25 with value: 0.757515013217926.


Train_accuracy tensor(0.7402, device='cuda:0')


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-116


  0%|          | 0/60 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 5 operations to synchronize with Neptune. Do not kill this process.
All 5 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-116/metadata


[I 2023-12-08 17:59:58,934] Trial 34 pruned. 
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-117


  0%|          | 0/69 [00:00<?, ?it/s]

  0%|          | 0/69 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 11 operations to synchronize with Neptune. Do not kill this process.
Still waiting for the remaining 11 operations (0.00% done). Please wait.
All 11 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-117/metadata


[I 2023-12-08 18:03:08,152] Trial 35 finished with value: 0.7324649095535278 and parameters: {'epochs': 2, 'alpha': 0.47365528033939736, 'gamma': 2.2874912158174587, 'temp_1': 0.08095897508273582, 'temp_2': 0.09221450650402625, 'batch_size': 131}. Best is trial 25 with value: 0.757515013217926.


Train_accuracy tensor(0.6428, device='cuda:0')


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-118


  0%|          | 0/58 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 9 operations to synchronize with Neptune. Do not kill this process.
All 9 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-118/metadata


[I 2023-12-08 18:04:24,633] Trial 36 pruned. 
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-119


  0%|          | 0/84 [00:00<?, ?it/s]

  0%|          | 0/84 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 14 operations to synchronize with Neptune. Do not kill this process.
Still waiting for the remaining 14 operations (0.00% done). Please wait.
All 14 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-119/metadata


[I 2023-12-08 18:07:25,438] Trial 37 finished with value: 0.7174348831176758 and parameters: {'epochs': 2, 'alpha': 0.48238088812266744, 'gamma': 2.9756334406388407, 'temp_1': 0.054407296192426055, 'temp_2': 0.06842414982524853, 'batch_size': 108}. Best is trial 25 with value: 0.757515013217926.


Train_accuracy tensor(0.7667, device='cuda:0')


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-120


  0%|          | 0/50 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 9 operations to synchronize with Neptune. Do not kill this process.
All 9 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-120/metadata


[I 2023-12-08 18:08:39,841] Trial 38 pruned. 
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-121


  0%|          | 0/79 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 9 operations to synchronize with Neptune. Do not kill this process.
All 9 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-121/metadata


[I 2023-12-08 18:09:59,208] Trial 39 pruned. 
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-122


  0%|          | 0/42 [00:00<?, ?it/s]

  0%|          | 0/42 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 9 operations to synchronize with Neptune. Do not kill this process.
All 9 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-122/metadata


[I 2023-12-08 18:12:23,005] Trial 40 pruned. 
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-123


  0%|          | 0/75 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 17 operations to synchronize with Neptune. Do not kill this process.
All 17 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-123/metadata


[I 2023-12-08 18:13:36,611] Trial 41 pruned. 
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-124


  0%|          | 0/65 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 17 operations to synchronize with Neptune. Do not kill this process.
All 17 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-124/metadata


[I 2023-12-08 18:14:52,116] Trial 42 pruned. 
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-125


  0%|          | 0/70 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 9 operations to synchronize with Neptune. Do not kill this process.
All 9 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-125/metadata


[I 2023-12-08 18:16:04,899] Trial 43 pruned. 
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-126


  0%|          | 0/62 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 15 operations to synchronize with Neptune. Do not kill this process.
Still waiting for the remaining 15 operations (0.00% done). Please wait.
All 15 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-126/metadata


[I 2023-12-08 18:19:05,523] Trial 44 finished with value: 0.7384769320487976 and parameters: {'epochs': 2, 'alpha': 0.4480101131521158, 'gamma': 1.2569745268887946, 'temp_1': 0.0695754725122979, 'temp_2': 0.09136569795995267, 'batch_size': 146}. Best is trial 25 with value: 0.757515013217926.


Train_accuracy tensor(0.7361, device='cuda:0')


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-127


  0%|          | 0/84 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 9 operations to synchronize with Neptune. Do not kill this process.
All 9 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-127/metadata


[I 2023-12-08 18:20:22,549] Trial 45 pruned. 
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-128


  0%|          | 0/76 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 11 operations to synchronize with Neptune. Do not kill this process.
Still waiting for the remaining 11 operations (0.00% done). Please wait.
All 11 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-128/metadata


[I 2023-12-08 18:23:19,453] Trial 46 finished with value: 0.7394789457321167 and parameters: {'epochs': 2, 'alpha': 0.4755936681874527, 'gamma': 2.0363349647349382, 'temp_1': 0.05533785149173878, 'temp_2': 0.08545125786317909, 'batch_size': 119}. Best is trial 25 with value: 0.757515013217926.


Train_accuracy tensor(0.7614, device='cuda:0')


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-129


  0%|          | 0/85 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 17 operations to synchronize with Neptune. Do not kill this process.
All 17 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-129/metadata


[I 2023-12-08 18:24:38,169] Trial 47 pruned. 
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-130


  0%|          | 0/80 [00:00<?, ?it/s]

  0%|          | 0/80 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 3 operations to synchronize with Neptune. Do not kill this process.
Still waiting for the remaining 3 operations (0.00% done). Please wait.
All 3 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-130/metadata


[I 2023-12-08 18:27:34,592] Trial 48 finished with value: 0.7474949955940247 and parameters: {'epochs': 2, 'alpha': 0.3984229430148454, 'gamma': 1.0991810675615639, 'temp_1': 0.07555356257043547, 'temp_2': 0.08053641483334309, 'batch_size': 113}. Best is trial 25 with value: 0.757515013217926.


Train_accuracy tensor(0.7410, device='cuda:0')


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-131


  0%|          | 0/45 [00:00<?, ?it/s]

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 9 operations to synchronize with Neptune. Do not kill this process.
All 9 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-131/metadata


[I 2023-12-08 18:28:47,730] Trial 49 pruned. 


Accuracy: 0.757515013217926
Best hyperparameters: {'epochs': 2, 'alpha': 0.4429593504449712, 'gamma': 1.8775783926917704, 'temp_1': 0.05137844981376246, 'temp_2': 0.07135548353185929, 'batch_size': 108}


In [None]:
# Close the study-level run object once the hyper-parameter search is over.
# Per the log printed below, this shuts down background jobs and waits for the
# remaining queued operations to synchronize with Neptune before returning.
# NOTE(review): `run_study_level` is created in an earlier cell — confirm it is
# still the live run when this cell executes.
run_study_level.stop()

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 13 operations to synchronize with Neptune. Do not kill this process.
All 13 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/FYP-DCL/DCL-Framework/e/DCLFRAM-81/metadata


In [None]:
# Manually recorded result of the best trial from the Optuna study above
# (the study log reports "Best is trial 25 with value: 0.757515013217926").
# The original cell held this as pasted plain text, which is not valid Python
# and raises a SyntaxError under "Restart & Run All"; it is kept here as a
# literal so the cell executes and the values remain available for later use.
BEST_TRIAL_SUMMARY = {
    "trial": 25,
    "accuracy": 0.757515013217926,
    "hyperparameters": {
        "epochs": 2,
        "alpha": 0.4429593504449712,
        "gamma": 1.8775783926917704,
        "temp_1": 0.05137844981376246,
        "temp_2": 0.07135548353185929,
        "batch_size": 108,
    },
}
# Last expression of the cell: rendered by the notebook as the cell output.
BEST_TRIAL_SUMMARY