<a href="https://colab.research.google.com/github/krishan30/AiLERT-FYP/blob/main/DCL_Framework_with_huggingface_hyper_parameter_tuning_base_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Importing Libraries

In [None]:
!pip install transformers
!pip install evaluate
!pip install accelerate -U
!pip install torchmetrics
!pip install optuna_dashboard
!pip install optuna

Collecting evaluate
  Downloading evaluate-0.4.1-py3-none-any.whl (84 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m999.7 kB/s[0m eta [36m0:00:00[0m
[?25hCollecting datasets>=2.0.0 (from evaluate)
  Downloading datasets-2.15.0-py3-none-any.whl (521 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m521.2/521.2 kB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
Collecting dill (from evaluate)
  Downloading dill-0.3.7-py3-none-any.whl (115 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
Collecting multiprocess (from evaluate)
  Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m19.2 MB/s[0m eta [36m0:00:00[0m
Collecting responses<0.19 (from evaluate)
  Downloading responses-0.18.0-py3-none-any.whl (38 kB)
Collecting pyarrow-hotfix (from datasets>=2.0.0

In [None]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader,WeightedRandomSampler
from transformers import AutoModel,AutoTokenizer,AdamW,get_scheduler
from tqdm import tqdm
import evaluate
from datasets import load_dataset
from tqdm.auto import tqdm
import numpy as np
import random
from torchmetrics.classification import BinaryAccuracy
import optuna

## Setting up the GPU or CPU

In [None]:
# Limit CUDA device visibility for this process to GPU index 0.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# Use that GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda:0"  if torch.cuda.is_available() else "cpu")

## Hyperparameter Values

In [None]:
#@title #Hyperparameter Values
# Default hyperparameters for the notebook. The values of EPOCHS, ALPHA, GAMMA,
# TEMP_1, TEMP_2 and BATCH_SIZE match the best Optuna trial logged by the
# tuning cell further down.
BATCH_SIZE = 144 # @param {type:"integer"}
EPOCHS = 2 # @param {type:"integer"}
DROPOUT =0.5# @param {type:"number"}
ALPHA = 0.4601079528693497 # @param {type:"number"}
GAMMA = 4.932651675660353 # @param {type:"number"}
TEMP_1 = 0.14630528484684996 # @param {type:"number"}
TEMP_2 =  0.07242998464641397# @param {type:"number"}
# Weight of the summed contrastive losses; objective() passes it to train_model as `lamda`.
LAMBDA = 1
SEED = 1234 # @param {type:"integer"}


In [None]:
# Record of the best hyperparameters reported by the Optuna study (trial 1); this expression is only displayed, not assigned.
{'epochs': 2, 'alpha': 0.4601079528693497, 'gamma': 4.932651675660353, 'temp_1': 0.14630528484684996, 'temp_2': 0.07242998464641397, 'batch_size': 144}

## Setting Random Seed for Reproducibility

In [None]:
def setup_seed(seed:int):
    """Seed every random number generator used by the notebook.

    Seeds PyTorch (CPU and all CUDA devices), NumPy's global generator and
    Python's ``random`` module with the same value, then asks cuDNN to use
    deterministic algorithms.

    Args:
        seed: Integer handed unchanged to each seeding function.
    """
    seeders = (
        torch.manual_seed,
        torch.cuda.manual_seed_all,
        np.random.seed,
        random.seed,
    )
    for seed_fn in seeders:
        seed_fn(seed)
    torch.backends.cudnn.deterministic = True



In [None]:
setup_seed(SEED)

In [None]:
# @title Bert Model
bert_model_name = "bert-base-cased" # @param {type:"string"}

## Loading Train, Dev, Test Datasets

In [None]:
dataset = load_dataset("krishan-CSE/HatEval_New")

Downloading readme:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/1.09M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/139k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/385k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

## Loading the Tokenizer for the Transformer Model

In [None]:
tokenizer = AutoTokenizer.from_pretrained(bert_model_name)

tokenizer_config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

In [None]:
def tokenize_function(examples, max_length: int = 45):
    """Tokenize a batch of examples with the notebook-level ``tokenizer``.

    Intended for ``dataset.map(..., batched=True)``: ``examples["text"]`` is
    passed to the tokenizer as a batch of strings. Every sequence is padded
    or truncated to exactly ``max_length`` tokens, special tokens included.

    Args:
        examples: Mapping with a ``"text"`` entry holding the raw strings.
        max_length: Fixed output length in tokens (default 45, the value
            originally hard-coded here).

    Returns:
        The tokenizer's batch encoding for the given texts.
    """
    # Calling the tokenizer directly is the supported entry point;
    # `batch_encode_plus` is documented as deprecated in its favour.
    return tokenizer(
        examples["text"],
        padding='max_length',
        max_length=max_length,
        add_special_tokens=True,
        truncation=True,
    )

## Tokenize the Dataset

In [None]:
dataset

DatasetDict({
    train: Dataset({
        features: ['text', 'labels'],
        num_rows: 8982
    })
    validation: Dataset({
        features: ['text', 'labels'],
        num_rows: 998
    })
    test: Dataset({
        features: ['text', 'labels'],
        num_rows: 3000
    })
})

In [None]:
tokenized_datasets = dataset.map(tokenize_function, batched=True)

Map:   0%|          | 0/8982 [00:00<?, ? examples/s]

Map:   0%|          | 0/998 [00:00<?, ? examples/s]

Map:   0%|          | 0/3000 [00:00<?, ? examples/s]

In [None]:
tokenized_datasets

DatasetDict({
    train: Dataset({
        features: ['text', 'labels', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 8982
    })
    validation: Dataset({
        features: ['text', 'labels', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 998
    })
    test: Dataset({
        features: ['text', 'labels', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 3000
    })
})

## Remove Unwanted Columns

In [None]:
tokenized_datasets=tokenized_datasets.remove_columns(['text'])

## Format the Columns

In [None]:
tokenized_datasets=tokenized_datasets.with_format("torch")

In [None]:
tokenized_datasets

DatasetDict({
    train: Dataset({
        features: ['labels', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 8982
    })
    validation: Dataset({
        features: ['labels', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 998
    })
    test: Dataset({
        features: ['labels', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 3000
    })
})

## Creating DataLoaders for Train, Dev, Test Datasets

In [None]:
train_dataloader=DataLoader(tokenized_datasets["train"], batch_size=BATCH_SIZE ,shuffle=True)

In [None]:
# Validation data is only evaluated, so it is iterated in a fixed order;
# shuffling is only useful for the training loader.
dev_dataloader=DataLoader(tokenized_datasets["validation"], batch_size=BATCH_SIZE, shuffle=False)

In [None]:
# Test data is only evaluated, so it is iterated in a fixed order;
# shuffling is only useful for the training loader.
test_dataloader=DataLoader(tokenized_datasets["test"], batch_size=BATCH_SIZE, shuffle=False)

In [None]:
# Sanity check: take the first batch from the training loader and display its
# keys and tensors.
for batch in train_dataloader:
    break
batch.items()

dict_items([('labels', tensor([1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0,
        1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0,
        0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0,
        0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0,
        0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1,
        0, 1, 0, 0, 1, 1, 0, 1])), ('input_ids', tensor([[  101, 12118,  4386,  ...,     0,     0,     0],
        [  101,   144, 17643,  ...,     0,     0,     0],
        [  101,  1422,  4906,  ...,     0,     0,     0],
        ...,
        [  101,  2009,  5718,  ...,     0,     0,     0],
        [  101,  2066,  2676,  ...,     0,     0,     0],
        [  101,  1706,  4392,  ...,     0,     0,     0]])), ('token_type_ids', tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 

## Define the Bert Architecture



In [None]:
class DCLArchitecture(nn.Module):
    """Pretrained transformer encoder followed by a single-logit linear head.

    In training mode (``if_train=True``) every input yields two feature rows:
    the encoder's pooled output and a dropout-perturbed copy of it. The two
    rows are placed next to each other, so the returned batch is
    ``[x0, x0_aug, x1, x1_aug, ...]``.

    Args:
        dropout: Dropout probability applied to the pooled encoder output.
        bert_model_name: Hugging Face checkpoint name loaded with
            ``AutoModel.from_pretrained``.
    """

    def __init__(self,dropout:float,bert_model_name:str='bert-base-cased'):
        super(DCLArchitecture, self).__init__()
        self.bert = AutoModel.from_pretrained(bert_model_name)
        # Take the feature width from the loaded checkpoint instead of assuming
        # 768, so other encoder sizes work; 768 stays as the fallback when the
        # config does not expose `hidden_size`.
        self.dim = getattr(self.bert.config, "hidden_size", 768)
        self.dense = nn.Linear(self.dim, 1)
        self.dropout = nn.Dropout(dropout)

    def forward(self,batch_tokenized, if_train=False):
        """Encode a tokenized batch and score it.

        Args:
            batch_tokenized: Mapping with ``'input_ids'`` and
                ``'attention_mask'`` tensors.
            if_train: When True, return the original and the dropout-augmented
                features interleaved (twice as many rows as inputs).

        Returns:
            Tuple ``(features, logits)`` where ``features`` has ``self.dim``
            columns and ``logits`` holds one value per feature row.
        """
        input_ids = batch_tokenized['input_ids']
        attention_mask = batch_tokenized['attention_mask']
        # Only element 1 of the encoder output is used (the pooled output for
        # Hugging Face BERT models), so per-layer hidden states are not
        # requested. No per-step `torch.cuda.empty_cache()` either: it does
        # not give PyTorch more memory and slows every forward pass.
        bert_output = self.bert(input_ids, attention_mask=attention_mask)
        pooled = bert_output[1]

        if if_train:
            augmented = self.dropout(pooled)
            # (B, 2*dim) -> (2B, dim): each row is split into its original and
            # augmented halves, which end up on consecutive rows.
            features = torch.cat((pooled, augmented), dim=1).reshape(-1, self.dim)
        else:
            features = self.dropout(pooled)

        logits = self.dense(features).squeeze(1)

        return features, logits

## Define Focal Loss

In [None]:
class FocalLoss(nn.Module):
    """Binary focal loss computed from raw logits.

    For each sample, with ``p`` the sigmoid of the logit and ``p_t`` the
    probability assigned to the true class (clamped to ``[1e-4, 1]``)::

        loss = -alpha_t * (1 - p_t) ** gamma * log(p_t)

    where ``alpha_t`` is ``alpha`` for target 1 and ``1 - alpha`` for target 0.

    Args:
        alpha: Weight of the positive class.
        gamma: Focusing exponent applied to ``1 - p_t``.
        size_average: Return the batch mean when True, the batch sum otherwise.
    """

    def __init__(self, alpha:float=0.4, gamma:float=2, size_average:bool=True):
        super(FocalLoss, self).__init__()
        # Register alpha as a non-persistent buffer: it follows `.to(device)`
        # like any module tensor, while `state_dict()` stays empty as before.
        self.register_buffer("alpha", torch.tensor(float(alpha)), persistent=False)
        self.gamma = gamma
        self.size_average = size_average

    def forward(self, pred, target):
        """Compute the focal loss.

        Args:
            pred: Raw logits, one per sample (any shape; flattened).
            target: Binary targets (0 or 1) with the same number of elements.

        Returns:
            Scalar tensor: mean or sum of the per-sample losses.
        """
        device = target.device
        # Use a device-local copy instead of reassigning `self.alpha`, so the
        # forward pass does not mutate module state.
        alpha = self.alpha.to(device)

        prob_pos = torch.sigmoid(pred).view(-1, 1)
        two_class = torch.cat((1 - prob_pos, prob_pos), dim=1)
        index = target.view(-1, 1).long()

        # Probability assigned to the true class of each sample.
        probs = two_class.gather(1, index).clamp(min=0.0001, max=1.0)
        log_p = probs.log()

        # Per-sample class weight: column 0 -> 1 - alpha, column 1 -> alpha.
        class_weights = torch.stack((1 - alpha, alpha))
        alpha_t = class_weights[index]

        batch_loss = -alpha_t * torch.pow(1 - probs, self.gamma) * log_p

        if self.size_average:
            return batch_loss.mean()
        return batch_loss.sum()

## Define Unsupervised Contrastive loss

In [None]:
def simcse_loss(batch_emb,temp_1:float):
    """Unsupervised SimCSE-style contrastive loss over interleaved pairs.

    Rows ``2k`` and ``2k + 1`` of ``batch_emb`` are treated as two views of
    the same example. Each row must pick out its partner among all other
    rows, scored by cosine similarity divided by ``temp_1``.

    Args:
        batch_emb: ``(N, D)`` embeddings with an even ``N``.
        temp_1: Temperature dividing the similarity scores.

    Returns:
        Scalar cross-entropy loss.

    Raises:
        ValueError: If the number of rows is odd, since rows cannot be paired.
    """
    batch_size = batch_emb.size(0)
    if batch_size % 2 != 0:
        raise ValueError(
            "simcse_loss expects an even number of rows (interleaved pairs), got {}".format(batch_size)
        )

    # Build everything on the embeddings' own device instead of relying on a
    # notebook-level `device` variable.
    dev = batch_emb.device
    positions = torch.arange(batch_size, dtype=torch.long, device=dev)
    # Partner index: even rows point to the next row, odd rows to the previous.
    y_true = positions + 1 - 2 * (positions % 2)

    norm_emb = F.normalize(batch_emb, dim=1, p=2)
    sim_score = torch.matmul(norm_emb, norm_emb.transpose(0, 1))
    # Push self-similarity far down so a row can never select itself.
    sim_score = sim_score - torch.eye(batch_size, device=dev) * 1e12
    sim_score = sim_score / temp_1

    return F.cross_entropy(sim_score, y_true)


## Define Supervised Contrastive loss

In [None]:
def sup_simcse_loss(batch_emb, label,temp_2:float):
    """Supervised contrastive loss based on label agreement.

    For every ordered pair ``(i, j)`` with the same label and ``i != j`` the
    term is ``-log(s_ij / (s_ij + sum_k s_ik))``, where ``s`` is
    ``exp(cosine / temp_2)`` and ``k`` runs over rows whose label differs
    from row ``i``. Pairs with different labels contribute 0. Each diagonal
    entry contributes ``-log(1e-12)`` when row ``i`` has at least one row
    with a different label. The total is divided by ``2 * n``.

    Args:
        batch_emb: ``(n, D)`` embeddings.
        label: 1-D tensor of ``n`` labels on the same device as ``batch_emb``.
        temp_2: Temperature dividing the cosine similarities.

    Returns:
        Scalar loss tensor.
    """
    n = batch_emb.size(0)
    # Allocate helper matrices on the embeddings' own device rather than on a
    # notebook-level `device` variable.
    dev = batch_emb.device

    similarity_matrix = F.cosine_similarity(batch_emb.unsqueeze(1), batch_emb.unsqueeze(0), dim=2)
    same_label = label.expand(n, n).eq(label.expand(n, n).t())
    mask = torch.ones_like(similarity_matrix) * same_label
    mask_no_sim = torch.ones_like(mask) - mask

    eye = torch.eye(n, n, device=dev)
    # Zero on the diagonal (drops self-pairs), 1e12 elsewhere.
    off_diag_scale = (torch.ones(n, n, device=dev) - eye) * 1e12
    # NOTE(review): in float32, exp(sim / temp_2) * 1e12 overflows once
    # sim / temp_2 exceeds about 61, i.e. temp_2 below roughly 0.016 for
    # near-identical rows. Confirm the tuning range stays clear of that.
    similarity_matrix = torch.exp(similarity_matrix / temp_2) * off_diag_scale

    sim = mask * similarity_matrix
    no_sim = similarity_matrix - sim
    # Column vector: row i holds the summed scores of its different-label rows.
    no_sim_sum = torch.sum(no_sim, dim=1).unsqueeze(1)

    sim_sum = sim + no_sim_sum
    loss = torch.div(sim, sim_sum)
    # Different-label entries become 1 (log -> 0); the tiny diagonal term keeps
    # log() finite where the ratio is exactly 0.
    loss = mask_no_sim + loss + eye / 1e12
    loss = -torch.log(loss)
    loss = torch.sum(torch.sum(loss, dim=1)) / (2 * n)
    return loss

## Fine Tuning the Model

## Format the Labels

In [None]:
def copy_label(labels):
    """Duplicate each label so the copies sit on consecutive positions.

    ``[a, b, c]`` becomes ``[a, a, b, b, c, c]``.

    Args:
        labels: Tensor of labels.

    Returns:
        Flattened tensor with every label repeated twice in place.
    """
    paired = torch.stack((labels, labels), dim=1)
    return paired.reshape(-1)

### Train the Model

In [None]:
def train_model(model, train_dataloader,optimizer,criteon,device,temp_1:float,temp_2:float,lamda:float):
    """Run one training epoch with the focal and contrastive objectives.

    The optimised loss per batch is
    ``criteon(pred, labels) + lamda * (simcse_loss + sup_simcse_loss)``.

    Args:
        model: Called as ``model(batch, if_train=True)``; returns
            ``(embeddings, logits)``.
        train_dataloader: Yields dicts of tensors that include ``"labels"``.
        optimizer: Optimizer stepped once per batch.
        criteon: Classification loss applied to the logits and float labels.
        device: Device every batch tensor is moved to.
        temp_1: Temperature passed to ``simcse_loss``.
        temp_2: Temperature passed to ``sup_simcse_loss``.
        lamda: Weight of the summed contrastive losses.

    Returns:
        Tuple ``(average_loss, accuracy)``: ``average_loss`` is the mean of
        the per-batch classification loss only (contrastive terms excluded),
        and ``accuracy`` is the ``BinaryAccuracy`` over the epoch.
    """
    accuracy_metric = BinaryAccuracy()
    accuracy_metric.to(device)
    model.train()
    total_train_loss = 0.0
    # The context manager closes the progress bar when the epoch ends.
    with tqdm(range(len(train_dataloader))) as progress_bar:
        for batch in train_dataloader:
            batch = {k: v.to(device) for k, v in batch.items()}
            # The model returns two rows per input, so each label is duplicated.
            labels = copy_label(batch["labels"])
            emb, pred = model(batch, if_train=True)
            class_loss = criteon(pred, labels.float())
            # Use the temperatures passed by the caller. Reading the
            # notebook-level TEMP_1 / TEMP_2 here would ignore the values
            # sampled during hyperparameter search.
            loss_sim = simcse_loss(emb, temp_1=temp_1)
            loss_supsim = sup_simcse_loss(emb, labels, temp_2=temp_2)
            total_train_loss += class_loss.item()
            # The metric only needs values, so no graph is kept for it.
            accuracy_metric(torch.sigmoid(pred.detach()), labels)

            loss = class_loss + lamda * (loss_sim + loss_supsim)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            progress_bar.update(1)
    average_epoch_train_loss = total_train_loss / len(train_dataloader)  # Compute average epoch loss
    train_accuracy = accuracy_metric.compute()
    return average_epoch_train_loss,train_accuracy


### Evaluate the Model

In [None]:
def evaluate_model(model, dev_dataloader,criteon, device):
    """Score the model on a held-out loader without updating it.

    Args:
        model: Called as ``model(batch, False)``; returns
            ``(embeddings, logits)``.
        dev_dataloader: Yields dicts of tensors that include ``"labels"``.
        criteon: Loss applied to the logits and float labels.
        device: Device every batch tensor is moved to.

    Returns:
        Tuple ``(average_loss, accuracy)``: the mean of the per-batch losses
        and the ``BinaryAccuracy`` accumulated over all batches.
    """
    metric = BinaryAccuracy()
    metric.to(device)
    model.eval()

    loss_sum = 0.0
    for raw_batch in dev_dataloader:
        batch = {name: tensor.to(device) for name, tensor in raw_batch.items()}
        targets = batch["labels"]
        with torch.no_grad():
            _, logits = model(batch , False)
            batch_loss = criteon(logits, targets.float())
        loss_sum += batch_loss.item()
        metric(torch.sigmoid(logits), targets)

    mean_loss = loss_sum / len(dev_dataloader)
    return mean_loss, metric.compute()

### Train & Evaluate Model

## Hyperparameter Tuning

In [None]:
# Keep Optuna trial records in a SQLite database file so the study can be
# reopened later (the study below is created with load_if_exists=True).
# NOTE(review): the path assumes Google Drive is already mounted at
# /content/drive; no mount call is visible in this notebook section. Confirm.
storage = optuna.storages.RDBStorage("sqlite:////content/drive/MyDrive/FYP/Hyper_Parameter_Tuning/optimization_study.db")

In [None]:
def define_model(bert_model_name,trial):
    """Build a DCLArchitecture for one trial and place it on ``device``.

    Args:
        bert_model_name: Checkpoint name forwarded to ``DCLArchitecture``.
        trial: Optuna trial; currently unused because dropout is fixed at 0.5.

    Returns:
        The model, already moved to the notebook-level ``device``.
    """
    dropout_rate = 0.5
    return DCLArchitecture(dropout=dropout_rate, bert_model_name=bert_model_name).to(device)

In [None]:
def tokenized_batch_data(trial,tokenized_datasets):
    """Create train and validation loaders with a trial-suggested batch size.

    Args:
        trial: Optuna trial; ``"batch_size"`` is sampled as an integer in
            ``[100, 220]``.
        tokenized_datasets: Mapping with ``"train"`` and ``"validation"``
            datasets.

    Returns:
        Tuple ``(train_dataloader, dev_dataloader)``.
    """
    batch_size = trial.suggest_int("batch_size",100,220)
    train_dataloader = DataLoader(tokenized_datasets["train"], batch_size=batch_size, shuffle=True)
    # Validation is iterated in a fixed order: shuffling gains nothing for
    # evaluation and changes which samples land in the final, smaller batch.
    dev_dataloader = DataLoader(tokenized_datasets["validation"], batch_size=batch_size, shuffle=False)
    return train_dataloader,dev_dataloader


In [None]:
checkpoint_path="/content/drive/MyDrive/FYP/Hyper_Parameter_Tuning/"

In [None]:
def objective(trial):
    """Optuna objective: train one configuration and return its dev accuracy.

    Samples the number of epochs, the focal-loss ``alpha`` and ``gamma`` and
    the two contrastive temperatures (the batch size is sampled inside
    ``tokenized_batch_data``). It then trains a fresh model, reporting the
    validation accuracy after every epoch so the study can prune the trial.
    When the trial completes, the state dicts of the model and the loss
    module are written under ``checkpoint_path``.

    Args:
        trial: Optuna trial supplying the hyperparameter suggestions.

    Returns:
        Validation accuracy measured after the last epoch.

    Raises:
        optuna.exceptions.TrialPruned: When the study asks to stop the trial.
    """
    # Search space (the suggestion order is kept as-is).
    n_epochs = trial.suggest_int("epochs", 1,2)
    focal_alpha = trial.suggest_float("alpha", 0.1, 0.5)
    focal_gamma = trial.suggest_float("gamma", 1.0, 5)
    unsup_temp = trial.suggest_float("temp_1", 0.05, 0.15)
    sup_temp = trial.suggest_float("temp_2", 0.01,0.1)

    train_dataloader, dev_dataloader = tokenized_batch_data(trial, tokenized_datasets)
    model = define_model(bert_model_name, trial)
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
    criteon = FocalLoss(focal_alpha, focal_gamma)

    for epoch in range(n_epochs):
        train_model(model, train_dataloader, optimizer, criteon, device, unsup_temp, sup_temp, LAMBDA)
        _, dev_accuracy = evaluate_model(model, dev_dataloader, criteon, device)

        trial.report(dev_accuracy, epoch)

        # Stop early when the pruner judges this intermediate value too weak.
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    # One checkpoint pair per completed trial, keyed by the trial number.
    for prefix, module in (('model_', model), ('criteon_', criteon)):
        torch.save(module.state_dict(), checkpoint_path+prefix+str(trial.number)+'.pt')

    return dev_accuracy


In [None]:
# Create the study, or reopen it if it already exists in the storage, and
# maximise the validation accuracy returned by `objective`.
study = optuna.create_study(
    study_name='DCL-Experiments_1',
    storage=storage,
    direction='maximize',
    load_if_exists=True,
)
study.optimize(objective, n_trials=50)

# Report the best trial found.
trial = study.best_trial
print('Accuracy: {}'.format(trial.value))
print("Best hyperparameters: {}".format(trial.params))

[I 2023-12-07 05:36:37,480] A new study created in RDB with name: DCL-Experiments_1


  0%|          | 0/79 [00:00<?, ?it/s]

[I 2023-12-07 05:37:52,159] Trial 0 finished with value: 0.6853707432746887 and parameters: {'epochs': 1, 'alpha': 0.39017422514975963, 'gamma': 4.258022814994655, 'temp_1': 0.09051337258511409, 'temp_2': 0.09840686596798195, 'batch_size': 114}. Best is trial 0 with value: 0.6853707432746887.


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/63 [00:00<?, ?it/s]

[I 2023-12-07 05:40:11,650] Trial 1 finished with value: 0.7805611491203308 and parameters: {'epochs': 2, 'alpha': 0.4601079528693497, 'gamma': 4.932651675660353, 'temp_1': 0.14630528484684996, 'temp_2': 0.07242998464641397, 'batch_size': 144}. Best is trial 1 with value: 0.7805611491203308.


  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/46 [00:00<?, ?it/s]

[I 2023-12-07 05:42:28,105] Trial 2 finished with value: 0.7384769320487976 and parameters: {'epochs': 2, 'alpha': 0.35871506417406784, 'gamma': 2.5915739315919146, 'temp_1': 0.08400466583118096, 'temp_2': 0.07422316440769548, 'batch_size': 196}. Best is trial 1 with value: 0.7805611491203308.


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

[I 2023-12-07 05:44:47,812] Trial 3 finished with value: 0.7104208469390869 and parameters: {'epochs': 2, 'alpha': 0.49178521356787297, 'gamma': 2.990213619416572, 'temp_1': 0.14208213647666185, 'temp_2': 0.09912994625757302, 'batch_size': 156}. Best is trial 1 with value: 0.7805611491203308.


  0%|          | 0/75 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

[I 2023-12-07 05:47:06,293] Trial 4 finished with value: 0.6743487119674683 and parameters: {'epochs': 2, 'alpha': 0.13212789713007653, 'gamma': 2.850956868834612, 'temp_1': 0.055961720244502314, 'temp_2': 0.09754713278485426, 'batch_size': 120}. Best is trial 1 with value: 0.7805611491203308.


  0%|          | 0/43 [00:00<?, ?it/s]

[I 2023-12-07 05:48:14,993] Trial 5 pruned. 


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/89 [00:00<?, ?it/s]

[I 2023-12-07 05:50:31,362] Trial 6 pruned. 


  0%|          | 0/63 [00:00<?, ?it/s]

[I 2023-12-07 05:51:40,799] Trial 7 pruned. 


  0%|          | 0/41 [00:00<?, ?it/s]

[I 2023-12-07 05:52:50,312] Trial 8 pruned. 


  0%|          | 0/42 [00:00<?, ?it/s]

[I 2023-12-07 05:53:58,227] Trial 9 pruned. 


  0%|          | 0/50 [00:00<?, ?it/s]

[I 2023-12-07 05:55:08,435] Trial 10 finished with value: 0.6843687295913696 and parameters: {'epochs': 1, 'alpha': 0.4213406001523291, 'gamma': 4.789131301334573, 'temp_1': 0.1478449334636924, 'temp_2': 0.05488159706448267, 'batch_size': 183}. Best is trial 1 with value: 0.7805611491203308.


  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/49 [00:00<?, ?it/s]

[I 2023-12-07 05:57:24,968] Trial 11 finished with value: 0.7444889545440674 and parameters: {'epochs': 2, 'alpha': 0.3895805292871935, 'gamma': 1.6854186960200903, 'temp_1': 0.11812180736379926, 'temp_2': 0.07016579358669744, 'batch_size': 184}. Best is trial 1 with value: 0.7805611491203308.


  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

[I 2023-12-07 05:59:41,064] Trial 12 finished with value: 0.757515013217926 and parameters: {'epochs': 2, 'alpha': 0.43323547157203074, 'gamma': 1.4330357652406023, 'temp_1': 0.12693586500551354, 'temp_2': 0.0627699574125589, 'batch_size': 175}. Best is trial 1 with value: 0.7805611491203308.


  0%|          | 0/65 [00:00<?, ?it/s]

  0%|          | 0/65 [00:00<?, ?it/s]

[I 2023-12-07 06:01:58,136] Trial 13 pruned. 


  0%|          | 0/55 [00:00<?, ?it/s]

  0%|          | 0/55 [00:00<?, ?it/s]

[I 2023-12-07 06:04:10,518] Trial 14 pruned. 


  0%|          | 0/55 [00:00<?, ?it/s]

  0%|          | 0/55 [00:00<?, ?it/s]

[I 2023-12-07 06:06:25,388] Trial 15 finished with value: 0.7545090317726135 and parameters: {'epochs': 2, 'alpha': 0.43433526463677374, 'gamma': 2.0128705219530323, 'temp_1': 0.11453962660239411, 'temp_2': 0.042373714010185616, 'batch_size': 165}. Best is trial 1 with value: 0.7805611491203308.


  0%|          | 0/68 [00:00<?, ?it/s]

[I 2023-12-07 06:07:36,438] Trial 16 pruned. 


  0%|          | 0/51 [00:00<?, ?it/s]

[I 2023-12-07 06:08:44,445] Trial 17 pruned. 


  0%|          | 0/59 [00:00<?, ?it/s]

[I 2023-12-07 06:09:54,103] Trial 18 pruned. 


  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/46 [00:00<?, ?it/s]

[I 2023-12-07 06:12:08,882] Trial 19 pruned. 


  0%|          | 0/69 [00:00<?, ?it/s]

[I 2023-12-07 06:13:20,753] Trial 20 pruned. 


  0%|          | 0/54 [00:00<?, ?it/s]

  0%|          | 0/54 [00:00<?, ?it/s]

[I 2023-12-07 06:15:34,444] Trial 21 pruned. 


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

[I 2023-12-07 06:17:48,255] Trial 22 pruned. 


  0%|          | 0/61 [00:00<?, ?it/s]

[I 2023-12-07 06:18:56,033] Trial 23 pruned. 


  0%|          | 0/56 [00:00<?, ?it/s]

[I 2023-12-07 06:20:04,180] Trial 24 pruned. 


  0%|          | 0/47 [00:00<?, ?it/s]

[I 2023-12-07 06:21:13,029] Trial 25 pruned. 


  0%|          | 0/50 [00:00<?, ?it/s]

[I 2023-12-07 06:22:21,107] Trial 26 pruned. 


  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

[I 2023-12-07 06:24:35,701] Trial 27 pruned. 


  0%|          | 0/57 [00:00<?, ?it/s]

  0%|          | 0/57 [00:00<?, ?it/s]

[I 2023-12-07 06:26:56,198] Trial 28 pruned. 


  0%|          | 0/75 [00:00<?, ?it/s]

[I 2023-12-07 06:28:06,599] Trial 29 finished with value: 0.7364729642868042 and parameters: {'epochs': 1, 'alpha': 0.39640049148166917, 'gamma': 1.8518089783273775, 'temp_1': 0.13132324265168707, 'temp_2': 0.0739868600841203, 'batch_size': 121}. Best is trial 1 with value: 0.7805611491203308.


  0%|          | 0/65 [00:00<?, ?it/s]

[I 2023-12-07 06:29:15,654] Trial 30 pruned. 


  0%|          | 0/48 [00:00<?, ?it/s]

[I 2023-12-07 06:30:26,760] Trial 31 pruned. 


  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

[I 2023-12-07 06:32:46,762] Trial 32 pruned. 


  0%|          | 0/45 [00:00<?, ?it/s]

[I 2023-12-07 06:33:54,041] Trial 33 pruned. 


  0%|          | 0/53 [00:00<?, ?it/s]

[I 2023-12-07 06:35:03,420] Trial 34 pruned. 


  0%|          | 0/58 [00:00<?, ?it/s]

[I 2023-12-07 06:36:13,266] Trial 35 pruned. 


  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/49 [00:00<?, ?it/s]

[I 2023-12-07 06:38:27,322] Trial 36 finished with value: 0.7505009770393372 and parameters: {'epochs': 2, 'alpha': 0.4354166351002666, 'gamma': 3.026617034443181, 'temp_1': 0.12363947754533301, 'temp_2': 0.08931737980767152, 'batch_size': 186}. Best is trial 1 with value: 0.7805611491203308.


  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/44 [00:00<?, ?it/s]

[I 2023-12-07 06:40:38,974] Trial 37 pruned. 


  0%|          | 0/61 [00:00<?, ?it/s]

  0%|          | 0/61 [00:00<?, ?it/s]

[I 2023-12-07 06:42:54,889] Trial 38 finished with value: 0.7555110454559326 and parameters: {'epochs': 2, 'alpha': 0.4476480868063524, 'gamma': 3.1057192114449634, 'temp_1': 0.12420846758568227, 'temp_2': 0.08887288038574298, 'batch_size': 148}. Best is trial 1 with value: 0.7805611491203308.


  0%|          | 0/61 [00:00<?, ?it/s]

  0%|          | 0/61 [00:00<?, ?it/s]

[I 2023-12-07 06:45:09,350] Trial 39 pruned. 


  0%|          | 0/72 [00:00<?, ?it/s]

[I 2023-12-07 06:46:19,720] Trial 40 pruned. 


  0%|          | 0/56 [00:00<?, ?it/s]

[I 2023-12-07 06:47:28,519] Trial 41 pruned. 


  0%|          | 0/62 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

[I 2023-12-07 06:49:45,850] Trial 42 pruned. 


  0%|          | 0/67 [00:00<?, ?it/s]

  0%|          | 0/67 [00:00<?, ?it/s]

[I 2023-12-07 06:52:08,079] Trial 43 pruned. 


  0%|          | 0/84 [00:00<?, ?it/s]

[I 2023-12-07 06:53:18,760] Trial 44 pruned. 


  0%|          | 0/57 [00:00<?, ?it/s]

  0%|          | 0/57 [00:00<?, ?it/s]

[I 2023-12-07 06:55:33,988] Trial 45 pruned. 


  0%|          | 0/60 [00:00<?, ?it/s]

[I 2023-12-07 06:56:44,301] Trial 46 pruned. 


  0%|          | 0/48 [00:00<?, ?it/s]

[I 2023-12-07 06:57:55,157] Trial 47 pruned. 


  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

[I 2023-12-07 07:00:13,406] Trial 48 pruned. 


  0%|          | 0/64 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

[I 2023-12-07 07:02:28,187] Trial 49 pruned. 


Accuracy: 0.7805611491203308
Best hyperparameters: {'epochs': 2, 'alpha': 0.4601079528693497, 'gamma': 4.932651675660353, 'temp_1': 0.14630528484684996, 'temp_2': 0.07242998464641397, 'batch_size': 144}
